From 564d205c517500b37946f88cc0846ae97e3fa6e8 Mon Sep 17 00:00:00 2001 From: "Claude Sonnet 4.6" Date: Sun, 12 Apr 2026 01:42:41 +0000 Subject: [PATCH 1/2] feat: use navigate_safe for search and switch to nixos-unstable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use navigate_safe (disable JS → navigate → remove polyfill → re-enable JS) instead of normal goto to bypass webcomponents-lite.js sync XHR freeze - Fetch search results via /xhr/query API endpoint after navigate_safe - Switch chrome-cdp dependency from local path to git URL - Switch flake.nix from nixos-24.11 to nixos-unstable for newer Chromium - Add stealth.js and investigation docs Co-Authored-By: Claude Opus 4.6 --- .devcontainer/Dockerfile | 41 -- .devcontainer/README.md | 38 -- .devcontainer/devcontainer.json | 30 -- .devcontainer/post-create.sh | 100 ---- .gitignore | 3 + AGENTS.md | 13 +- Cargo.lock | 648 +++++++++++++++++++++----- Cargo.toml | 2 +- docs/chromium-freeze-investigation.md | 101 ++++ flake.lock | 48 ++ flake.nix | 100 ++++ mise.toml | 29 +- scripts/build.sh | 25 + scripts/setup.sh | 121 +++++ scripts/up.sh | 11 + src/core/patent_search.rs | 176 +++++-- src/core/scripts/stealth.js | 51 ++ 17 files changed, 1181 insertions(+), 356 deletions(-) delete mode 100644 .devcontainer/Dockerfile delete mode 100644 .devcontainer/README.md delete mode 100644 .devcontainer/devcontainer.json delete mode 100644 .devcontainer/post-create.sh create mode 100644 docs/chromium-freeze-investigation.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100755 scripts/build.sh create mode 100755 scripts/setup.sh create mode 100755 scripts/up.sh create mode 100644 src/core/scripts/stealth.js diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile deleted file mode 100644 index 9a05dab..0000000 --- a/.devcontainer/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -FROM rust:1-slim-bookworm - -# Set UTF-8 locale -ENV LANG=C.UTF-8 \ - 
LANGUAGE=C.UTF-8 \ - LC_ALL=C.UTF-8 - -# Install Node.js and other dependencies as root -RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \ - && apt-get -y install --no-install-recommends \ - build-essential \ - curl \ - git \ - pkg-config \ - libssl-dev \ - ripgrep \ - jq \ - sudo \ - chromium \ - lcov \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/chromium /usr/bin/google-chrome - -# Install Node.js LTS manually -RUN curl -fsSL https://deb.nodesource.com/setup_lts.x | bash - \ - && apt-get install -y nodejs \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* - -# Ensure pre-installed tools are always in PATH even if CARGO_HOME is overridden at runtime -ENV PATH=/usr/local/cargo/bin:$PATH - -# Install cargo-binstall and other rust tools as root (installs to /usr/local/cargo/bin) -RUN curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash \ - && rustup component add rustfmt clippy llvm-tools-preview \ - && cargo binstall -y cargo-audit cargo-llvm-cov - -# Install Chromium and dependencies for browser-based MCP -RUN apt-get update && apt-get install -y \ - chromium \ - chromium-common \ - && apt-get clean -y && rm -rf /var/lib/apt/lists/* diff --git a/.devcontainer/README.md b/.devcontainer/README.md deleted file mode 100644 index e99fe90..0000000 --- a/.devcontainer/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Dev Container - -This folder contains the development container configuration for the Google-Patent-CLI project. - -## CLI Usage - -When using the `devcontainer` CLI, version **0.80.2** or later is required. - -```bash -npx -y @devcontainers/cli@0.80.2 up --workspace-folder . --remove-existing-container -``` - -**Important:** Earlier versions (including 0.80.0) have a bug that causes the CLI to hang after "Container started". 
- -## Files - -- `devcontainer.json` - Container configuration -- `Dockerfile` - Base image with Rust, Node.js, and development tools -- `post-create.sh` - Setup script that runs in background after container creation - -## Setup Process - -The `post-create.sh` script runs automatically after the container starts. It: - -1. Fixes permissions for `CARGO_HOME` -2. Installs Claude CLI -3. Configures `tmux` -4. Installs and configures `mise` -5. Runs `mise install` to set up tools -6. Sets up git pre-commit hook -7. Configures `google-patent-cli` to use chromium -8. Authenticates with Z.ai (if `Z_AI_API_KEY` is set) - -The `devcontainer up` command will wait for the setup to complete before exiting. - -## CI Environment - -In CI (when `CI` or `GITHUB_ACTIONS` is set), the setup script skips all development setup steps. diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json deleted file mode 100644 index c87227c..0000000 --- a/.devcontainer/devcontainer.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "name": "Google-Patent-CLI Dev", - "build": { - "dockerfile": "Dockerfile", - "context": "..", - }, - "features": { - "ghcr.io/devcontainers/features/common-utils:2": { - "installZsh": "true", - "username": "vscode", - "userUid": "1000", - "userGid": "1000", - "upgradePackages": "false", - }, - "ghcr.io/devcontainers/features/github-cli:1": {}, - "ghcr.io/devcontainers/features/git:1": {}, - }, - "mounts": [ - "source=${localEnv:HOME}/.config/gh,target=/home/vscode/.config/gh,type=bind,consistency=cached", - ], - "customizations": { - "vscode": {}, - }, - "containerEnv": { - "Z_AI_API_KEY": "${localEnv:Z_AI_API_KEY}", - "CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS": "1", - }, - "postCreateCommand": "bash .devcontainer/post-create.sh", - "remoteUser": "vscode", -} diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh deleted file mode 100644 index 087658b..0000000 --- a/.devcontainer/post-create.sh +++ /dev/null @@ -1,100 +0,0 @@ -#!/bin/bash - 
-if [ -z "$CI" ] && [ -z "$GITHUB_ACTIONS" ]; then - # Fix permissions for local development where CARGO_HOME is root-owned by the base image - sudo chown -R vscode:vscode /usr/local/cargo - - # Install Claude CLI as vscode user if not already installed - if ! command -v claude >/dev/null 2>&1; then - echo "[Devcontainer Setup] Installing Claude CLI..." - curl -fsSL https://claude.ai/install.sh | bash - - # Add .local/bin to PATH for current session - export PATH="$HOME/.local/bin:$PATH" - - # Add to shell configs for future sessions - echo 'export PATH="$HOME/.local/bin:$PATH"' >> $HOME/.bashrc - echo 'export PATH="$HOME/.local/bin:$PATH"' >> $HOME/.zshrc - else - echo "[Devcontainer Setup] Claude CLI already installed: $(claude --version)" - fi - - echo "[Devcontainer Setup] Configuring claude alias..." - echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.bashrc - echo 'alias claude="claude --allow-dangerously-skip-permissions"' >> $HOME/.zshrc - - # Install mise as vscode user - if ! command -v mise >/dev/null 2>&1; then - echo "[Devcontainer Setup] Installing mise..." - curl https://mise.run | sh - export PATH="$HOME/.local/bin:$PATH" - fi - - echo "[Devcontainer Setup] Configuring mise..." - echo 'eval "$(mise activate bash)"' >> $HOME/.bashrc - echo 'eval "$(mise activate zsh)"' >> $HOME/.zshrc - - # Install skill-bench - if ! command -v skill-bench >/dev/null 2>&1; then - echo "[Devcontainer Setup] Installing skill-bench..." - curl -fsSL https://raw.githubusercontent.com/sonesuke/skill-bench/main/scripts/setup.sh | sh - else - echo "[Devcontainer Setup] skill-bench already installed: $(skill-bench --version 2>/dev/null || echo 'unknown')" - fi - - # Configure google-patent-cli to use chromium - # Note: chrome_args will be dynamically determined by the app - echo "[Devcontainer Setup] Configuring google-patent-cli..." 
- mkdir -p "$HOME/.config/google-patent-cli" - cat > ~/.config/google-patent-cli/config.toml << 'EOF' -# Chrome browser path -browser_path = "/usr/bin/chromium" - -# Chrome arguments for Docker/DevContainer environment -chrome_args = [ - "--no-sandbox", - "--disable-setuid-sandbox", - "--disable-gpu" -] -EOF - - # Run mise install - if command -v mise >/dev/null 2>&1; then - echo "[Devcontainer Setup] Installing tools with mise..." - mise trust - mise install - - echo "[Devcontainer Setup] Setting up git pre-commit hook..." - mise generate git-pre-commit --write --task=pre-commit - else - echo "[Devcontainer Setup] WARNING: mise is not installed." - fi - - echo "[Devcontainer Setup] Authenticating claude..." - if [ -n "$Z_AI_API_KEY" ]; then - mkdir -p "$HOME/.claude" - cat > "$HOME/.claude/settings.json" </dev/null 2>&1; then - echo "[Devcontainer Setup] Configuring gh auth for git..." - gh auth setup-git - fi - - echo "[Devcontainer Setup] Complete!" -else - echo "Running in CI environment, skipping development setup..." 
-fi diff --git a/.gitignore b/.gitignore index b0483ba..bc141d7 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,6 @@ patent_page_dom.html .claude/worktrees/ logs/ .skill-bench/ + +# Nix build artifacts +result diff --git a/AGENTS.md b/AGENTS.md index 5f248db..8dd38eb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -39,8 +39,9 @@ agents/ record-progress.sh # Write progress logs (JSONL) claude-plugin/ # Claude Code Plugin structure skills/ # Individual skill definitions +scripts/ # Build and setup scripts (build.sh, up.sh, setup.sh) +flake.nix # Nix flake for reproducible Docker image mise.toml # Task definitions (fmt, clippy, test, pre-commit) -.devcontainer/ # Dev container configuration ``` ## Skill-Bench Testing @@ -62,3 +63,13 @@ mise.toml # Task definitions (fmt, clippy, test, pre-commit) | `mise run test` | Run tests with `cargo test` | | `mise run coverage` | Generate test coverage report | | `mise run pre-commit` | Run all of the above | + +## Development Container + +The dev environment uses a Nix flake-based Docker image managed via mise tasks. 
+ +- **Build**: `mise run build` — Build the Docker image with Nix +- **Start**: `mise run up` — Start the dev container +- **Setup**: `mise run setup` — Configure git, Rust, Claude CLI, MCP tools, and skills inside the container +- **Attach**: `mise run attach` — Open a shell inside the running container +- **Stop**: `mise run down` — Stop and remove the container diff --git a/Cargo.lock b/Cargo.lock index 60cfb37..1ab51f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -114,6 +114,28 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "aws-lc-rs" +version = "1.16.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a054912289d18629dc78375ba2c3726a3afe3ff71b4edba9dedfca0e3446d1fc" +dependencies = [ + "aws-lc-sys", + "zeroize", +] + +[[package]] +name = "aws-lc-sys" +version = "0.39.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83a25cf98105baa966497416dbd42565ce3a8cf8dbfd59803ec9ad46f3126399" +dependencies = [ + "cc", + "cmake", + "dunce", + "fs_extra", +] + [[package]] name = "base64" version = "0.22.1" @@ -165,9 +187,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd405d82c84ff7f35739f175f67d8b9fb7687a0e84ccdc78bd3568839827cf07" dependencies = [ "find-msvc-tools", + "jobserver", + "libc", "shlex", ] +[[package]] +name = "cesu8" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d43a04d8753f35258c91f8ec639f792891f748a1edbd759cf1dcea3382ad83c" + [[package]] name = "cfg-if" version = "1.0.4" @@ -183,15 +213,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrome-cdp" version = "0.1.0" -source = "git+https://github.com/sonesuke/chrome-cdp#a5166da43a0a7f9ea2dcd0b42491f19ad79b11f3" +source = 
"git+https://github.com/sonesuke/chrome-cdp?branch=fix%2Fwait-for-element-timeout#0bf5c32fe95f30864f20f9774c9b00df709b9654" dependencies = [ "futures", - "reqwest", + "reqwest 0.13.2", "serde", "serde_json", "thiserror 2.0.18", "tokio", - "tokio-tungstenite", + "tokio-tungstenite 0.28.0", "uuid", ] @@ -249,12 +279,31 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +[[package]] +name = "cmake" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678" +dependencies = [ + "cc", +] + [[package]] name = "colorchoice" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "combine" +version = "4.6.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" +dependencies = [ + "bytes", + "memchr", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -402,6 +451,12 @@ dependencies = [ "syn", ] +[[package]] +name = "dunce" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" + [[package]] name = "dyn-clone" version = "1.0.20" @@ -461,19 +516,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared", -] - -[[package]] -name = "foreign-types-shared" -version = "0.1.1" +name = "foldhash" +version = 
"0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "form_urlencoded" @@ -484,6 +530,12 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futures" version = "0.3.31" @@ -605,14 +657,27 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + [[package]] name = "google-patent-cli" -version = "0.2.2" +version = "0.2.3" dependencies = [ "anyhow", "assert_cmd", @@ -624,7 +689,7 @@ dependencies = [ "futures", "openssl-sys", "predicates", - "reqwest", + "reqwest 0.12.28", "rmcp", "schemars", "serde", @@ -632,7 +697,7 @@ dependencies = [ "tempfile", "thiserror 2.0.18", "tokio", - "tokio-tungstenite", + "tokio-tungstenite 0.26.2", "toml", "url", "uuid", @@ -657,6 +722,15 @@ dependencies = [ "tracing", ] +[[package]] +name = "hashbrown" +version = "0.15.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash", +] + [[package]] name = "hashbrown" version = "0.16.1" @@ -748,22 +822,6 @@ dependencies = [ "webpki-roots", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ 
- "bytes", - "http-body-util", - "hyper", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.20" @@ -894,6 +952,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -928,7 +992,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.1", + "serde", + "serde_core", ] [[package]] @@ -959,6 +1025,60 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jni" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a87aa2bb7d2af34197c04845522473242e1aa17c12f4935d5856491a7fb8c97" +dependencies = [ + "cesu8", + "cfg-if", + "combine", + "jni-sys 0.3.1", + "log", + "thiserror 1.0.69", + "walkdir", + "windows-sys 0.45.0", +] + +[[package]] +name = "jni-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn", +] + 
+[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom 0.3.4", + "libc", +] + [[package]] name = "js-sys" version = "0.3.82" @@ -969,11 +1089,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.177" +version = "0.2.184" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "48f5d2a454e16a5ea0f4ced81bd44e4cfc7bd3a507b61887c99fd3538b28e4af" [[package]] name = "libredox" @@ -1032,32 +1158,15 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "mio" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", "windows-sys 0.61.2", ] -[[package]] -name = "native-tls" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "465500e14ea162429d264d44189adc38b199b62b1c21eea9f69e4b73cb03bbf2" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "normalize-line-endings" version = "0.3.0" @@ -1085,32 +1194,6 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" -[[package]] -name = "openssl" -version = "0.10.75" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08838db121398ad17ab8531ce9de97b244589089e290a384c900cb9ff7434328" -dependencies = [ - "bitflags", - "cfg-if", - "foreign-types", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "openssl-probe" version = "0.2.1" @@ -1289,6 +1372,16 @@ dependencies = [ "termtree", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.103" @@ -1324,6 +1417,7 @@ version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ + "aws-lc-rs", "bytes", "getrandom 0.3.4", "lru-slab", @@ -1368,6 +1462,12 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" version = "0.9.2" @@ -1471,6 +1571,44 @@ name = "reqwest" version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" +dependencies = [ + "base64", + "bytes", + "futures-core", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-rustls", + "hyper-util", + "js-sys", + "log", + 
"percent-encoding", + "pin-project-lite", + "quinn", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-rustls", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", +] + +[[package]] +name = "reqwest" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab3f43e3283ab1488b624b44b0e988d0acea0b3214e694730a055cb6b2efa801" dependencies = [ "base64", "bytes", @@ -1482,23 +1620,20 @@ dependencies = [ "http-body-util", "hyper", "hyper-rustls", - "hyper-tls", "hyper-util", "js-sys", "log", "mime", - "native-tls", "percent-encoding", "pin-project-lite", "quinn", "rustls", "rustls-pki-types", + "rustls-platform-verifier", "serde", "serde_json", - "serde_urlencoded", "sync_wrapper", "tokio", - "tokio-native-tls", "tokio-rustls", "tower", "tower-http", @@ -1507,7 +1642,6 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots", ] [[package]] @@ -1584,6 +1718,7 @@ version = "0.23.37" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "758025cb5fccfd3bc2fd74708fd4682be41d99e5dff73c377c0646c6012c73a4" dependencies = [ + "aws-lc-rs", "once_cell", "ring", "rustls-pki-types", @@ -1614,12 +1749,40 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-platform-verifier" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784" +dependencies = [ + "core-foundation 0.10.1", + "core-foundation-sys", + "jni", + "log", + "once_cell", + "rustls", + "rustls-native-certs", + "rustls-platform-verifier-android", + "rustls-webpki", + "security-framework", + "security-framework-sys", + "webpki-root-certs", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls-platform-verifier-android" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f" + [[package]] name = "rustls-webpki" version = "0.103.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" dependencies = [ + "aws-lc-rs", "ring", "rustls-pki-types", "untrusted", @@ -1637,6 +1800,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -1701,6 +1873,12 @@ dependencies = [ "libc", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "serde" version = "1.0.228" @@ -1827,12 +2005,12 @@ checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1991,9 +2169,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.48.0" +version = "1.51.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = 
"f66bf9585cda4b724d3e78ab34b73fb2bbaba9011b9bfdf69dc836382ea13b8c" dependencies = [ "bytes", "libc", @@ -2008,25 +2186,15 @@ dependencies = [ [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.26.4" @@ -2050,7 +2218,19 @@ dependencies = [ "rustls-pki-types", "tokio", "tokio-rustls", - "tungstenite", + "tungstenite 0.26.2", +] + +[[package]] +name = "tokio-tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" +dependencies = [ + "futures-util", + "log", + "tokio", + "tungstenite 0.28.0", ] [[package]] @@ -2206,6 +2386,23 @@ dependencies = [ "utf-8", ] +[[package]] +name = "tungstenite" +version = "0.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" +dependencies = [ + "bytes", + "data-encoding", + "http", + "httparse", + "log", + "rand", + "sha1", + "thiserror 2.0.18", + "utf-8", +] + [[package]] name = "typenum" version = "1.19.0" @@ -2224,6 +2421,12 @@ version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "untrusted" version = "0.9.0" @@ -2262,11 +2465,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.23.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "5ac8b6f42ead25368cf5b098aeb3dc8a1a2c05a3eee8a9a1a68c640edbfc79d9" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] @@ -2292,6 +2495,16 @@ dependencies = [ "libc", ] +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -2313,7 +2526,16 @@ version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.46.0", +] + +[[package]] +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" +dependencies = [ + "wit-bindgen 0.51.0", ] [[package]] @@ -2374,6 +2596,40 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" 
+dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" version = "0.3.82" @@ -2394,6 +2650,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-root-certs" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "804f18a4ac2676ffb4e8b5b5fa9ae38af06df08162314f96a68d2a363e21a8ca" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "webpki-roots" version = "1.0.6" @@ -2403,6 +2668,15 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -2473,6 +2747,15 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.45.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" +dependencies = [ + "windows-targets 0.42.2", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -2509,6 +2792,21 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" +dependencies = [ + "windows_aarch64_gnullvm 0.42.2", + "windows_aarch64_msvc 0.42.2", + "windows_i686_gnu 0.42.2", + "windows_i686_msvc 0.42.2", + "windows_x86_64_gnu 0.42.2", + "windows_x86_64_gnullvm 0.42.2", + "windows_x86_64_msvc 
0.42.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2557,6 +2855,12 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2575,6 +2879,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -2593,6 +2903,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2623,6 +2939,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2641,6 +2963,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = 
"windows_x86_64_gnu" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2659,6 +2987,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2677,6 +3011,12 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.42.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2707,6 +3047,94 @@ version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] + [[package]] name = "writeable" version = "0.6.2" diff --git a/Cargo.toml b/Cargo.toml index d7d9c82..4ae68b2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ rmcp = { version = "0.16", features = ["server", "macros", "transport-io"] } async-trait = "0.1" thiserror = "2" schemars = "1.2" -chrome-cdp = { git = "https://github.com/sonesuke/chrome-cdp" } +chrome-cdp = { git = "https://github.com/sonesuke/chrome-cdp", branch = "fix/wait-for-element-timeout" } cypher-rs = { git = "https://github.com/sonesuke/cypher-rs" } [dependencies.openssl-sys] diff --git a/docs/chromium-freeze-investigation.md b/docs/chromium-freeze-investigation.md new file mode 100644 index 
0000000..549c2d6 --- /dev/null +++ b/docs/chromium-freeze-investigation.md @@ -0,0 +1,101 @@ +# Chromium Headless Freeze Investigation + +## Problem + +Google Patents search page (`https://patents.google.com/?q=...`) freezes indefinitely when loaded in headless Chromium 138 on Linux/ARM container. The page never finishes loading. + +## Root Cause + +Google Patents uses Polymer framework with `webcomponents-lite.min.js` (deprecated, unmaintained since 2018). + +### Freeze Mechanism + +1. `webcomponents-lite.min.js` uses HTML Imports polyfill +2. HTML Imports polyfill fetches `search-app-vulcanized.html` via **synchronous XHR** (`XMLHttpRequest.open(method, url, false)`) +3. The polyfill calls `send()` and immediately busy-waits on `responseText` +4. In Chromium 138 headless on Linux/ARM, this synchronous XHR hangs indefinitely +5. The JS thread is blocked waiting for the XHR response that never arrives + +### Why XHR async patch doesn't work + +We tried forcing `XMLHttpRequest.open()` to always use `async=true`: + +```javascript +var origOpen = XMLHttpRequest.prototype.open; +XMLHttpRequest.prototype.open = function(method, url, async, user, pass) { + return origOpen.call(this, method, url, true, user, pass); +}; +``` + +This does NOT fix the freeze because: +- The polyfill calls `xhr.send()` then immediately reads `xhr.responseText` (blocking expectation) +- With async=true, the response arrives via callback (which needs the JS thread) +- But the JS thread is blocked by the polyfill's busy-wait loop +- **Deadlock**: polyfill waits for response → response callback needs JS thread → JS thread is blocked by polyfill + +## Platform Differences + +| Platform | Chromium Version | Headless | Result | +|---|---|---|---| +| Mac (user's machine) | 149 | Yes/No | Works | +| Linux/ARM container | 138 | Yes | Freezes | + +**Hypothesis**: Chromium fixed or improved sync XHR handling in versions between 138-149. 
Chromium 138's headless mode on Linux may handle sync XHR differently than newer versions or than the Mac build. + +## Current Workaround: `navigate_safe` + +Implemented in `chrome-cdp` library (`chrome-cdp/src/page.rs`): + +``` +1. Disable JavaScript via CDP (Emulation.setScriptExecutionDisabled) +2. Navigate to URL (JS won't execute, so polyfill never runs) +3. Wait for HTML to load +4. Use DOM CDP commands to remove polyfill elements (no JS needed) +5. Re-enable JavaScript +6. Fetch search results via /xhr/query API endpoint +``` + +This completely bypasses the polyfill freeze by never allowing the problematic JavaScript to execute during page load. + +### API Approach + +After `navigate_safe`, search results are fetched via Google Patents internal API: +- Endpoint: `/xhr/query?url=` +- Returns JSON with `results.cluster[].result[].patent` structure +- No page rendering needed - pure JSON API + +## Chromium Version in nixpkgs + +- `nixos-24.11` branch: Chromium **138.0.7204.49** (same as current container) +- `nixos-unstable` branch: Chromium **148-149** (expected to fix the freeze) + +### Action Taken (2026-04-12) + +Changed `flake.nix` from `nixos-24.11` to `nixos-unstable`. Requires rebuild on host: + +```bash +nix flake lock --update-input nixpkgs +mise run build +mise run up +``` + +## Verification Plan + +After upgrading to Chromium 148+: +1. Test if normal `page.goto()` works without `navigate_safe` on search pages +2. If yes, consider making `navigate_safe` a fallback for older Chromium versions +3. 
If no, keep `navigate_safe` as the primary approach + +## Files Modified + +- `chrome-cdp/src/page.rs` — Added `navigate_safe()`, `capture_screenshot()`, `send_command()` +- `src/core/patent_search.rs` — Uses `navigate_safe` + `/xhr/query` API +- `src/core/scripts/stealth.js` — Created but **unused** (XHR patch is embedded in `CdpPage::new()`) +- `flake.nix` — Changed to `nixos-unstable` + +## Open Items + +- [ ] Rebuild Docker image with nixos-unstable and verify Chromium version +- [ ] Test if newer Chromium resolves the freeze without `navigate_safe` +- [ ] Commit and push changes to both `chrome-cdp` and `google-patent-cli` repos +- [ ] Clean up `stealth.js` if it remains unused diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..a622089 --- /dev/null +++ b/flake.lock @@ -0,0 +1,48 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1751274312, + "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-24.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1775877051, + "narHash": "sha256-wpSQm2PD/w4uRo2wb8utk0b5hOBkkg/CZ1xICY+qB7M=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "08b4f3633471874c8894632ade1b78d75dbda002", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..151885f --- /dev/null +++ b/flake.nix @@ -0,0 +1,100 @@ +{ + description = "Google Patent CLI dev environment"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + rust-overlay = { + url 
= "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + }; + }; + + outputs = { self, nixpkgs, rust-overlay }: + let + forAllSystems = nixpkgs.lib.genAttrs [ "x86_64-linux" "aarch64-linux" ]; + in + { + packages = forAllSystems (system: + let + pkgs = import nixpkgs { + inherit system; + overlays = [ rust-overlay.overlays.default ]; + config.allowUnfreePredicate = pkg: builtins.elem (nixpkgs.lib.getName pkg) [ "chromium" ]; + }; + + devPackages = with pkgs; [ + bashInteractive + zsh + zsh-completions + zsh-autosuggestions + zsh-syntax-highlighting + coreutils + findutils + gnugrep + gnutar + gzip + gnused + curl + gitMinimal + gh + cacert + ripgrep + unzip + jq + vim + nodejs_22 + sqlite + chromium + python3 + perl + gnumake + gcc + pkg-config + openssl.dev + lcov + (rust-bin.stable.latest.minimal.override { + extensions = [ "rustfmt-preview" "clippy-preview" ]; + }) + cargo-binstall + ]; + in + { + default = pkgs.dockerTools.buildLayeredImage { + name = "google-patent-cli"; + tag = "latest"; + contents = pkgs.buildEnv { + name = "image-root"; + paths = devPackages; + pathsToLink = [ "/bin" "/etc" "/lib" "/share" ]; + }; + fakeRootCommands = '' + mkdir -p ./home/user/.config ./workspaces ./tmp ./lib + chmod 1777 ./tmp + echo "user:x:1000:1000::/home/user:/bin/zsh" >> ./etc/passwd + echo "user:x:1000:" >> ./etc/group + chown -R 1000:1000 ./home/user + chmod 755 ./home/user + mkdir -p ./usr/bin + ln -sf /bin/env ./usr/bin/env + # Symlink chromium as google-chrome for compatibility + ln -sf /bin/chromium ./bin/google-chrome + for f in ${pkgs.glibc}/lib/ld-linux*.so*; do + ln -sf "$f" ./lib/$(basename "$f") + done + ''; + config = { + Env = [ + "LANG=C.UTF-8" + "LANGUAGE=C.UTF-8" + "LC_ALL=C.UTF-8" + "NIX_SSL_CERT_FILE=/etc/ssl/certs/ca-bundle.crt" + "HOME=/home/user" + ]; + User = "1000:1000"; + Cmd = [ "/bin/zsh" ]; + }; + }; + } + ); + }; +} diff --git a/mise.toml b/mise.toml index 3db2e03..1af9cbe 100644 --- a/mise.toml +++ b/mise.toml @@ -1,22 
+1,49 @@ +[tasks.build] +description = "Build dev container image with nix" +file = "scripts/build.sh" + +[tasks.up] +description = "Start dev container" +file = "scripts/up.sh" + +[tasks.down] +description = "Stop and remove dev container" +run = "docker stop google-patent-cli && docker rm google-patent-cli" + +[tasks.attach] +description = "Attach to dev container" +run = "docker exec -it -w /workspaces/google-patent-cli google-patent-cli /bin/zsh" + +[tasks.setup] +description = "Setup environment inside running container (Rust, Claude CLI, etc.)" +run = "docker exec -u 1000 google-patent-cli bash /workspaces/google-patent-cli/scripts/setup.sh" + [tasks.fmt] +description = "Check formatting with cargo fmt" run = "cargo fmt --all -- --check" [tasks.clippy] +description = "Lint with cargo clippy" run = "cargo clippy --all-targets -- -D warnings" [tasks.test] +description = "Run tests with cargo test" run = "RUSTFLAGS=\"-D warnings\" cargo test --all-targets" [tasks.pre-commit] +description = "Run the fmt, clippy, and test tasks" depends = ["fmt", "clippy", "test"] [tasks.skill-test] +description = "Run skill-bench tests" run = "cargo install --path . 
&& skill-bench run tests --plugin-dir claude-plugin --log logs" [tasks.skill-test-list] +description = "List skill-bench tests" run = "skill-bench list tests" - + [tasks.coverage] +description = "Generate test coverage report" run = """ eval "$(cargo llvm-cov show-env --sh)" cargo llvm-cov clean --workspace diff --git a/scripts/build.sh b/scripts/build.sh new file mode 100755 index 0000000..3a808d0 --- /dev/null +++ b/scripts/build.sh @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +ARCH=$(uname -m) +if [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then # macOS reports arm64, Linux reports aarch64 + NIX_SYSTEM="aarch64-linux" +else + NIX_SYSTEM="x86_64-linux" +fi + +NIX_FLAGS="--extra-experimental-features 'nix-command flakes'" + +docker volume create nix-store 2>/dev/null || true + +docker run --rm \ + -v "$(pwd):/workspace" \ + -v nix-store:/nix \ + -w /workspace \ + nixos/nix \ + sh -c " + git config --global --add safe.directory /workspace + nix $NIX_FLAGS build --no-link .#packages.${NIX_SYSTEM}.default + cat \$(nix $NIX_FLAGS path-info .#packages.${NIX_SYSTEM}.default) + " \ + | docker load diff --git a/scripts/setup.sh b/scripts/setup.sh new file mode 100755 index 0000000..8990da8 --- /dev/null +++ b/scripts/setup.sh @@ -0,0 +1,121 @@ +#!/bin/bash +set -e + +# Configure git using GitHub noreply email and credential helper +if command -v gh >/dev/null 2>&1 && gh auth status &>/dev/null; then + gh auth setup-git + GH_USER=$(gh api user --jq .login) + GH_ID=$(gh api user --jq .id) + git config --global user.name "$GH_USER" + git config --global user.email "${GH_ID}+${GH_USER}@users.noreply.github.com" + echo "Git configured as $GH_USER (noreply email)" +else + echo "Warning: GitHub CLI not authenticated, skipping git config" +fi + +# Install Rust via rustup +if ! command -v rustup >/dev/null 2>&1; then + echo "Installing Rust..."
+ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-tool stable --profile minimal + export PATH="$HOME/.cargo/bin:$PATH" + source "$HOME/.cargo/env" +else + echo "Rust already installed: $(rustc --version)" +fi + +# Add Rust components and tools +echo "Setting up Rust toolchain..." +rustup component add rustfmt clippy llvm-tools-preview +curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash +cargo binstall -y cargo-audit cargo-llvm-cov + +# Install Claude CLI +if ! command -v claude >/dev/null 2>&1; then + echo "Installing Claude CLI..." + curl -fsSL https://claude.ai/install.sh | bash + export PATH="$HOME/.local/bin:$PATH" +else + echo "Claude CLI already installed: $(claude --version)" +fi + +# Configure Claude +if [ -n "$Z_AI_API_KEY" ]; then + mkdir -p "$HOME/.claude" + cat > "$HOME/.claude/settings.json" </dev/null | head -1) +SYNTAX_HIGHLIGHTING=$(find / -path "*/zsh-syntax-highlighting/zsh-syntax-highlighting.zsh" 2>/dev/null | head -1) + +cat > "$HOME/.zshrc" </dev/null) || return + echo " (\$branch)" +} +PROMPT='%F{blue}%~%f%F{yellow}\$(parse_git_branch)%f +%F{green}❯%f ' +OUTER + +# Install mise +if ! command -v mise >/dev/null 2>&1; then + echo "Installing mise..." + curl -fsSL https://mise.run | bash + export PATH="$HOME/.local/bin:$PATH" +else + echo "mise already installed: $(mise --version)" +fi + +cd /workspaces/google-patent-cli +mise trust +mise install +mise generate git-pre-commit + +# Install skill-bench +echo "Installing skill-bench..." 
+curl -fsSL https://raw.githubusercontent.com/sonesuke/skill-bench/main/scripts/setup.sh | sh + +# Configure google-patent-cli for Docker +mkdir -p "$HOME/.config/google-patent-cli" +cat > "$HOME/.config/google-patent-cli/config.toml" << 'EOF' +# Chrome browser path +browser_path = "/bin/chromium" + +# Chrome arguments for Docker environment +chrome_args = [ + "--no-sandbox", + "--disable-setuid-sandbox", + "--disable-gpu" +] +EOF + +# Configure gh auth for git +if command -v gh >/dev/null 2>&1; then + echo "Configuring gh auth for git..." + gh auth setup-git +fi + +echo "Setup completed." diff --git a/scripts/up.sh b/scripts/up.sh new file mode 100755 index 0000000..b1395e0 --- /dev/null +++ b/scripts/up.sh @@ -0,0 +1,11 @@ +#!/bin/bash +set -e + +docker run -d \ + --name google-patent-cli \ + -v "$(pwd):/workspaces/google-patent-cli" \ + -v "${HOME}/.config/gh:/home/user/.config/gh" \ + -e Z_AI_API_KEY="${Z_AI_API_KEY}" \ + -e CLAUDE_CODE_EXPERIMENTAL_AGENT_TEAMS=1 \ + google-patent-cli:latest \ + sleep infinity diff --git a/src/core/patent_search.rs b/src/core/patent_search.rs index 8dba8d5..e0985ce 100644 --- a/src/core/patent_search.rs +++ b/src/core/patent_search.rs @@ -3,6 +3,74 @@ use crate::core::{BrowserManager, CdpPage}; use crate::core::{Error, Result}; use async_trait::async_trait; +// API response types for Google Patents /xhr/query endpoint +#[derive(serde::Deserialize)] +struct ApiResponse { + results: ApiResults, +} + +#[derive(serde::Deserialize)] +struct ApiResults { + total_num_results: u64, + cluster: Vec, +} + +#[derive(serde::Deserialize)] +struct ApiCluster { + result: Vec, +} + +#[derive(serde::Deserialize)] +struct ApiPatentEntry { + patent: ApiPatent, +} + +#[derive(serde::Deserialize)] +struct ApiPatent { + title: Option, + snippet: Option, + filing_date: Option, + assignee: Option, + publication_number: Option, +} + +fn convert_api_response(api: ApiResponse) -> SearchResult { + let patents = api + .results + .cluster + .iter() + 
.flat_map(|cluster| cluster.result.iter()) + .map(|entry| { + let p = &entry.patent; + let id = p.publication_number.clone().unwrap_or_default(); + Patent { + id: id.clone(), + title: p.title.clone().unwrap_or_default(), + abstract_text: None, + description_paragraphs: None, + claims: None, + images: None, + snippet: p.snippet.clone(), + description: None, + filing_date: p.filing_date.clone(), + assignee: p.assignee.clone(), + related_application: None, + claiming_priority: None, + family_applications: None, + legal_status: None, + url: format!("https://patents.google.com/patent/{}", id), + } + }) + .collect(); + + SearchResult { + total_results: api.results.total_num_results.to_string(), + top_assignees: None, + top_cpcs: None, + patents, + } +} + #[async_trait] pub trait PatentSearch: Send + Sync { async fn search(&self, options: &SearchOptions) -> Result; @@ -37,7 +105,7 @@ impl PatentSearcher { async fn search_internal(&self, options: &SearchOptions) -> Result { let browser = self.browser_manager.get_browser().await?; let page_ws_url = browser.new_page().await?; - let page = CdpPage::new(&page_ws_url).await?; + let page = CdpPage::new(&page_ws_url, std::time::Duration::from_secs(30)).await?; let base_url = options.to_url()?; @@ -52,6 +120,22 @@ impl PatentSearcher { } page.goto(&base_url).await?; + // Check for bot detection / rate limiting page + let title = page + .evaluate("document.title") + .await + .ok() + .and_then(|v| v.as_str().map(String::from)) + .unwrap_or_default(); + if title == "Sorry..." { + let _ = page.close().await; + return Err(Error::Search( + "Google blocked this request (bot detection / rate limiting). \ + The IP address may be temporarily blocked. Try again later." 
+ .to_string(), + )); + } + if self.verbose { eprintln!("Waiting for page to load..."); } @@ -65,7 +149,7 @@ impl PatentSearcher { // Additional wait for description paragraphs, claims, and images to appear let _ = page .wait_for_element( - "div.description-line[num], div.claim[num], img[src*='patentimages']", + "div.description-paragraph[num], div.claim[num], img[src*='patentimages']", 15, ) .await?; @@ -90,7 +174,7 @@ impl PatentSearcher { patents, }) } else { - // Search results page - may need pagination + // Search results page - use navigate_safe to bypass webcomponents polyfill freeze let mut all_patents: Vec = Vec::new(); let limit = options.limit.unwrap_or(10); let mut total_results_str = "Unknown".to_string(); @@ -102,18 +186,13 @@ impl PatentSearcher { } // Append num=100 to base_url to fetch more results per page if needed - // This reduces the need for multiple page loads for limits <= 100 let base_url = if limit > 10 { format!("{}&num=100", base_url) } else { base_url }; - // Calculate how many pages we need to fetch - // With num=100, we get 100 results per page (if limit > 10) + // Calculate pagination let results_per_page = if limit > 10 { 100 } else { 10 }; let pages_needed = limit.div_ceil(results_per_page); for page_num in 0..pages_needed { - // Construct URL with page parameter - // Note: Google Patents generally uses start=N or page=N. - // With num=100, page=1 might give results 101-200. 
let page_url = if page_num == 0 { base_url.clone() } else { @@ -125,42 +204,74 @@ impl PatentSearcher { eprintln!("URL: {}", page_url); } - page.goto(&page_url).await?; + // Navigate with JS disabled to prevent webcomponents polyfill freeze, + // remove polyfill scripts, then re-enable JS for API fetch + let polyfill_selectors = [ + "script[src*='webcomponents']", + "link[rel='import']", + "script[src*='search-app']", + ]; + page.navigate_safe( + &page_url, + &polyfill_selectors, + std::time::Duration::from_secs(3), + ) + .await?; - // Wait for results to load - let loaded = page.wait_for_element(".search-result-item", 15).await?; - if !loaded { - // No results on this page, stop pagination - if self.verbose { - eprintln!("No results found on this page, stopping pagination."); - } + // Check for bot detection / rate limiting page + let title = page + .evaluate("document.title") + .await + .ok() + .and_then(|v| v.as_str().map(String::from)) + .unwrap_or_default(); + if title == "Sorry..." { let _ = page.close().await; - break; + return Err(Error::Search( + "Google blocked this request (bot detection / rate limiting). \ + The IP address may be temporarily blocked. Try again later." 
+ .to_string(), + )); } + // Build API URL from the search URL + let api_path = + base_url.strip_prefix("https://patents.google.com/").unwrap_or(&base_url); + let api_url = format!("/xhr/query?url={}", api_path); + let fetch_script = format!( + r#"(async () => {{ + try {{ + const resp = await fetch("{}"); + if (!resp.ok) return {{ error: "HTTP " + resp.status }}; + return await resp.json(); + }} catch(e) {{ + return {{ error: e.message }}; + }} + }})()"#, + api_url + ); + + let api_result = page.evaluate(&fetch_script).await?; + if self.verbose { - eprintln!("Extracting search results from page..."); + if let Some(err) = api_result.get("error") { + eprintln!("API error: {}", err); + } else { + eprintln!("API response received"); + } } - let results = - page.evaluate(include_str!("scripts/extract_search_results.js")).await?; - - let sr: SearchResult = serde_json::from_value(results)?; + let sr = serde_json::from_value::(api_result) + .map_err(|e| Error::Search(format!("Failed to parse API response: {}", e))) + .map(convert_api_response)?; - // Only capture total results and summary data from the first page if page_num == 0 { total_results_str = sr.total_results.clone(); if self.verbose { eprintln!("Total results found: {}", total_results_str); } top_assignees = sr.top_assignees; - - // Two-step CPC extraction: click CPCs tab and wait for DOM update - let _ = page.evaluate(include_str!("scripts/click_cpcs_tab.js")).await?; - tokio::time::sleep(std::time::Duration::from_millis(500)).await; - let cpcs_result = - page.evaluate(include_str!("scripts/extract_cpcs.js")).await?; - top_cpcs = serde_json::from_value(cpcs_result).unwrap_or(None); + top_cpcs = sr.top_cpcs; } let page_patents = sr.patents; @@ -169,14 +280,12 @@ impl PatentSearcher { eprintln!("Found {} patents on this page", page_patents.len()); } - // If we got no results, stop pagination if page_patents.is_empty() { break; } all_patents.extend(page_patents); - // If we've collected enough results, stop if 
all_patents.len() >= limit { break; } @@ -187,7 +296,6 @@ impl PatentSearcher { eprintln!("Total patents collected: {}", all_patents.len()); } - // Truncate to exact limit if all_patents.len() > limit { if self.verbose { eprintln!("Truncating to limit: {}", limit); diff --git a/src/core/scripts/stealth.js b/src/core/scripts/stealth.js new file mode 100644 index 0000000..ab707e9 --- /dev/null +++ b/src/core/scripts/stealth.js @@ -0,0 +1,51 @@ +// Stealth evasions ported from puppeteer-extra-plugin-stealth +// Minimal set for bot detection avoidance + XHR hang workaround +(function() { + 'use strict'; + + // --- navigator.webdriver --- + try { + if (navigator.webdriver !== false && navigator.webdriver !== undefined) { + delete Object.getPrototypeOf(navigator).webdriver; + } + } catch(e) { + Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); + } + + // --- chrome object --- + if (!window.chrome) { + Object.defineProperty(window, 'chrome', { + writable: true, enumerable: true, configurable: false, value: {} + }); + } + window.chrome.runtime = { + OnInstalledReason: {}, OnRestartRequiredReason: {}, + PlatformArch: {}, PlatformNaclArch: {}, PlatformOs: {}, + RequestUpdateCheckStatus: {}, + connect: null, sendMessage: null, get id() { return undefined; } + }; + window.chrome.app = { + isInstalled: false, getDetails: function() { return null; }, + getIsInstalled: function() { return false; }, runningState: function() { return 'cannot_run'; } + }; + window.chrome.csi = function() { return { onloadT: Date.now(), startE: Date.now() }; }; + window.chrome.loadTimes = function() { return {}; }; + + // --- navigator properties --- + try { Object.defineProperty(navigator, 'languages', { get: () => Object.freeze(['en-US', 'en']) }); } catch(e) {} + try { Object.defineProperty(navigator, 'vendor', { get: () => 'Google Inc.' 
}); } catch(e) {} + try { Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 4 }); } catch(e) {} + + // --- Suppress dialogs --- + window.alert = function() {}; + window.confirm = function() { return true; }; + window.prompt = function() { return ''; }; + + // --- Force async XHR (critical: prevents webcomponents polyfill hang) --- + (function() { + var origOpen = XMLHttpRequest.prototype.open; + XMLHttpRequest.prototype.open = function(method, url, async, user, pass) { + return origOpen.call(this, method, url, true, user, pass); + }; + })(); +})(); From 4a836a11e283ef6850518108aa5ea7f12158f264 Mon Sep 17 00:00:00 2001 From: "Claude Sonnet 4.6" Date: Sun, 12 Apr 2026 03:06:43 +0000 Subject: [PATCH 2/2] chore: switch to nixos-unstable Co-Authored-By: Claude Opus 4.6 --- Cargo.lock | 15 ++-- Cargo.toml | 2 +- docs/chromium-freeze-investigation.md | 101 -------------------------- flake.lock | 8 +- flake.nix | 8 ++ scripts/setup.sh | 16 +--- src/core/patent_search.rs | 16 +--- src/core/scripts/extract_patent.js | 4 +- src/core/scripts/stealth.js | 51 ------------- 9 files changed, 26 insertions(+), 195 deletions(-) delete mode 100644 docs/chromium-freeze-investigation.md delete mode 100644 src/core/scripts/stealth.js diff --git a/Cargo.lock b/Cargo.lock index 1ab51f7..d351c27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -213,7 +213,7 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrome-cdp" version = "0.1.0" -source = "git+https://github.com/sonesuke/chrome-cdp?branch=fix%2Fwait-for-element-timeout#0bf5c32fe95f30864f20f9774c9b00df709b9654" +source = "git+https://github.com/sonesuke/chrome-cdp?branch=main#2f057a0e1a3bb875eda922727f306ebe688e94be" dependencies = [ "futures", "reqwest 0.13.2", @@ -221,7 +221,7 @@ dependencies = [ "serde_json", "thiserror 2.0.18", "tokio", - "tokio-tungstenite 0.28.0", + "tokio-tungstenite 0.29.0", "uuid", ] @@ -2223,14 +2223,14 @@ dependencies = [ [[package]] name = 
"tokio-tungstenite" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25a406cddcc431a75d3d9afc6a7c0f7428d4891dd973e4d54c56b46127bf857" +checksum = "8f72a05e828585856dacd553fba484c242c46e391fb0e58917c942ee9202915c" dependencies = [ "futures-util", "log", "tokio", - "tungstenite 0.28.0", + "tungstenite 0.29.0", ] [[package]] @@ -2388,9 +2388,9 @@ dependencies = [ [[package]] name = "tungstenite" -version = "0.28.0" +version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8628dcc84e5a09eb3d8423d6cb682965dea9133204e8fb3efee74c2a0c259442" +checksum = "6c01152af293afb9c7c2a57e4b559c5620b421f6d133261c60dd2d0cdb38e6b8" dependencies = [ "bytes", "data-encoding", @@ -2400,7 +2400,6 @@ dependencies = [ "rand", "sha1", "thiserror 2.0.18", - "utf-8", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4ae68b2..92b67bf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,7 +31,7 @@ rmcp = { version = "0.16", features = ["server", "macros", "transport-io"] } async-trait = "0.1" thiserror = "2" schemars = "1.2" -chrome-cdp = { git = "https://github.com/sonesuke/chrome-cdp", branch = "fix/wait-for-element-timeout" } +chrome-cdp = { git = "https://github.com/sonesuke/chrome-cdp", branch = "main" } cypher-rs = { git = "https://github.com/sonesuke/cypher-rs" } [dependencies.openssl-sys] diff --git a/docs/chromium-freeze-investigation.md b/docs/chromium-freeze-investigation.md deleted file mode 100644 index 549c2d6..0000000 --- a/docs/chromium-freeze-investigation.md +++ /dev/null @@ -1,101 +0,0 @@ -# Chromium Headless Freeze Investigation - -## Problem - -Google Patents search page (`https://patents.google.com/?q=...`) freezes indefinitely when loaded in headless Chromium 138 on Linux/ARM container. The page never finishes loading. - -## Root Cause - -Google Patents uses Polymer framework with `webcomponents-lite.min.js` (deprecated, unmaintained since 2018). 
- -### Freeze Mechanism - -1. `webcomponents-lite.min.js` uses HTML Imports polyfill -2. HTML Imports polyfill fetches `search-app-vulcanized.html` via **synchronous XHR** (`XMLHttpRequest.open(method, url, false)`) -3. The polyfill calls `send()` and immediately busy-waits on `responseText` -4. In Chromium 138 headless on Linux/ARM, this synchronous XHR hangs indefinitely -5. The JS thread is blocked waiting for the XHR response that never arrives - -### Why XHR async patch doesn't work - -We tried forcing `XMLHttpRequest.open()` to always use `async=true`: - -```javascript -var origOpen = XMLHttpRequest.prototype.open; -XMLHttpRequest.prototype.open = function(method, url, async, user, pass) { - return origOpen.call(this, method, url, true, user, pass); -}; -``` - -This does NOT fix the freeze because: -- The polyfill calls `xhr.send()` then immediately reads `xhr.responseText` (blocking expectation) -- With async=true, the response arrives via callback (which needs the JS thread) -- But the JS thread is blocked by the polyfill's busy-wait loop -- **Deadlock**: polyfill waits for response → response callback needs JS thread → JS thread is blocked by polyfill - -## Platform Differences - -| Platform | Chromium Version | Headless | Result | -|---|---|---|---| -| Mac (user's machine) | 149 | Yes/No | Works | -| Linux/ARM container | 138 | Yes | Freezes | - -**Hypothesis**: Chromium fixed or improved sync XHR handling in versions between 138-149. Chromium 138's headless mode on Linux may handle sync XHR differently than newer versions or than the Mac build. - -## Current Workaround: `navigate_safe` - -Implemented in `chrome-cdp` library (`chrome-cdp/src/page.rs`): - -``` -1. Disable JavaScript via CDP (Emulation.setScriptExecutionDisabled) -2. Navigate to URL (JS won't execute, so polyfill never runs) -3. Wait for HTML to load -4. Use DOM CDP commands to remove polyfill elements (no JS needed) -5. Re-enable JavaScript -6. 
Fetch search results via /xhr/query API endpoint -``` - -This completely bypasses the polyfill freeze by never allowing the problematic JavaScript to execute during page load. - -### API Approach - -After `navigate_safe`, search results are fetched via Google Patents internal API: -- Endpoint: `/xhr/query?url=` -- Returns JSON with `results.cluster[].result[].patent` structure -- No page rendering needed - pure JSON API - -## Chromium Version in nixpkgs - -- `nixos-24.11` branch: Chromium **138.0.7204.49** (same as current container) -- `nixos-unstable` branch: Chromium **148-149** (expected to fix the freeze) - -### Action Taken (2026-04-12) - -Changed `flake.nix` from `nixos-24.11` to `nixos-unstable`. Requires rebuild on host: - -```bash -nix flake lock --update-input nixpkgs -mise run build -mise run up -``` - -## Verification Plan - -After upgrading to Chromium 148+: -1. Test if normal `page.goto()` works without `navigate_safe` on search pages -2. If yes, consider making `navigate_safe` a fallback for older Chromium versions -3. 
If no, keep `navigate_safe` as the primary approach - -## Files Modified - -- `chrome-cdp/src/page.rs` — Added `navigate_safe()`, `capture_screenshot()`, `send_command()` -- `src/core/patent_search.rs` — Uses `navigate_safe` + `/xhr/query` API -- `src/core/scripts/stealth.js` — Created but **unused** (XHR patch is embedded in `CdpPage::new()`) -- `flake.nix` — Changed to `nixos-unstable` - -## Open Items - -- [ ] Rebuild Docker image with nixos-unstable and verify Chromium version -- [ ] Test if newer Chromium resolves the freeze without `navigate_safe` -- [ ] Commit and push changes to both `chrome-cdp` and `google-patent-cli` repos -- [ ] Clean up `stealth.js` if it remains unused diff --git a/flake.lock b/flake.lock index a622089..1d78474 100644 --- a/flake.lock +++ b/flake.lock @@ -2,16 +2,16 @@ "nodes": { "nixpkgs": { "locked": { - "lastModified": 1751274312, - "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", + "lastModified": 1775710090, + "narHash": "sha256-ar3rofg+awPB8QXDaFJhJ2jJhu+KqN/PRCXeyuXR76E=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", + "rev": "4c1018dae018162ec878d42fec712642d214fdfa", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-24.11", + "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } diff --git a/flake.nix b/flake.nix index 151885f..e99c327 100644 --- a/flake.nix +++ b/flake.nix @@ -52,6 +52,10 @@ pkg-config openssl.dev lcov + # Chromium runtime dependencies + fontconfig + dbus + liberation_ttf (rust-bin.stable.latest.minimal.override { extensions = [ "rustfmt-preview" "clippy-preview" ]; }) @@ -81,6 +85,10 @@ for f in ${pkgs.glibc}/lib/ld-linux*.so*; do ln -sf "$f" ./lib/$(basename "$f") done + # Fontconfig setup for Chromium + mkdir -p ./etc/fonts + ln -sf ${pkgs.fontconfig.out}/etc/fonts/fonts.conf ./etc/fonts/fonts.conf + fc-cache -f 2>/dev/null || true ''; config = { Env = [ diff --git a/scripts/setup.sh b/scripts/setup.sh index 
8990da8..62971df 100755 --- a/scripts/setup.sh +++ b/scripts/setup.sh @@ -13,20 +13,8 @@ else echo "Warning: GitHub CLI not authenticated, skipping git config" fi -# Install Rust via rustup -if ! command -v rustup >/dev/null 2>&1; then - echo "Installing Rust..." - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-tool stable --profile minimal - export PATH="$HOME/.cargo/bin:$PATH" - source "$HOME/.cargo/env" -else - echo "Rust already installed: $(rustc --version)" -fi - -# Add Rust components and tools -echo "Setting up Rust toolchain..." -rustup component add rustfmt clippy llvm-tools-preview -curl -L --proto '=https' --tlsv1.2 -sSf https://raw.githubusercontent.com/cargo-bins/cargo-binstall/main/install-from-binstall-release.sh | bash +# Install cargo tools (Rust is pre-installed in the image) +echo "Installing cargo tools..." cargo binstall -y cargo-audit cargo-llvm-cov # Install Claude CLI diff --git a/src/core/patent_search.rs b/src/core/patent_search.rs index e0985ce..b87b129 100644 --- a/src/core/patent_search.rs +++ b/src/core/patent_search.rs @@ -174,7 +174,7 @@ impl PatentSearcher { patents, }) } else { - // Search results page - use navigate_safe to bypass webcomponents polyfill freeze + // Search results page - fetch via /xhr/query API let mut all_patents: Vec = Vec::new(); let limit = options.limit.unwrap_or(10); let mut total_results_str = "Unknown".to_string(); @@ -204,19 +204,7 @@ impl PatentSearcher { eprintln!("URL: {}", page_url); } - // Navigate with JS disabled to prevent webcomponents polyfill freeze, - // remove polyfill scripts, then re-enable JS for API fetch - let polyfill_selectors = [ - "script[src*='webcomponents']", - "link[rel='import']", - "script[src*='search-app']", - ]; - page.navigate_safe( - &page_url, - &polyfill_selectors, - std::time::Duration::from_secs(3), - ) - .await?; + page.goto(&page_url).await?; // Check for bot detection / rate limiting page let title = page diff --git 
a/src/core/scripts/extract_patent.js b/src/core/scripts/extract_patent.js index 3affd18..5460e2a 100644 --- a/src/core/scripts/extract_patent.js +++ b/src/core/scripts/extract_patent.js @@ -18,7 +18,7 @@ const descParas = Array.from(document.querySelectorAll('div.description-paragraph[num]')).map(el => ({ number: el.getAttribute('num'), id: el.id, - text: el.innerText.trim() + text: (el.innerText.trim() || el.textContent.trim() || '') })); // Fallback for unstructured description (e.g., Japanese patents) @@ -77,7 +77,7 @@ const claimsArray = Array.from(document.querySelectorAll('div.claim[num]')).map(el => ({ number: el.getAttribute('num'), id: el.id, - text: el.innerText.trim() + text: (el.innerText.trim() || el.textContent.trim() || '') })); // Extract images diff --git a/src/core/scripts/stealth.js b/src/core/scripts/stealth.js deleted file mode 100644 index ab707e9..0000000 --- a/src/core/scripts/stealth.js +++ /dev/null @@ -1,51 +0,0 @@ -// Stealth evasions ported from puppeteer-extra-plugin-stealth -// Minimal set for bot detection avoidance + XHR hang workaround -(function() { - 'use strict'; - - // --- navigator.webdriver --- - try { - if (navigator.webdriver !== false && navigator.webdriver !== undefined) { - delete Object.getPrototypeOf(navigator).webdriver; - } - } catch(e) { - Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); - } - - // --- chrome object --- - if (!window.chrome) { - Object.defineProperty(window, 'chrome', { - writable: true, enumerable: true, configurable: false, value: {} - }); - } - window.chrome.runtime = { - OnInstalledReason: {}, OnRestartRequiredReason: {}, - PlatformArch: {}, PlatformNaclArch: {}, PlatformOs: {}, - RequestUpdateCheckStatus: {}, - connect: null, sendMessage: null, get id() { return undefined; } - }; - window.chrome.app = { - isInstalled: false, getDetails: function() { return null; }, - getIsInstalled: function() { return false; }, runningState: function() { return 'cannot_run'; } - }; - 
window.chrome.csi = function() { return { onloadT: Date.now(), startE: Date.now() }; }; - window.chrome.loadTimes = function() { return {}; }; - - // --- navigator properties --- - try { Object.defineProperty(navigator, 'languages', { get: () => Object.freeze(['en-US', 'en']) }); } catch(e) {} - try { Object.defineProperty(navigator, 'vendor', { get: () => 'Google Inc.' }); } catch(e) {} - try { Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 4 }); } catch(e) {} - - // --- Suppress dialogs --- - window.alert = function() {}; - window.confirm = function() { return true; }; - window.prompt = function() { return ''; }; - - // --- Force async XHR (critical: prevents webcomponents polyfill hang) --- - (function() { - var origOpen = XMLHttpRequest.prototype.open; - XMLHttpRequest.prototype.open = function(method, url, async, user, pass) { - return origOpen.call(this, method, url, true, user, pass); - }; - })(); -})();