From ce4c725c78195086834d33a4fb990c2aa1c79fa8 Mon Sep 17 00:00:00 2001
From: Dom <dwojtaszek@gmail.com>
Date: Sat, 13 Jun 2026 18:50:30 +0200
Subject: [PATCH 1/4] fix: post dogfood PR review comment via workflow_run

---
 .github/workflows/pr-review-comment.yml | 65 +++++++++++++++++++++++++
 .github/workflows/pr-review.yml         | 20 +++++++-
 action.yml                              | 11 +++++
 docs/GITHUB_ACTION.md                   | 26 ++++++++--
 4 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 .github/workflows/pr-review-comment.yml

diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml
new file mode 100644
index 00000000..d4f2b130
--- /dev/null
+++ b/.github/workflows/pr-review-comment.yml
@@ -0,0 +1,65 @@
+# Posts the sticky PR comment rendered by pr-review.yml. Split into a
+# separate workflow_run-triggered workflow because GITHUB_TOKEN is read-only
+# in pull_request runs from forks (observed on #555), so the unprivileged
+# run cannot comment. This workflow never checks out or executes PR code,
+# and it treats the artifact as untrusted input: the PR number it names is
+# only honored if that PR's head is the exact commit the triggering run
+# analyzed.
+name: PR Review Comment
+
+on:
+  workflow_run:
+    workflows: ["PR Review"]
+    types: [completed]
+
+permissions:
+  pull-requests: write
+
+jobs:
+  comment:
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.conclusion == 'success'
+    steps:
+      - name: Download report artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: crg-report
+          run-id: ${{ github.event.workflow_run.id }}
+          github-token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Upsert sticky PR comment
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          HEAD_SHA: ${{ github.event.workflow_run.head_sha }}
+        run: |
+          pr_number=$(head -n 1 pr-number.txt | tr -d '[:space:]')
+          case "${pr_number}" in
+            ''|*[!0-9]*)
+              echo "Missing or non-numeric PR number in artifact." >&2
+              exit 1
+              ;;
+          esac
+          # The artifact comes from an unprivileged (possibly fork) run.
+          # Only trust its PR number if that PR's head is the commit the
+          # triggering run actually analyzed.
+          actual_sha=$(gh api \
+            "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}" --jq .head.sha)
+          if [ "${actual_sha}" != "${HEAD_SHA}" ]; then
+            echo "PR ${pr_number} head (${actual_sha}) does not match" \
+              "analyzed commit (${HEAD_SHA}); refusing to comment." >&2
+            exit 1
+          fi
+          marker='<!-- code-review-graph-report -->'
+          comment_id=$(gh api \
+            "repos/${GITHUB_REPOSITORY}/issues/${pr_number}/comments" \
+            --paginate \
+            --jq ".[] | select(.body | contains(\"${marker}\")) | .id" | head -n 1)
+          if [ -n "${comment_id}" ]; then
+            gh api --method PATCH --silent \
+              "repos/${GITHUB_REPOSITORY}/issues/comments/${comment_id}" \
+              -F body=@crg-comment.md
+          else
+            gh api --method POST --silent \
+              "repos/${GITHUB_REPOSITORY}/issues/${pr_number}/comments" \
+              -F body=@crg-comment.md
+          fi
diff --git a/.github/workflows/pr-review.yml b/.github/workflows/pr-review.yml
index dde491b6..96c04901 100644
--- a/.github/workflows/pr-review.yml
+++ b/.github/workflows/pr-review.yml
@@ -1,4 +1,8 @@
 # Dogfoods the local composite action (action.yml at the repo root) on PRs.
+# Runs unprivileged: GITHUB_TOKEN is read-only in pull_request runs from
+# forks, so the sticky comment is not posted here. Instead the rendered
+# report is uploaded as an artifact and posted by pr-review-comment.yml
+# (workflow_run, which has write permissions).
 name: PR Review
 
 on:
@@ -7,7 +11,6 @@ on:
 
 permissions:
   contents: read
-  pull-requests: write
 
 jobs:
   review:
@@ -15,7 +18,22 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Run code-review-graph review
+        id: review
         uses: ./
         with:
           github-token: ${{ secrets.GITHUB_TOKEN }}
+          comment: "false"
           fail-on-risk: none
+      - name: Stage report artifact
+        env:
+          COMMENT_FILE: ${{ steps.review.outputs.comment-file }}
+          PR_NUMBER: ${{ github.event.pull_request.number }}
+        run: |
+          mkdir crg-report
+          cp "${COMMENT_FILE}" crg-report/crg-comment.md
+          echo "${PR_NUMBER}" > crg-report/pr-number.txt
+      - name: Upload report artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: crg-report
+          path: crg-report/
diff --git a/action.yml b/action.yml
index 79718127..f363c52a 100644
--- a/action.yml
+++ b/action.yml
@@ -32,6 +32,15 @@ inputs:
     required: false
     default: "3.12"
 
+outputs:
+  comment-file:
+    description: >-
+      Runner-local path to the rendered markdown report. Useful with
+      `comment: false` to publish the report another way — e.g. upload it
+      as an artifact for a privileged workflow_run workflow to post on
+      fork PRs (see docs/GITHUB_ACTION.md, "Fork PRs").
+    value: ${{ steps.render.outputs.comment-file }}
+
 runs:
   using: "composite"
   steps:
@@ -87,11 +96,13 @@ runs:
           > "${RUNNER_TEMP}/crg-report.json"
 
     - name: Render markdown report
+      id: render
       shell: bash
       run: |
         python "${GITHUB_ACTION_PATH}/scripts/render_pr_comment.py" \
           --input "${RUNNER_TEMP}/crg-report.json" \
           --output "${RUNNER_TEMP}/crg-comment.md"
+        echo "comment-file=${RUNNER_TEMP}/crg-comment.md" >> "${GITHUB_OUTPUT}"
 
     - name: Upsert sticky PR comment
       if: ${{ inputs.comment == 'true' && github.event_name == 'pull_request' }}
diff --git a/docs/GITHUB_ACTION.md b/docs/GITHUB_ACTION.md
index 53b66f0c..9e0fa988 100644
--- a/docs/GITHUB_ACTION.md
+++ b/docs/GITHUB_ACTION.md
@@ -78,6 +78,12 @@ security-sensitive names, caller count). The action maps it to levels:
 | high | 0.70 – 0.84 |
 | critical | ≥ 0.85 |
 
+## Outputs
+
+| Output | Description |
+|--------|-------------|
+| `comment-file` | Runner-local path to the rendered markdown report. Useful with `comment: false` to publish the report another way — e.g. upload it as an artifact for a privileged `workflow_run` workflow to post on fork PRs (see "Fork PRs" below). |
+
 ## What the comment contains
 
 - **Overall risk** score and level, with counts of changed functions,
@@ -139,16 +145,26 @@ database) with `actions/cache`:
 - **Pinning**: when consuming the action from another repository, pin
   `uses:` to a release tag or commit SHA rather than `@main`.
 - **Fork PRs**: `pull_request` runs from forks receive a read-only
-  `GITHUB_TOKEN`, so the comment step will fail for fork PRs unless you use
-  `pull_request_target` — which checks out trusted base-branch workflow
-  code; understand [the security implications](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/)
-  before switching, or set `comment: false` for fork PRs.
+  `GITHUB_TOKEN`, so the comment step fails for fork PRs. The recommended
+  pattern: run the action with `comment: false`, upload the rendered report
+  (the `comment-file` output) as an artifact, and post it from a separate
+  `workflow_run`-triggered workflow with `pull-requests: write` — see
+  [`pr-review.yml`](../.github/workflows/pr-review.yml) and
+  [`pr-review-comment.yml`](../.github/workflows/pr-review-comment.yml) for
+  a reference implementation, which also verifies the artifact's PR number
+  against the triggering run's head SHA before posting. Avoid
+  `pull_request_target` with a checkout of PR code — it runs untrusted code
+  with a privileged token
+  ([details](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/)).
 
 ## Dogfooding
 
 This repository runs the action on its own PRs via
 [`.github/workflows/pr-review.yml`](../.github/workflows/pr-review.yml),
-which `uses: ./` (the local `action.yml`).
+which `uses: ./` (the local `action.yml`). The review run is unprivileged
+(`comment: false` + artifact upload) and the sticky comment is posted by
+[`pr-review-comment.yml`](../.github/workflows/pr-review-comment.yml), so
+fork PRs get review comments too.
 
 ## Rendering script
 

From 5f0cb47023b265ab070ea3df4c8cd12d62f7ef09 Mon Sep 17 00:00:00 2001
From: Dom <dwojtaszek@gmail.com>
Date: Sat, 13 Jun 2026 19:08:18 +0200
Subject: [PATCH 2/4] fix: add actions: read permission to PR review comment
 workflow

---
 .github/workflows/pr-review-comment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml
index d4f2b130..64a0cea4 100644
--- a/.github/workflows/pr-review-comment.yml
+++ b/.github/workflows/pr-review-comment.yml
@@ -14,6 +14,7 @@ on:
 
 permissions:
   pull-requests: write
+  actions: read
 
 jobs:
   comment:

From 6ace2ef9bd081b8ecb58ae83f4b76865da960a13 Mon Sep 17 00:00:00 2001
From: Dom <dwojtaszek@gmail.com>
Date: Sun, 14 Jun 2026 13:29:36 +0200
Subject: [PATCH 3/4] fix: add issues: write permission

---
 .github/workflows/pr-review-comment.yml    | 1 +
 code-review-graph-vscode/package-lock.json | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml
index 64a0cea4..5fd3d427 100644
--- a/.github/workflows/pr-review-comment.yml
+++ b/.github/workflows/pr-review-comment.yml
@@ -15,6 +15,7 @@ on:
 permissions:
   pull-requests: write
   actions: read
+  issues: write
 
 jobs:
   comment:
diff --git a/code-review-graph-vscode/package-lock.json b/code-review-graph-vscode/package-lock.json
index ef5b18b6..2ad8d1ed 100644
--- a/code-review-graph-vscode/package-lock.json
+++ b/code-review-graph-vscode/package-lock.json
@@ -1879,7 +1879,6 @@
       "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz",
       "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==",
       "license": "ISC",
-      "peer": true,
       "engines": {
         "node": ">=12"
       }

From bc6614c86be7e9f543660abb3d23786ed5a4ec66 Mon Sep 17 00:00:00 2001
From: Dom <dwojtaszek@gmail.com>
Date: Sun, 14 Jun 2026 13:35:52 +0200
Subject: [PATCH 4/4] fix: gracefully handle tree-sitter >= 0.22 parser.parse
 string type

---
 code_review_graph/parser.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py
index c55b2e8f..f00abde1 100644
--- a/code_review_graph/parser.py
+++ b/code_review_graph/parser.py
@@ -851,6 +851,12 @@ def _get_parser(self, language: str):  # type: ignore[arg-type]
                 return None
         return self._parsers[language]
 
+    def _safe_parse(self, parser, source: bytes):
+        try:
+            return parser.parse(source)
+        except TypeError:
+            return parser.parse(source.decode("utf-8", errors="replace"))
+
     def detect_language(self, path: Path) -> Optional[str]:
         """Map a file path to its language name.
 
@@ -993,7 +999,7 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E
         if not parser:
             return [], []
 
-        tree = parser.parse(source)
+        tree = self._safe_parse(parser, source)
         nodes: list[NodeInfo] = []
         edges: list[EdgeInfo] = []
         file_path_str = str(path)
@@ -1052,7 +1058,7 @@ def _parse_vue(
         if not vue_parser:
             return [], []
 
-        tree = vue_parser.parse(source)
+        tree = self._safe_parse(vue_parser, source)
         file_path_str = str(path)
         test_file = _is_test_file(file_path_str)
 
@@ -1110,7 +1116,7 @@ def _parse_vue(
             if not script_parser:
                 continue
 
-            script_tree = script_parser.parse(script_source)
+            script_tree = self._safe_parse(script_parser, script_source)
 
             # Collect imports and defined names from the script block
             import_map, defined_names = self._collect_file_scope(
@@ -1174,7 +1180,7 @@ def _parse_svelte(
         if not svelte_parser:
             return [], []
 
-        tree = svelte_parser.parse(source)
+        tree = self._safe_parse(svelte_parser, source)
         file_path_str = str(path)
         test_file = _is_test_file(file_path_str)
 
@@ -1237,7 +1243,7 @@ def _parse_svelte(
             if not script_parser:
                 continue
 
-            script_tree = script_parser.parse(script_source)
+            script_tree = self._safe_parse(script_parser, script_source)
             import_map, defined_names = self._collect_file_scope(
                 script_tree.root_node, script_lang, script_source,
             )
@@ -1441,7 +1447,7 @@ def _parse_notebook_cells(
             concatenated = "\n".join(code_chunks)
             concat_bytes = concatenated.encode("utf-8")
 
-            tree = ts_parser.parse(concat_bytes)
+            tree = self._safe_parse(ts_parser, concat_bytes)
 
             import_map, defined_names = self._collect_file_scope(
                 tree.root_node, lang, concat_bytes,
@@ -2035,7 +2041,7 @@ def _parse_sql(
         # --- tree-sitter pass ---
         parser = self._get_parser("sql")
         if parser:
-            tree = parser.parse(source)
+            tree = self._safe_parse(parser, source)
             self._walk_sql_tree(
                 tree.root_node, source, file_path_str, nodes, edges,
             )
@@ -5632,7 +5638,7 @@ def _resolve_exported_symbol(
         if not parser:
             return None
 
-        tree = parser.parse(source)
+        tree = self._safe_parse(parser, source)
 
         # Direct local definition/export in the module file.
         import_map, defined_names = self._collect_file_scope(