From ce4c725c78195086834d33a4fb990c2aa1c79fa8 Mon Sep 17 00:00:00 2001 From: Dom Date: Sat, 13 Jun 2026 18:50:30 +0200 Subject: [PATCH 1/4] fix: post dogfood PR review comment via workflow_run --- .github/workflows/pr-review-comment.yml | 65 +++++++++++++++++++++++++ .github/workflows/pr-review.yml | 20 +++++++- action.yml | 11 +++++ docs/GITHUB_ACTION.md | 26 ++++++++-- 4 files changed, 116 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/pr-review-comment.yml diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml new file mode 100644 index 00000000..d4f2b130 --- /dev/null +++ b/.github/workflows/pr-review-comment.yml @@ -0,0 +1,65 @@ +# Posts the sticky PR comment rendered by pr-review.yml. Split into a +# separate workflow_run-triggered workflow because GITHUB_TOKEN is read-only +# in pull_request runs from forks (observed on #555), so the unprivileged +# run cannot comment. This workflow never checks out or executes PR code, +# and it treats the artifact as untrusted input: the PR number it names is +# only honored if that PR's head is the exact commit the triggering run +# analyzed. +name: PR Review Comment + +on: + workflow_run: + workflows: ["PR Review"] + types: [completed] + +permissions: + pull-requests: write + +jobs: + comment: + runs-on: ubuntu-latest + if: github.event.workflow_run.conclusion == 'success' + steps: + - name: Download report artifact + uses: actions/download-artifact@v4 + with: + name: crg-report + run-id: ${{ github.event.workflow_run.id }} + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Upsert sticky PR comment + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + HEAD_SHA: ${{ github.event.workflow_run.head_sha }} + run: | + pr_number=$(head -n 1 pr-number.txt | tr -d '[:space:]') + case "${pr_number}" in + ''|*[!0-9]*) + echo "Missing or non-numeric PR number in artifact." >&2 + exit 1 + ;; + esac + # The artifact comes from an unprivileged (possibly fork) run. + # Only trust its PR number if that PR's head is the commit the + # triggering run actually analyzed. + actual_sha=$(gh api \ + "repos/${GITHUB_REPOSITORY}/pulls/${pr_number}" --jq .head.sha) + if [ "${actual_sha}" != "${HEAD_SHA}" ]; then + echo "PR ${pr_number} head (${actual_sha}) does not match" \ + "analyzed commit (${HEAD_SHA}); refusing to comment." >&2 + exit 1 + fi + marker='' + comment_id=$(gh api \ + "repos/${GITHUB_REPOSITORY}/issues/${pr_number}/comments" \ + --paginate \ + --jq ".[] | select(.body | contains(\"${marker}\")) | .id" | head -n 1) + if [ -n "${comment_id}" ]; then + gh api --method PATCH --silent \ + "repos/${GITHUB_REPOSITORY}/issues/comments/${comment_id}" \ + -F body=@crg-comment.md + else + gh api --method POST --silent \ + "repos/${GITHUB_REPOSITORY}/issues/${pr_number}/comments" \ + -F body=@crg-comment.md + fi diff --git a/.github/workflows/pr-review.yml b/.github/workflows/pr-review.yml index dde491b6..96c04901 100644 --- a/.github/workflows/pr-review.yml +++ b/.github/workflows/pr-review.yml @@ -1,4 +1,8 @@ # Dogfoods the local composite action (action.yml at the repo root) on PRs. +# Runs unprivileged: GITHUB_TOKEN is read-only in pull_request runs from +# forks, so the sticky comment is not posted here. Instead the rendered +# report is uploaded as an artifact and posted by pr-review-comment.yml +# (workflow_run, which has write permissions). name: PR Review on: @@ -7,7 +11,6 @@ on: permissions: contents: read - pull-requests: write jobs: review: @@ -15,7 +18,22 @@ jobs: steps: - uses: actions/checkout@v4 - name: Run code-review-graph review + id: review uses: ./ with: github-token: ${{ secrets.GITHUB_TOKEN }} + comment: "false" fail-on-risk: none + - name: Stage report artifact + env: + COMMENT_FILE: ${{ steps.review.outputs.comment-file }} + PR_NUMBER: ${{ github.event.pull_request.number }} + run: | + mkdir crg-report + cp "${COMMENT_FILE}" crg-report/crg-comment.md + echo "${PR_NUMBER}" > crg-report/pr-number.txt + - name: Upload report artifact + uses: actions/upload-artifact@v4 + with: + name: crg-report + path: crg-report/ diff --git a/action.yml b/action.yml index 79718127..f363c52a 100644 --- a/action.yml +++ b/action.yml @@ -32,6 +32,15 @@ inputs: required: false default: "3.12" +outputs: + comment-file: + description: >- + Runner-local path to the rendered markdown report. Useful with + `comment: false` to publish the report another way — e.g. upload it + as an artifact for a privileged workflow_run workflow to post on + fork PRs (see docs/GITHUB_ACTION.md, "Fork PRs"). + value: ${{ steps.render.outputs.comment-file }} + runs: using: "composite" steps: @@ -87,11 +96,13 @@ runs: > "${RUNNER_TEMP}/crg-report.json" - name: Render markdown report + id: render shell: bash run: | python "${GITHUB_ACTION_PATH}/scripts/render_pr_comment.py" \ --input "${RUNNER_TEMP}/crg-report.json" \ --output "${RUNNER_TEMP}/crg-comment.md" + echo "comment-file=${RUNNER_TEMP}/crg-comment.md" >> "${GITHUB_OUTPUT}" - name: Upsert sticky PR comment if: ${{ inputs.comment == 'true' && github.event_name == 'pull_request' }} diff --git a/docs/GITHUB_ACTION.md b/docs/GITHUB_ACTION.md index 53b66f0c..9e0fa988 100644 --- a/docs/GITHUB_ACTION.md +++ b/docs/GITHUB_ACTION.md @@ -78,6 +78,12 @@ security-sensitive names, caller count). The action maps it to levels: | high | 0.70 – 0.84 | | critical | ≥ 0.85 | +## Outputs + +| Output | Description | +|--------|-------------| +| `comment-file` | Runner-local path to the rendered markdown report. Useful with `comment: false` to publish the report another way — e.g. upload it as an artifact for a privileged `workflow_run` workflow to post on fork PRs (see "Fork PRs" below). | + ## What the comment contains - **Overall risk** score and level, with counts of changed functions, @@ -139,16 +145,26 @@ database) with `actions/cache`: - **Pinning**: when consuming the action from another repository, pin `uses:` to a release tag or commit SHA rather than `@main`. - **Fork PRs**: `pull_request` runs from forks receive a read-only - `GITHUB_TOKEN`, so the comment step will fail for fork PRs unless you use - `pull_request_target` — which checks out trusted base-branch workflow - code; understand [the security implications](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/) - before switching, or set `comment: false` for fork PRs. + `GITHUB_TOKEN`, so the comment step fails for fork PRs. The recommended + pattern: run the action with `comment: false`, upload the rendered report + (the `comment-file` output) as an artifact, and post it from a separate + `workflow_run`-triggered workflow with `pull-requests: write` — see + [`pr-review.yml`](../.github/workflows/pr-review.yml) and + [`pr-review-comment.yml`](../.github/workflows/pr-review-comment.yml) for + a reference implementation, which also verifies the artifact's PR number + against the triggering run's head SHA before posting. Avoid + `pull_request_target` with a checkout of PR code — it runs untrusted code + with a privileged token + ([details](https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/)). ## Dogfooding This repository runs the action on its own PRs via [`.github/workflows/pr-review.yml`](../.github/workflows/pr-review.yml), -which `uses: ./` (the local `action.yml`). +which `uses: ./` (the local `action.yml`). The review run is unprivileged +(`comment: false` + artifact upload) and the sticky comment is posted by +[`pr-review-comment.yml`](../.github/workflows/pr-review-comment.yml), so +fork PRs get review comments too. ## Rendering script From 5f0cb47023b265ab070ea3df4c8cd12d62f7ef09 Mon Sep 17 00:00:00 2001 From: Dom Date: Sat, 13 Jun 2026 19:08:18 +0200 Subject: [PATCH 2/4] fix: add actions: read permission to PR review comment workflow --- .github/workflows/pr-review-comment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml index d4f2b130..64a0cea4 100644 --- a/.github/workflows/pr-review-comment.yml +++ b/.github/workflows/pr-review-comment.yml @@ -14,6 +14,7 @@ on: permissions: pull-requests: write + actions: read jobs: comment: From 6ace2ef9bd081b8ecb58ae83f4b76865da960a13 Mon Sep 17 00:00:00 2001 From: Dom Date: Sun, 14 Jun 2026 13:29:36 +0200 Subject: [PATCH 3/4] fix: add issues: write permission --- .github/workflows/pr-review-comment.yml | 1 + code-review-graph-vscode/package-lock.json | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-review-comment.yml b/.github/workflows/pr-review-comment.yml index 64a0cea4..5fd3d427 100644 --- a/.github/workflows/pr-review-comment.yml +++ b/.github/workflows/pr-review-comment.yml @@ -15,6 +15,7 @@ on: permissions: pull-requests: write actions: read + issues: write jobs: comment: diff --git a/code-review-graph-vscode/package-lock.json b/code-review-graph-vscode/package-lock.json index ef5b18b6..2ad8d1ed 100644 --- a/code-review-graph-vscode/package-lock.json +++ b/code-review-graph-vscode/package-lock.json @@ -1879,7 +1879,6 @@ "resolved": "https://registry.npmjs.org/d3-selection/-/d3-selection-3.0.0.tgz", "integrity": "sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==", "license": "ISC", - "peer": true, "engines": { "node": ">=12" } From bc6614c86be7e9f543660abb3d23786ed5a4ec66 Mon Sep 17 00:00:00 2001 From: Dom Date: Sun, 14 Jun 2026 13:35:52 +0200 Subject: [PATCH 4/4] fix: gracefully handle tree-sitter >= 0.22 parser.parse string type --- code_review_graph/parser.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index c55b2e8f..f00abde1 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -851,6 +851,12 @@ def _get_parser(self, language: str): # type: ignore[arg-type] return None return self._parsers[language] + def _safe_parse(self, parser, source: bytes): + try: + return parser.parse(source) + except TypeError: + return parser.parse(source.decode("utf-8", errors="replace")) + def detect_language(self, path: Path) -> Optional[str]: """Map a file path to its language name. @@ -993,7 +999,7 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E if not parser: return [], [] - tree = parser.parse(source) + tree = self._safe_parse(parser, source) nodes: list[NodeInfo] = [] edges: list[EdgeInfo] = [] file_path_str = str(path) @@ -1052,7 +1058,7 @@ def _parse_vue( if not vue_parser: return [], [] - tree = vue_parser.parse(source) + tree = self._safe_parse(vue_parser, source) file_path_str = str(path) test_file = _is_test_file(file_path_str) @@ -1110,7 +1116,7 @@ def _parse_vue( if not script_parser: continue - script_tree = script_parser.parse(script_source) + script_tree = self._safe_parse(script_parser, script_source) # Collect imports and defined names from the script block import_map, defined_names = self._collect_file_scope( @@ -1174,7 +1180,7 @@ def _parse_svelte( if not svelte_parser: return [], [] - tree = svelte_parser.parse(source) + tree = self._safe_parse(svelte_parser, source) file_path_str = str(path) test_file = _is_test_file(file_path_str) @@ -1237,7 +1243,7 @@ def _parse_svelte( if not script_parser: continue - script_tree = script_parser.parse(script_source) + script_tree = self._safe_parse(script_parser, script_source) import_map, defined_names = self._collect_file_scope( script_tree.root_node, script_lang, script_source, ) @@ -1441,7 +1447,7 @@ def _parse_notebook_cells( concatenated = "\n".join(code_chunks) concat_bytes = concatenated.encode("utf-8") - tree = ts_parser.parse(concat_bytes) + tree = self._safe_parse(ts_parser, concat_bytes) import_map, defined_names = self._collect_file_scope( tree.root_node, lang, concat_bytes, @@ -2035,7 +2041,7 @@ def _parse_sql( # --- tree-sitter pass --- parser = self._get_parser("sql") if parser: - tree = parser.parse(source) + tree = self._safe_parse(parser, source) self._walk_sql_tree( tree.root_node, source, file_path_str, nodes, edges, ) @@ -5632,7 +5638,7 @@ def _resolve_exported_symbol( if not parser: return None - tree = parser.parse(source) + tree = self._safe_parse(parser, source) # Direct local definition/export in the module file. import_map, defined_names = self._collect_file_scope(