From 5f1efe793942e7dfb8f597b1b4196c3c4340ae0f Mon Sep 17 00:00:00 2001 From: lining Date: Thu, 18 Jun 2026 17:20:56 +0800 Subject: [PATCH] fix: add errors='replace' to all subprocess.run calls in incremental.py Prevents UnicodeDecodeError when git output contains non-UTF-8 or truncated multi-byte characters (e.g., branch names with CJK characters that get truncated by git rev-parse --abbrev-ref HEAD). The _git_branch_info function was crashing at full_build because it lacked errors='replace' on subprocess.run() calls, while other functions in the same file (e.g., _get_svn_changed_files, _read_ignore_file) already used errors='replace'. Also adds UnicodeDecodeError to except clauses so that any remaining decode failures are handled gracefully rather than crashing the build. Closes #302 --- code_review_graph/incremental.py | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/code_review_graph/incremental.py b/code_review_graph/incremental.py index 81dc3026..36b15c08 100644 --- a/code_review_graph/incremental.py +++ b/code_review_graph/incremental.py @@ -416,25 +416,27 @@ def _git_branch_info(repo_root: Path) -> tuple[str, str]: result = subprocess.run( ["git", "rev-parse", "--abbrev-ref", "HEAD"], capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) if result.returncode == 0: branch = result.stdout.strip() - except (subprocess.TimeoutExpired, FileNotFoundError): + except (subprocess.TimeoutExpired, FileNotFoundError, UnicodeDecodeError): pass try: result = subprocess.run( ["git", "rev-parse", "HEAD"], capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) if result.returncode == 0: sha = result.stdout.strip() - except (subprocess.TimeoutExpired, FileNotFoundError): + except (subprocess.TimeoutExpired, FileNotFoundError, UnicodeDecodeError): pass return branch, sha @@ -508,7 +510,8 @@ def get_changed_files(repo_root: Path, base: str = "HEAD~1") -> list[str]: result = subprocess.run( ["git", "diff", "--name-only", base, "--"], capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) @@ -517,16 +520,16 @@ def get_changed_files(repo_root: Path, base: str = "HEAD~1") -> list[str]: result = subprocess.run( ["git", "diff", "--name-only", "--cached"], capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) files = [f.strip() for f in result.stdout.splitlines() if f.strip()] return files - except (FileNotFoundError, subprocess.TimeoutExpired): + except (FileNotFoundError, subprocess.TimeoutExpired, UnicodeDecodeError): return [] - def _get_svn_changed_files(repo_root: Path, rev_range: str | None = None) -> list[str]: """Return changed files in an SVN working copy. @@ -570,10 +573,9 @@ def _get_svn_changed_files(repo_root: Path, rev_range: str | None = None) -> lis path = line[8:].strip() if len(line) > 8 else line[1:].strip() files.append(path) return files - except (FileNotFoundError, subprocess.TimeoutExpired): + except (FileNotFoundError, subprocess.TimeoutExpired, UnicodeDecodeError): return [] - def get_staged_and_unstaged(repo_root: Path) -> list[str]: """Get all modified files (staged + unstaged + untracked).""" if detect_vcs(repo_root) == "svn": @@ -582,7 +584,8 @@ def get_staged_and_unstaged(repo_root: Path) -> list[str]: result = subprocess.run( ["git", "status", "--porcelain"], capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) @@ -595,10 +598,9 @@ def get_staged_and_unstaged(repo_root: Path) -> list[str]: entry = entry.split(" -> ", 1)[1] files.append(entry) return files - except (FileNotFoundError, subprocess.TimeoutExpired): + except (FileNotFoundError, subprocess.TimeoutExpired, UnicodeDecodeError): return [] - def get_all_tracked_files( repo_root: Path, recurse_submodules: bool | None = None, @@ -627,15 +629,15 @@ def get_all_tracked_files( result = subprocess.run( cmd, capture_output=True, - text=True, encoding='utf-8', cwd=str(repo_root), + text=True, encoding='utf-8', errors='replace', + cwd=str(repo_root), timeout=_GIT_TIMEOUT, stdin=subprocess.DEVNULL, ) return [f.strip() for f in result.stdout.splitlines() if f.strip()] - except (FileNotFoundError, subprocess.TimeoutExpired): + except (FileNotFoundError, subprocess.TimeoutExpired, UnicodeDecodeError): return [] - def _get_svn_all_tracked_files(repo_root: Path) -> list[str]: """Return SVN-versioned files by walking the working copy.