Skip to content
152 changes: 152 additions & 0 deletions webhook_server/libs/handlers/runner_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,151 @@ async def is_branch_exists(self, branch: str) -> bool:
return False
raise

async def _find_signoff_source(
self,
pull_request: PullRequest,
) -> tuple[Any, str | None]:
"""Find a commit with a Signed-off-by trailer and return it with the sign-off email.

Checks the merge commit first (squash-merge). If the merge commit has no
Signed-off-by, falls back to scanning the PR's individual commits in
reverse order (regular merge).

Returns (source_commit, signoff_email) or (None, None) if no sign-off found.
"""
# Try the merge commit first (squash-merge case)
merge_sha = await github_api_call(
lambda: pull_request.merge_commit_sha, logger=self.logger, log_prefix=self.log_prefix
)
if merge_sha:
merge_commit = await github_api_call(
self.github_webhook.repository.get_commit, merge_sha, logger=self.logger, log_prefix=self.log_prefix
)
commit_msg = await github_api_call(
lambda: merge_commit.commit.message, logger=self.logger, log_prefix=self.log_prefix
)
signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
if signoff_match:
return merge_commit, signoff_match[-1][1]

# Fall back to PR commits (regular merge case)
commits = await github_api_call(
lambda: list(pull_request.get_commits()), logger=self.logger, log_prefix=self.log_prefix
)
for commit in reversed(commits):
commit_msg = await github_api_call(
lambda c=commit: c.commit.message, logger=self.logger, log_prefix=self.log_prefix
)
signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
if signoff_match:
return commit, signoff_match[-1][1]
Comment on lines +761 to +775
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

MEDIUM: Match actual Signed-off-by trailer lines only.

The current regex matches Signed-off-by: anywhere in the message body, so explanatory text can be misclassified as a DCO trailer and trigger an incorrect amend. Anchor it to full lines.

Suggested fix
-            signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
+            signoff_match = re.findall(r"(?im)^Signed-off-by:\s*(.+?)\s*<([^>\n]+)>\s*$", commit_msg)
@@
-            signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
+            signoff_match = re.findall(r"(?im)^Signed-off-by:\s*(.+?)\s*<([^>\n]+)>\s*$", commit_msg)
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
if signoff_match:
return merge_commit, signoff_match[-1][1]
# Fall back to PR commits (regular merge case)
commits = await github_api_call(
lambda: list(pull_request.get_commits()), logger=self.logger, log_prefix=self.log_prefix
)
for commit in reversed(commits):
commit_msg = await github_api_call(
lambda c=commit: c.commit.message, logger=self.logger, log_prefix=self.log_prefix
)
signoff_match = re.findall(r"Signed-off-by:\s*(.+?)\s*<([^>]+)>", commit_msg)
if signoff_match:
return commit, signoff_match[-1][1]
signoff_match = re.findall(r"(?im)^Signed-off-by:\s*(.+?)\s*<([^>\n]+)>\s*$", commit_msg)
if signoff_match:
return merge_commit, signoff_match[-1][1]
# Fall back to PR commits (regular merge case)
commits = await github_api_call(
lambda: list(pull_request.get_commits()), logger=self.logger, log_prefix=self.log_prefix
)
for commit in reversed(commits):
commit_msg = await github_api_call(
lambda c=commit: c.commit.message, logger=self.logger, log_prefix=self.log_prefix
)
signoff_match = re.findall(r"(?im)^Signed-off-by:\s*(.+?)\s*<([^>\n]+)>\s*$", commit_msg)
if signoff_match:
return commit, signoff_match[-1][1]
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@webhook_server/libs/handlers/runner_handler.py` around lines 761 - 775, The
current regex that looks for Signed-off-by in commit messages (used when
inspecting merge_commit and later in the commits loop via the signoff_match
variable and commit_msg) can match text anywhere in the message body; update
both uses to only match trailer lines by anchoring to line boundaries and
enabling multiline mode (use a regex like ^Signed-off-by: ... with re.MULTILINE)
so only true DCO trailer lines are returned; make the same change for the
merge_commit block and the for-commit loop that uses github_api_call and
signoff_match.


return None, None

async def _restore_original_author_for_cherry_pick(
self,
pull_request: PullRequest,
git_cmd: str,
github_token: str,
) -> bool:
"""Amend cherry-picked commit to restore the original PR author for DCO compliance.

GitHub squash-merges rewrite the author email to the noreply format
(e.g., 86722603+user@users.noreply.github.com). When git cherry-pick
replays such a commit, the DCO check fails because the author email
no longer matches the Signed-off-by trailer.

This method first checks the merge commit (via ``pull_request.merge_commit_sha``)
for a Signed-off-by trailer (squash-merge case). If not found, it falls back
to scanning the PR's individual commits (regular merge case).

The author identity is built from the source commit's git author name
(preserved by GitHub) combined with the email from the Signed-off-by trailer.

Returns True if the commit was amended, False if no amendment was needed or possible.
"""
try:
# Try merge commit first (squash-merge), fall back to PR commits (regular merge)
source_commit, signoff_email = await self._find_signoff_source(pull_request)
if not source_commit or not signoff_email:
self.logger.debug(f"{self.log_prefix} No Signed-off-by found, skipping author restore")
return False

# Author name from the source commit's git author (GitHub preserves the display name)
# Author email from the Signed-off-by trailer (commit email may be noreply)
author_name = await github_api_call(
lambda: source_commit.commit.author.name, logger=self.logger, log_prefix=self.log_prefix
)
author_email = signoff_email

author_spec = f"{author_name} <{author_email}>"
redact_list = [github_token, author_spec, author_email, author_name]

# Check if the cherry-picked commit author already matches (both name and email)
rc, current_author_info, _ = await run_command(
command=f"{git_cmd} log -1 --format=%an%n%ae",
log_prefix=self.log_prefix,
redact_secrets=redact_list,
mask_sensitive=self.github_webhook.mask_sensitive,
)
if not rc:
self.logger.warning(
f"{self.log_prefix} Could not read current author info, proceeding with author amend"
)
else:
info_lines = current_author_info.strip().splitlines()
if len(info_lines) == 2 and info_lines[0] == author_name and info_lines[1] == author_email:
self.logger.debug(f"{self.log_prefix} Author already matches original PR commit, no amend needed")
return False

# Read the current commit message to fix Signed-off-by trailers
rc, current_msg, _ = await run_command(
command=f"{git_cmd} log -1 --format=%B",
log_prefix=self.log_prefix,
redact_secrets=redact_list,
mask_sensitive=self.github_webhook.mask_sensitive,
)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
amended_msg: str | None = None
if not rc:
self.logger.warning(f"{self.log_prefix} Could not read commit message, amending author only")
else:
# Remove all existing Signed-off-by trailers and add the correct one
msg_lines = current_msg.rstrip().splitlines()
filtered_lines = [line for line in msg_lines if not re.match(r"Signed-off-by:\s*", line)]
while filtered_lines and not filtered_lines[-1].strip():
filtered_lines.pop()
filtered_lines.append("")
filtered_lines.append(f"Signed-off-by: {author_name} <{author_email}>")
amended_msg = "\n".join(filtered_lines) + "\n"

# Amend the commit author and optionally the message
msg_flag = f"-m {shlex.quote(amended_msg)}" if amended_msg else "--no-edit"
Comment on lines +818 to +856
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

HIGH: Don’t return before normalizing the sign-off trailer.

Line 833 skips the message rewrite whenever author name/email already match. That leaves short-name trailers such as Signed-off-by: rnetser <...> untouched, even though the method is meant to synthesize a single trailer from the git author identity for DCO compliance.

Suggested fix
+            needs_author_amend = True
             # Check if the cherry-picked commit author already matches (both name and email)
             rc, current_author_info, _ = await run_command(
                 command=f"{git_cmd} log -1 --format=%an%n%ae",
                 log_prefix=self.log_prefix,
                 redact_secrets=redact_list,
@@
             else:
                 info_lines = current_author_info.strip().splitlines()
                 if len(info_lines) == 2 and info_lines[0] == author_name and info_lines[1] == author_email:
-                    self.logger.debug(f"{self.log_prefix} Author already matches original PR commit, no amend needed")
-                    return False
+                    needs_author_amend = False
 
             # Read the current commit message to fix Signed-off-by trailers
             rc, current_msg, _ = await run_command(
                 command=f"{git_cmd} log -1 --format=%B",
@@
             amended_msg: str | None = None
+            needs_message_amend = False
             if not rc:
                 self.logger.warning(f"{self.log_prefix} Could not read commit message, amending author only")
             else:
                 # Remove all existing Signed-off-by trailers and add the correct one
                 msg_lines = current_msg.rstrip().splitlines()
@@
                 filtered_lines.append("")
                 filtered_lines.append(f"Signed-off-by: {author_name} <{author_email}>")
                 amended_msg = "\n".join(filtered_lines) + "\n"
+                needs_message_amend = amended_msg != current_msg
+
+            if not needs_author_amend and not needs_message_amend:
+                self.logger.debug(f"{self.log_prefix} Author and Signed-off-by already match, no amend needed")
+                return False
 
             # Amend the commit author and optionally the message
-            msg_flag = f"-m {shlex.quote(amended_msg)}" if amended_msg else "--no-edit"
+            msg_flag = f"-m {shlex.quote(amended_msg)}" if needs_message_amend and amended_msg else "--no-edit"

Based on learnings, _restore_original_author_for_cherry_pick() intentionally removes existing Signed-off-by trailers and replaces them with a single trailer synthesized from the git author identity to guarantee DCO compliance.

🧰 Tools
🪛 Ruff (0.15.10)

[warning] 827-827: Logging statement uses f-string

(G004)


[warning] 832-832: Logging statement uses f-string

(G004)


[warning] 844-844: Logging statement uses f-string

(G004)

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@webhook_server/libs/handlers/runner_handler.py` around lines 818 - 856, The
early return in _restore_original_author_for_cherry_pick (when current author
name/email match author_name and author_email) prevents the Signed-off-by
trailer normalization; remove the return False and instead allow the code to
continue to the commit-message read/amend path so the method always rewrites
trailers to a single synthesized "Signed-off-by: {author_name} <{author_email}>"
(you can still skip changing author metadata if desired but must not skip
building amended_msg); specifically, edit the branch that checks
current_author_info (the block referencing current_author_info, author_name,
author_email and the logged "Author already matches" message) to not return
early and ensure amended_msg is constructed and used later when computing
msg_flag (which references amended_msg) so trailers are normalized even when the
author already matches.

rc, _, err = await run_command(
command=f"{git_cmd} commit --amend --author={shlex.quote(author_spec)} {msg_flag}",
log_prefix=self.log_prefix,
redact_secrets=redact_list,
mask_sensitive=self.github_webhook.mask_sensitive,
)

if not rc:
redacted_err = _redact_secrets(
err,
redact_list,
mask_sensitive=self.github_webhook.mask_sensitive,
)
self.logger.warning(
f"{self.log_prefix} Failed to amend cherry-pick author for DCO compliance: {redacted_err}"
)
return False

self.logger.info(f"{self.log_prefix} Restored original author on cherry-pick for DCO compliance")
return True
except asyncio.CancelledError:
raise
except Exception:
self.logger.exception(f"{self.log_prefix} Failed to restore original author for cherry-pick")
return False

async def _resolve_cherry_pick_with_ai(
self,
worktree_path: str,
Expand Down Expand Up @@ -1025,6 +1170,13 @@ async def cherry_pick(
return
cherry_pick_had_conflicts = True

# Restore original PR author on cherry-pick for DCO compliance
await self._restore_original_author_for_cherry_pick(
pull_request=pull_request,
git_cmd=git_cmd,
github_token=github_token,
)

# Push the branch
rc, out, err = await run_command(
command=push_command,
Expand Down
Loading