diff --git a/docs/sources.md b/docs/sources.md index 71d28ad..f24a074 100644 --- a/docs/sources.md +++ b/docs/sources.md @@ -138,6 +138,8 @@ sync: condense: true # Haiku extraction for long notifications allow_repos: [] # Guardrail allowlist — empty = all repos. Example: ["ClickHouse/*"] deny_repos: [] # Guardrail denylist — always dropped (takes precedence) + allow_actors: [] # Guardrail allowlist of GitHub logins — empty = all. Example: ["alice", "bob"] + deny_actors: [] # Guardrail denylist of GitHub logins — always dropped (takes precedence) github_events: enabled: true @@ -208,6 +210,30 @@ With `allow_repos` empty (the default), all repos pass — behavior is unchanged | `github.allow_repos` | list | `[]` | Allowlist of repo globs. Empty = all repos pass | | `github.deny_repos` | list | `[]` | Denylist of repo globs. Takes precedence over `allow_repos` | +### GitHub actor guardrail + +The GitHub notification source also matches on `actors` — the list of every GitHub login +involved in a notification (issue/PR author, assignees, and comment/review authors). This +restricts **who** can put a notification in front of the worker, so a drive-by `@mention` +from an untrusted account is dropped before the agent ever sees it: + +```yaml +sync: + github: + allow_actors: ["alice", "bob"] # Only notifications involving these logins reach the inbox + deny_actors: ["noisy-bot"] # Always dropped, even if otherwise allowed +``` + +`actors` is list-valued, so a notification is kept when **any** involved login matches the +allowlist. A non-empty `allow_actors` is **fail-closed**: a notification with no +identifiable actor (e.g. enrichment failed) is dropped. With `allow_actors` empty (the +default), all actors pass — behavior is unchanged. The repo and actor rules AND together. + +| Field | Type | Default | Description | +|-------|------|---------|-------------| +| `github.allow_actors` | list | `[]` | Allowlist of GitHub login globs. Empty = all actors pass | +| `github.deny_actors` | list | `[]` | Denylist of GitHub login globs. Takes precedence over `allow_actors` | + ### Extending to other sources Adding a guardrail to another source is a config field plus one registry line. In diff --git a/nerve/config.py b/nerve/config.py index b9a9bcb..fff5cff 100644 --- a/nerve/config.py +++ b/nerve/config.py @@ -271,6 +271,14 @@ class GitHubSyncConfig: # pass); deny_repos is a denylist and takes precedence over allow_repos. allow_repos: list[str] = field(default_factory=list) deny_repos: list[str] = field(default_factory=list) + # Actor guardrails — limit which GitHub logins can land a notification in + # the inbox, matched on the "actors" metadata key (every login involved in + # the notification: issue/PR author, assignees, comment & review authors). + # Same semantics as allow_repos/deny_repos — case-insensitive globs, deny + # wins, and a non-empty allow_actors is fail-closed (a notification with no + # matching actor is dropped before it reaches the inbox). Empty = all pass. + allow_actors: list[str] = field(default_factory=list) + deny_actors: list[str] = field(default_factory=list) @classmethod def from_dict(cls, d: dict) -> GitHubSyncConfig: @@ -284,6 +292,8 @@ def from_dict(cls, d: dict) -> GitHubSyncConfig: condense=d.get("condense", False), allow_repos=d.get("allow_repos", []), deny_repos=d.get("deny_repos", []), + allow_actors=d.get("allow_actors", []), + deny_actors=d.get("deny_actors", []), ) diff --git a/nerve/sources/github.py b/nerve/sources/github.py index 4087d0c..b87bfe2 100644 --- a/nerve/sources/github.py +++ b/nerve/sources/github.py @@ -30,6 +30,43 @@ _MAX_CONCURRENT_FETCHES = 5 +def _collect_actors( + subject_user: str, + assignees: list[str], + comment: dict[str, Any] | None, + latest_review: dict[str, Any] | None, + inline_comments: list[dict[str, Any]], + recent_comments: list[dict[str, Any]], +) -> list[str]: + """Every GitHub login involved in a notification, de-duplicated. + + Order-preserving (first occurrence wins) with case-insensitive de-dup; + empty and placeholder ("?") logins are skipped. Surfaced as the ``actors`` + metadata key so the inbox guardrail can allow/deny a notification by who is + involved (see :mod:`nerve.sources.filters`). The raw ``/notifications`` + payload carries no actor, but enrichment has already fetched these logins + for the rendered content. + """ + candidates: list[str] = [subject_user, *assignees] + if comment: + candidates.append(comment.get("user", "")) + if latest_review: + candidates.append(latest_review.get("user", "")) + candidates.extend(ic.get("user", "") for ic in inline_comments) + candidates.extend(rc.get("user", "") for rc in recent_comments) + + actors: list[str] = [] + seen: set[str] = set() + for login in candidates: + if not login or login == "?": + continue + key = login.lower() + if key not in seen: + seen.add(key) + actors.append(login) + return actors + + class GitHubSource(Source): """GitHub notification source using the gh CLI.""" @@ -181,6 +218,13 @@ async def fetch(self, cursor: str | None, limit: int = 100) -> FetchResult: f"\n{rc_user} ({rc_date}):\n{rc_body}" ) + # Surface every involved login so the inbox guardrail can + # allow/deny by actor (the raw notification carries no actor). + actors = _collect_actors( + subject_user, assignees, comment, latest_review, + inline_comments, recent_comments, + ) + records.append(SourceRecord( id=notif.get("id", ""), source="github", @@ -195,6 +239,7 @@ async def fetch(self, cursor: str | None, limit: int = 100) -> FetchResult: "subject_url": html_url, "repo_name": repo_name, "repo_url": repo.get("html_url", ""), + "actors": actors, }, )) diff --git a/nerve/sources/registry.py b/nerve/sources/registry.py index 4df7048..67f3504 100644 --- a/nerve/sources/registry.py +++ b/nerve/sources/registry.py @@ -86,15 +86,19 @@ def build_source_runners( # GitHub (notifications) gh = config.sync.github if gh.enabled: - from nerve.sources.filters import InboxFilter + from nerve.sources.filters import FieldRule, InboxFilter from nerve.sources.github import GitHubSource source = GitHubSource() - # Guardrail: restrict which repos reach the inbox (matched on the - # "repo_name" metadata key set by GitHubSource). - gh_filter = InboxFilter.from_field( - "repo_name", allow=gh.allow_repos, deny=gh.deny_repos, - ) + # Guardrails: restrict which repos (matched on the "repo_name" metadata + # key) and which GitHub actors (matched on the "actors" metadata key — + # every login involved in a notification) reach the inbox. The two rules + # AND together; within each, deny wins and a non-empty allow is + # fail-closed. + gh_filter = InboxFilter(rules=[ + FieldRule(field="repo_name", allow=gh.allow_repos, deny=gh.deny_repos), + FieldRule(field="actors", allow=gh.allow_actors, deny=gh.deny_actors), + ]) runners.append(SourceRunner( source=source, db=db, @@ -107,8 +111,11 @@ def build_source_runners( )) if gh_filter.active: logger.info( - "Registered source: github (batch=%d, guardrail: allow=%s deny=%s)", - gh.batch_size, gh.allow_repos or "*", gh.deny_repos or [], + "Registered source: github (batch=%d, guardrail: " + "repos allow=%s deny=%s; actors allow=%s deny=%s)", + gh.batch_size, + gh.allow_repos or "*", gh.deny_repos or [], + gh.allow_actors or "*", gh.deny_actors or [], ) else: logger.info("Registered source: github (batch=%d)", gh.batch_size) diff --git a/tests/test_github_source_actors.py b/tests/test_github_source_actors.py new file mode 100644 index 0000000..bb640ee --- /dev/null +++ b/tests/test_github_source_actors.py @@ -0,0 +1,243 @@ +"""Actor guardrail for the GitHub notifications source. + +Covers the ``actors`` metadata key that ``GitHubSource`` surfaces (every login +involved in a notification), the ``allow_actors`` / ``deny_actors`` config, and +the registry wiring that turns them into an inbox guardrail — so an untrusted +GitHub user cannot drive the worker merely by @-mentioning it. +""" + +from __future__ import annotations + +import json + +import pytest + +from nerve.config import NerveConfig +from nerve.sources.github import GitHubSource, _collect_actors +from nerve.sources.models import SourceRecord +from nerve.sources.registry import build_source_runners + + +# --------------------------------------------------------------------------- +# _collect_actors — pure de-dup / ordering / placeholder handling +# --------------------------------------------------------------------------- + +def test_collect_actors_orders_and_dedups_case_insensitively(): + actors = _collect_actors( + subject_user="Alice", + assignees=["bob", "alice"], # "alice" is a case-dup of "Alice" + comment={"user": "carol"}, + latest_review=None, + inline_comments=[], + recent_comments=[], + ) + assert actors == ["Alice", "bob", "carol"] + + +def test_collect_actors_skips_empty_and_placeholder_logins(): + actors = _collect_actors( + subject_user="", + assignees=[], + comment={"user": "?"}, # "?" is the enrichment placeholder + latest_review={"user": ""}, + inline_comments=[{"user": "dave"}], + recent_comments=[{"user": "?"}, {"user": "erin"}], + ) + assert actors == ["dave", "erin"] + + +def test_collect_actors_spans_all_enrichment_sources(): + actors = _collect_actors( + subject_user="author", + assignees=["assignee"], + comment={"user": "commenter"}, + latest_review={"user": "reviewer"}, + inline_comments=[{"user": "inline1"}, {"user": "inline2"}], + recent_comments=[{"user": "recent"}], + ) + assert actors == [ + "author", "assignee", "commenter", "reviewer", + "inline1", "inline2", "recent", + ] + + +# --------------------------------------------------------------------------- +# Config — allow_actors / deny_actors parsing +# --------------------------------------------------------------------------- + +def test_github_sync_config_reads_actor_lists(): + cfg = NerveConfig.from_dict({ + "sync": {"github": { + "allow_actors": ["alice", "bob"], + "deny_actors": ["spammer"], + }}, + }) + gh = cfg.sync.github + assert gh.allow_actors == ["alice", "bob"] + assert gh.deny_actors == ["spammer"] + + +def test_github_sync_config_actor_lists_default_empty(): + gh = NerveConfig.from_dict({}).sync.github + assert gh.allow_actors == [] + assert gh.deny_actors == [] + + +# --------------------------------------------------------------------------- +# Source — fetch() surfaces the "actors" key in record metadata +# --------------------------------------------------------------------------- + +class _FakeProc: + """Minimal stand-in for an asyncio subprocess returning canned stdout.""" + + def __init__(self, stdout: bytes): + self._stdout = stdout + self.returncode = 0 + + async def communicate(self): + return self._stdout, b"" + + +@pytest.mark.asyncio +async def test_fetch_populates_actors_metadata(monkeypatch): + notifications = [{ + "id": "n1", + "reason": "mention", + "unread": True, + "updated_at": "2026-01-02T10:00:00Z", + "subject": { + "title": "Bug", + "type": "Issue", + "url": "https://api.github.com/repos/owner/repo/issues/1", + }, + "repository": { + "full_name": "owner/repo", + "html_url": "https://github.com/owner/repo", + }, + }] + + async def fake_exec(*args, **kwargs): + return _FakeProc(json.dumps(notifications).encode()) + + monkeypatch.setattr( + "nerve.sources.github.asyncio.create_subprocess_exec", fake_exec, + ) + + src = GitHubSource() + + async def fake_enrich(notif, sem): + return { + "html_url": "https://github.com/owner/repo/issues/1", + "body": "desc", + "state": "open", + "user": "alice", + "assignees": ["bob"], + "labels": ["bug"], + "latest_comment": {"user": "carol", "body": "ping", "created_at": "x"}, + } + + monkeypatch.setattr(src, "_enrich_notification", fake_enrich) + + result = await src.fetch(cursor="2026-01-02T09:00:00Z") + + assert len(result.records) == 1 + assert result.records[0].metadata["actors"] == ["alice", "bob", "carol"] + + +# --------------------------------------------------------------------------- +# Registry — config actor lists become an active inbox guardrail +# --------------------------------------------------------------------------- + +def _gh_rec(rid: str, actors: list[str], repo: str = "ClickHouse/nerve") -> SourceRecord: + return SourceRecord( + id=rid, source="github", record_type="github_notification", + summary=f"[{repo}] x", content="c", timestamp="2026-01-01T00:00:00Z", + metadata={"repo_name": repo, "actors": actors}, + ) + + +@pytest.mark.asyncio +async def test_build_source_runners_wires_actor_guardrail(db): + cfg = NerveConfig.from_dict({ + "sync": {"github": { + "enabled": True, + "allow_actors": ["alice", "bob"], + }}, + }) + runners = build_source_runners(cfg, db) + gh = next(r for r in runners if r.source.source_name == "github") + + assert gh.inbox_filter is not None + assert gh.inbox_filter.active is True + # A trusted actor being involved keeps the record; only strangers (or no + # identifiable actor) → dropped, fail-closed. + assert gh.inbox_filter.passes(_gh_rec("a", ["bob", "x"])) is True + assert gh.inbox_filter.passes(_gh_rec("b", ["stranger"])) is False + assert gh.inbox_filter.passes(_gh_rec("c", [])) is False + + +@pytest.mark.asyncio +async def test_build_source_runners_no_actor_config_is_passthrough(db): + # Without allow/deny actors (and no repo guardrail) the github filter must + # stay inactive so normal notifications still flow. + cfg = NerveConfig.from_dict({"sync": {"github": {"enabled": True}}}) + runners = build_source_runners(cfg, db) + gh = next(r for r in runners if r.source.source_name == "github") + assert gh.inbox_filter is None or gh.inbox_filter.active is False + + +@pytest.mark.asyncio +async def test_build_source_runners_actor_deny_wins(db): + # deny_actors takes precedence even when an allowed actor co-occurs. + cfg = NerveConfig.from_dict({ + "sync": {"github": { + "enabled": True, + "allow_actors": ["alice", "bob"], + "deny_actors": ["spammer"], + }}, + }) + runners = build_source_runners(cfg, db) + gh = next(r for r in runners if r.source.source_name == "github") + assert gh.inbox_filter.passes(_gh_rec("ok", ["alice"])) is True + assert gh.inbox_filter.passes(_gh_rec("no", ["alice", "spammer"])) is False + + +@pytest.mark.asyncio +async def test_fetch_actors_empty_when_enrichment_fails(monkeypatch): + # When enrichment raises, the loop falls back to extra={} — the record is + # still produced (with no identifiable actor), proving every login variable + # is defined on the failure path and `actors` degrades to []. + notifications = [{ + "id": "n1", + "reason": "mention", + "unread": True, + "updated_at": "2026-01-02T10:00:00Z", + "subject": { + "title": "Bug", + "type": "Issue", + "url": "https://api.github.com/repos/owner/repo/issues/1", + }, + "repository": { + "full_name": "owner/repo", + "html_url": "https://github.com/owner/repo", + }, + }] + + async def fake_exec(*args, **kwargs): + return _FakeProc(json.dumps(notifications).encode()) + + monkeypatch.setattr( + "nerve.sources.github.asyncio.create_subprocess_exec", fake_exec, + ) + + src = GitHubSource() + + async def boom(notif, sem): + raise RuntimeError("enrichment failed") + + monkeypatch.setattr(src, "_enrich_notification", boom) + + result = await src.fetch(cursor="2026-01-02T09:00:00Z") + + assert len(result.records) == 1 + assert result.records[0].metadata["actors"] == [] diff --git a/tests/test_source_filters.py b/tests/test_source_filters.py index 0a379cc..af4d006 100644 --- a/tests/test_source_filters.py +++ b/tests/test_source_filters.py @@ -194,3 +194,54 @@ async def test_runner_all_dropped_still_advances_cursor(db): rows, _ = await db.list_source_messages(source="github", limit=100) assert rows == [] assert await db.get_sync_cursor("github") == "c2" + + +# --------------------------------------------------------------------------- +# Actor guardrail — the "actors" metadata key (list of involved GitHub logins) +# --------------------------------------------------------------------------- + +def test_actor_allowlist_keeps_record_if_any_involved_login_matches(): + rule = FieldRule(field="actors", allow=["alice", "bob"]) + # list-valued: kept if ANY involved login is on the allowlist + assert rule.passes(_rec(actors=["bob", "stranger"])) is True + assert rule.passes(_rec(actors=["alice"])) is True + assert rule.passes(_rec(actors=["stranger", "drive-by"])) is False + + +def test_actor_denylist_drops_if_any_login_matches(): + rule = FieldRule(field="actors", deny=["spammer"]) + assert rule.passes(_rec(actors=["trusted", "spammer"])) is False + assert rule.passes(_rec(actors=["trusted"])) is True + + +def test_actor_allowlist_absent_or_empty_actors_fails_closed(): + rule = FieldRule(field="actors", allow=["alice"]) + assert rule.passes(_rec()) is False # no "actors" key at all + assert rule.passes(_rec(actors=[])) is False # present but empty + + +def test_repo_and_actor_guardrails_and_together(): + flt = InboxFilter(rules=[ + FieldRule(field="repo_name", allow=["ClickHouse/*"]), + FieldRule(field="actors", allow=["alice", "bob"]), + ]) + assert flt.passes(_rec("ClickHouse/nerve", actors=["bob"])) is True + # right repo, untrusted actor → dropped + assert flt.passes(_rec("ClickHouse/nerve", actors=["stranger"])) is False + # trusted actor, wrong repo → dropped + assert flt.passes(_rec("other/repo", actors=["bob"])) is False + + +def test_actor_deny_wins_even_when_an_allowed_actor_is_present(): + # Security-critical: a denied login co-occurring with an allowed one is + # still dropped (deny takes precedence over allow). + rule = FieldRule(field="actors", allow=["alice", "bob"], deny=["spammer"]) + assert rule.passes(_rec(actors=["alice"])) is True + assert rule.passes(_rec(actors=["alice", "spammer"])) is False + + +def test_actor_allowlist_is_case_insensitive(): + rule = FieldRule(field="actors", allow=["Alice"]) + assert rule.passes(_rec(actors=["alice"])) is True + assert rule.passes(_rec(actors=["ALICE"])) is True + assert rule.passes(_rec(actors=["bob"])) is False