diff --git a/backend/seeds/github/github_default.json b/backend/seeds/github/github_default.json new file mode 100644 index 00000000..0e64a1bc --- /dev/null +++ b/backend/seeds/github/github_default.json @@ -0,0 +1,202 @@ +{ + "github_users": [ + { + "id": "10000001", + "login": "agent-bot", + "name": "Agent Bot", + "email": "agent@example.com", + "type": "Bot", + "site_admin": false, + "created_at": "2024-01-10T09:00:00" + }, + { + "id": "10000002", + "login": "alice", + "name": "Alice Maintainer", + "email": "alice@example.com", + "type": "User", + "site_admin": true, + "created_at": "2023-06-01T12:00:00" + }, + { + "id": "10000003", + "login": "bob", + "name": "Bob Contributor", + "email": "bob@example.com", + "type": "User", + "site_admin": false, + "created_at": "2023-07-15T12:00:00" + }, + { + "id": "10000004", + "login": "carol", + "name": "Carol Reviewer", + "email": "carol@example.com", + "type": "User", + "site_admin": false, + "created_at": "2023-08-22T12:00:00" + } + ], + "github_repositories": [ + { + "id": "20000001", + "owner_id": "10000002", + "name": "widgets", + "full_name": "acme/widgets", + "description": "Reference widgets library", + "private": false, + "default_branch": "main", + "created_at": "2024-01-15T10:00:00" + } + ], + "github_labels": [ + { + "id": "30000001", + "repository_id": "20000001", + "name": "bug", + "color": "d73a4a", + "description": "Something isn't working", + "is_default": true + }, + { + "id": "30000002", + "repository_id": "20000001", + "name": "enhancement", + "color": "a2eeef", + "description": "New feature or request", + "is_default": true + }, + { + "id": "30000003", + "repository_id": "20000001", + "name": "documentation", + "color": "0075ca", + "description": "Improvements or additions to documentation", + "is_default": true + }, + { + "id": "30000004", + "repository_id": "20000001", + "name": "good first issue", + "color": "7057ff", + "description": "Good for newcomers", + "is_default": true + }, + { + "id": "30000005", + "repository_id": "20000001", + "name": "needs-triage", + "color": "ededed", + "description": "Awaiting maintainer triage", + "is_default": false + } + ], + "github_issues": [ + { + "id": "40000001", + "repository_id": "20000001", + "number": 1, + "title": "Widget factory panics on empty input", + "body": "Calling `build()` with no components raises an unhandled exception.", + "state": "open", + "user_id": "10000003", + "locked": false, + "comments_count": 1, + "is_pull_request": false, + "created_at": "2024-02-01T09:00:00", + "updated_at": "2024-02-01T09:00:00" + }, + { + "id": "40000002", + "repository_id": "20000001", + "number": 2, + "title": "Add async support to renderer", + "body": "We need `render_async()` to integrate with asyncio servers.", + "state": "open", + "user_id": "10000004", + "locked": false, + "comments_count": 0, + "is_pull_request": false, + "created_at": "2024-02-05T11:30:00", + "updated_at": "2024-02-05T11:30:00" + }, + { + "id": "40000003", + "repository_id": "20000001", + "number": 3, + "title": "Docs: document widget lifecycle", + "body": "The README skips over lifecycle hooks.", + "state": "closed", + "state_reason": "completed", + "user_id": "10000002", + "locked": false, + "comments_count": 0, + "is_pull_request": false, + "created_at": "2024-01-20T14:00:00", + "updated_at": "2024-01-25T09:00:00", + "closed_at": "2024-01-25T09:00:00" + }, + { + "id": "40000004", + "repository_id": "20000001", + "number": 4, + "title": "Fix widget factory panic", + "body": "Closes #1. Adds guard for empty components list.", + "state": "open", + "user_id": "10000003", + "locked": false, + "comments_count": 0, + "is_pull_request": true, + "draft": false, + "merged": false, + "head_ref": "bob/fix-empty-input", + "base_ref": "main", + "head_sha": "aaaaaaaabbbbbbbbccccccccdddddddd11111111", + "base_sha": "ffffffffeeeeeeeeddddddddcccccccc22222222", + "created_at": "2024-02-10T15:00:00", + "updated_at": "2024-02-10T15:00:00" + }, + { + "id": "40000005", + "repository_id": "20000001", + "number": 5, + "title": "Async renderer prototype", + "body": "Initial pass at `render_async`. Draft — please review API shape.", + "state": "open", + "user_id": "10000004", + "locked": false, + "comments_count": 0, + "is_pull_request": true, + "draft": true, + "merged": false, + "head_ref": "carol/async-renderer", + "base_ref": "main", + "head_sha": "1111111122222222333333334444444455555555", + "base_sha": "ffffffffeeeeeeeeddddddddcccccccc22222222", + "created_at": "2024-02-12T10:00:00", + "updated_at": "2024-02-12T10:00:00" + } + ], + "github_issue_labels": [ + {"issue_id": "40000001", "label_id": "30000001"}, + {"issue_id": "40000001", "label_id": "30000005"}, + {"issue_id": "40000002", "label_id": "30000002"}, + {"issue_id": "40000003", "label_id": "30000003"} + ], + "github_issue_assignees": [ + {"issue_id": "40000002", "user_id": "10000004"} + ], + "github_pull_request_reviewers": [ + {"issue_id": "40000004", "user_id": "10000002"} + ], + "github_issue_comments": [ + { + "id": "50000001", + "issue_id": "40000001", + "user_id": "10000002", + "body": "Reproduced on 0.4.1 — triaging.", + "created_at": "2024-02-01T10:00:00", + "updated_at": "2024-02-01T10:00:00" + } + ] +} diff --git a/backend/seeds/testsuites/github_bench.json b/backend/seeds/testsuites/github_bench.json new file mode 100644 index 00000000..842ffd63 --- /dev/null +++ b/backend/seeds/testsuites/github_bench.json @@ -0,0 +1,214 @@ +{ + "id": "github-bench", + "name": "GitHub Bench", + "description": "Benchmark tests for GitHub issue, pull-request, comment, and label operations.", + "service": "github", + "owner": "dev-user", + "ignore_fields": { + "global": ["created_at", "updated_at", "closed_at", "merged_at"] + }, + "tests": [ + { + "id": "issue_create_bug", + "name": "Open a new bug report", + "prompt": "Open a new issue in acme/widgets titled 'Widget factory leaks memory' describing a leak when processing large inputs. Apply the 'bug' label.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issues", + "where": { + "title": {"eq": "Widget factory leaks memory"}, + "is_pull_request": {"eq": false}, + "state": {"eq": "open"} + }, + "expected_count": 1 + }, + { + "diff_type": "added", + "entity": "github_issue_labels", + "where": {"label_id": {"eq": "30000001"}}, + "expected_count": 1 + } + ] + }, + { + "id": "issue_close_resolved", + "name": "Close issue as completed", + "prompt": "Close issue #2 in acme/widgets with state_reason 'completed' now that async renderer support has shipped.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issues", + "where": {"id": {"eq": "40000002"}}, + "changes": { + "state": {"from": "open", "to": "closed"}, + "state_reason": {"to": "completed"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_triage_label", + "name": "Add needs-triage label to issue", + "prompt": "Add the 'needs-triage' label to issue #2 in acme/widgets.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_labels", + "where": { + "issue_id": {"eq": "40000002"}, + "label_id": {"eq": "30000005"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_assign_user", + "name": "Assign issue to user", + "prompt": "Assign bob to issue #1 in acme/widgets so he can investigate the panic.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_assignees", + "where": { + "issue_id": {"eq": "40000001"}, + "user_id": {"eq": "10000003"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_comment_reply", + "name": "Comment on an issue", + "prompt": "Comment on issue #1 in acme/widgets saying 'I can reproduce on 0.5.0 as well — will pick this up.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_comments", + "where": { + "issue_id": {"eq": "40000001"}, + "body": {"contains": "reproduce"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "comment_edit_typo", + "name": "Edit an existing comment", + "prompt": "Alice noticed a typo in comment 50000001 on issue #1 of acme/widgets. Update the comment body to 'Reproduced on 0.4.1 and 0.5.0 — triaging.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issue_comments", + "where": {"id": {"eq": "50000001"}}, + "changes": { + "body": {"to": "Reproduced on 0.4.1 and 0.5.0 — triaging."} + }, + "expected_count": 1 + } + ] + }, + { + "id": "label_create_security", + "name": "Create a security label", + "prompt": "Create a new label called 'security' in acme/widgets with color 'b60205' and description 'Security-sensitive issue'.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_labels", + "where": { + "repository_id": {"eq": "20000001"}, + "name": {"eq": "security"}, + "color": {"eq": "b60205"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_open_docs_fix", + "name": "Open a pull request", + "prompt": "Open a pull request in acme/widgets titled 'Docs: clarify lifecycle hooks' from branch 'agent-bot/lifecycle-docs' into 'main', body 'Expands README §3 based on #3.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issues", + "where": { + "is_pull_request": {"eq": true}, + "title": {"eq": "Docs: clarify lifecycle hooks"}, + "head_ref": {"eq": "agent-bot/lifecycle-docs"}, + "base_ref": {"eq": "main"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_request_review", + "name": "Request reviewers on a PR", + "prompt": "Request alice and carol as reviewers on PR #4 in acme/widgets.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000003", + "assertions": [ + { + "diff_type": "added", + "entity": "github_pull_request_reviewers", + "where": { + "issue_id": {"eq": "40000004"}, + "user_id": {"eq": "10000004"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_merge", + "name": "Merge a pull request", + "prompt": "Merge PR #4 in acme/widgets now that review is complete.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issues", + "where": {"id": {"eq": "40000004"}}, + "changes": { + "state": {"from": "open", "to": "closed"}, + "merged": {"from": false, "to": true} + }, + "expected_count": 1 + } + ] + } + ] +} diff --git a/backend/src/platform/api/main.py b/backend/src/platform/api/main.py index 0f0458ec..f5d4df5e 100644 --- a/backend/src/platform/api/main.py +++ b/backend/src/platform/api/main.py @@ -23,6 +23,7 @@ from src.services.slack.api.methods import routes as slack_routes from src.services.calendar.api import routes as calendar_routes from src.services.box.api.routes import routes as box_routes +from src.services.github.api.routes import routes as github_routes from src.platform.logging_config import setup_logging from src.platform.isolationEngine.pool import PoolManager from src.platform.db.schema import TemplateEnvironment @@ -126,6 +127,9 @@ def create_app(): box_router = Router(box_routes) app.mount("/api/env/{env_id}/services/box/2.0", box_router) + github_router = Router(github_routes) + app.mount("/api/env/{env_id}/services/github", github_router) + linear_schema_path = "src/services/linear/api/schema/Linear-API.graphql" linear_type_defs = load_schema_from_path(linear_schema_path) linear_schema = make_executable_schema(linear_type_defs, *bindables) diff --git a/backend/src/services/github/__init__.py b/backend/src/services/github/__init__.py new file mode 100644 index 00000000..db43f55a --- /dev/null +++ b/backend/src/services/github/__init__.py @@ -0,0 +1,5 @@ +"""GitHub API replica for agent-diff benchmarks. + +Covers issues, pull requests, comments, and labels — enough to evaluate +agents that triage issues, land PRs, and manage repo labels. +""" diff --git a/backend/src/services/github/api/__init__.py b/backend/src/services/github/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/src/services/github/api/routes.py b/backend/src/services/github/api/routes.py new file mode 100644 index 00000000..8664bdde --- /dev/null +++ b/backend/src/services/github/api/routes.py @@ -0,0 +1,817 @@ +"""GitHub REST routes. + +Mounted at ``/api/env/{env_id}/services/github``. Paths mirror +``https://api.github.com`` so existing GitHub clients work against the +replica by swapping the base URL. +""" + +from __future__ import annotations + +import json +import logging +from typing import Any, List, Optional + +from sqlalchemy.orm import Session +from starlette.requests import Request +from starlette.responses import JSONResponse, Response +from starlette.routing import Route + +from src.services.github.database import operations as ops +from src.services.github.database.schema import Issue, Repository, User + +logger = logging.getLogger(__name__) + + +def _session(request: Request) -> Session: + session = getattr(request.state, "db_session", None) + if session is None: + raise RuntimeError("IsolationMiddleware did not attach a db_session") + return session + + +def _error(status_code: int, message: str, *, doc_url: Optional[str] = None) -> Response: + body: dict[str, Any] = { + "message": message, + "documentation_url": doc_url or "https://docs.github.com/rest", + "status": str(status_code), + } + return JSONResponse(body, status_code=status_code) + + +def _principal(request: Request) -> Optional[User]: + session = _session(request) + user_id = getattr(request.state, "impersonate_user_id", None) + if user_id: + u = ops.get_user_by_id(session, str(user_id)) + if u: + return u + email = getattr(request.state, "impersonate_email", None) + if email: + u = ops.get_user_by_email(session, email) + if u: + return u + u = ops.get_user_by_login(session, email) + if u: + return u + return None + + +def _require_principal(request: Request) -> User | Response: + user = _principal(request) + if user is None: + return _error(401, "Requires authentication") + return user + + +def _require_repo(session: Session, owner: str, repo: str) -> Repository | Response: + r = ops.get_repo(session, owner, repo) + if r is None: + return _error(404, "Not Found") + return r + + +def _require_issue( + session: Session, repo: Repository, number: int, *, expect_pr: Optional[bool] = None +) -> Issue | Response: + issue = ops.get_issue(session, repo, number) + if issue is None: + return _error(404, "Not Found") + if expect_pr is True and not issue.is_pull_request: + return _error(404, "Not Found") + if expect_pr is False and issue.is_pull_request: + return _error( + 404, + "Not Found — use the pull requests endpoint for this resource", + ) + return issue + + +async def _json_body(request: Request) -> dict | Response: + try: + raw = await request.body() + if not raw: + return {} + return json.loads(raw) + except json.JSONDecodeError: + return _error(400, "Problems parsing JSON") + + +def _resolve_users(session: Session, logins: List[str]) -> List[User]: + users: List[User] = [] + for login in logins: + u = ops.get_user_by_login(session, login) + if u: + users.append(u) + return users + + +def _resolve_labels(session: Session, repo: Repository, names: List[str]): + labels = [] + for name in names: + lbl = ops.get_label(session, repo, name) + if lbl: + labels.append(lbl) + return labels + + +# Repository metadata + +async def get_repo_route(request: Request) -> Response: + session = _session(request) + result = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(result, Response): + return result + return JSONResponse(result.to_dict()) + + +# Labels + +async def list_labels_route(request: Request) -> Response: + session = _session(request) + result = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(result, Response): + return result + labels = ops.list_labels(session, result) + return JSONResponse([lbl.to_dict() for lbl in labels]) + + +async def create_label_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + body = await _json_body(request) + if isinstance(body, Response): + return body + name = body.get("name") + if not name: + return _error(422, "Validation Failed: name is required") + if ops.get_label(session, repo, name): + return _error(422, "Validation Failed: label already exists") + label = ops.create_label( + session, + repo, + name=name, + color=body.get("color", "ededed"), + description=body.get("description"), + ) + return JSONResponse(label.to_dict(), status_code=201) + + +async def get_label_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + label = ops.get_label(session, repo, request.path_params["name"]) + if label is None: + return _error(404, "Not Found") + return JSONResponse(label.to_dict()) + + +async def update_label_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + label = ops.get_label(session, repo, request.path_params["name"]) + if label is None: + return _error(404, "Not Found") + body = await _json_body(request) + if isinstance(body, Response): + return body + ops.update_label( + session, + label, + new_name=body.get("new_name"), + color=body.get("color"), + description=body.get("description"), + ) + return JSONResponse(label.to_dict()) + + +async def delete_label_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + label = ops.get_label(session, repo, request.path_params["name"]) + if label is None: + return _error(404, "Not Found") + ops.delete_label(session, label) + return Response(status_code=204) + + +# Issues + +async def list_issues_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + state = request.query_params.get("state", "open") + issues = ops.list_issues(session, repo, state=state, include_pulls=True) + return JSONResponse([i.to_issue_dict() for i in issues]) + + +async def create_issue_route(request: Request) -> Response: + session = _session(request) + principal = _require_principal(request) + if isinstance(principal, Response): + return principal + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + body = await _json_body(request) + if isinstance(body, Response): + return body + title = body.get("title") + if not title: + return _error(422, "Validation Failed: title is required") + assignees = _resolve_users(session, list(body.get("assignees", []) or [])) + if body.get("assignee") and not assignees: + assignees = _resolve_users(session, [body["assignee"]]) + labels = _resolve_labels(session, repo, list(body.get("labels", []) or [])) + issue = ops.create_issue( + session, + repo, + title=title, + body=body.get("body"), + user=principal, + assignees=assignees, + labels=labels, + ) + return JSONResponse(issue.to_issue_dict(), status_code=201) + + +async def get_issue_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + return JSONResponse(issue.to_issue_dict()) + + +async def update_issue_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + state = body.get("state") + if state is not None and state not in ("open", "closed"): + return _error(422, "Validation Failed: state must be open or closed") + ops.update_issue( + session, + issue, + title=body.get("title"), + body=body.get("body"), + state=state, + state_reason=body.get("state_reason"), + locked=body.get("locked"), + ) + if "assignees" in body: + assignees = _resolve_users(session, list(body.get("assignees") or [])) + for ia in list(issue.assignees): + session.delete(ia) + session.flush() + ops.set_issue_assignees(session, issue, assignees) + if "labels" in body: + labels = _resolve_labels(session, repo, list(body.get("labels") or [])) + ops.set_issue_labels(session, issue, labels) + return JSONResponse(issue.to_issue_dict()) + + +# Issue assignees + +async def add_assignees_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + users = _resolve_users(session, list(body.get("assignees", []) or [])) + ops.set_issue_assignees(session, issue, users) + return JSONResponse(issue.to_issue_dict(), status_code=201) + + +async def remove_assignees_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + users = _resolve_users(session, list(body.get("assignees", []) or [])) + ops.remove_issue_assignees(session, issue, users) + return JSONResponse(issue.to_issue_dict()) + + +# Issue labels + +async def list_issue_labels_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + return JSONResponse([il.label.to_dict() for il in issue.labels if il.label]) + + +async def add_issue_labels_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + names = body.get("labels") if isinstance(body, dict) else body + if not isinstance(names, list): + return _error(422, "Validation Failed: labels must be an array") + labels = _resolve_labels(session, repo, [str(n) for n in names]) + attached = ops.add_issue_labels(session, issue, labels) + return JSONResponse([lbl.to_dict() for lbl in attached]) + + +async def set_issue_labels_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + names = body.get("labels") if isinstance(body, dict) else body + if not isinstance(names, list): + return _error(422, "Validation Failed: labels must be an array") + labels = _resolve_labels(session, repo, [str(n) for n in names]) + set_labels = ops.set_issue_labels(session, issue, labels) + return JSONResponse([lbl.to_dict() for lbl in set_labels]) + + +async def remove_single_label_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + label = ops.get_label(session, repo, request.path_params["name"]) + if label is None: + return _error(404, "Not Found") + ops.remove_issue_label(session, issue, label) + return JSONResponse([il.label.to_dict() for il in issue.labels if il.label]) + + +async def clear_issue_labels_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + ops.clear_issue_labels(session, issue) + return Response(status_code=204) + + +# Issue comments + +async def list_issue_comments_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + return JSONResponse([c.to_dict() for c in ops.list_issue_comments(session, issue)]) + + +async def create_issue_comment_route(request: Request) -> Response: + session = _session(request) + principal = _require_principal(request) + if isinstance(principal, Response): + return principal + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["issue_number"]) + issue = _require_issue(session, repo, number) + if isinstance(issue, Response): + return issue + body = await _json_body(request) + if isinstance(body, Response): + return body + text = body.get("body") + if not text: + return _error(422, "Validation Failed: body is required") + comment = ops.create_comment(session, issue, user=principal, body=text) + return JSONResponse(comment.to_dict(), status_code=201) + + +async def get_comment_route(request: Request) -> Response: + session = _session(request) + comment = ops.get_comment(session, str(request.path_params["comment_id"])) + if comment is None: + return _error(404, "Not Found") + return JSONResponse(comment.to_dict()) + + +async def update_comment_route(request: Request) -> Response: + session = _session(request) + comment = ops.get_comment(session, str(request.path_params["comment_id"])) + if comment is None: + return _error(404, "Not Found") + body = await _json_body(request) + if isinstance(body, Response): + return body + text = body.get("body") + if text is None: + return _error(422, "Validation Failed: body is required") + ops.update_comment(session, comment, body=text) + return JSONResponse(comment.to_dict()) + + +async def delete_comment_route(request: Request) -> Response: + session = _session(request) + comment = ops.get_comment(session, str(request.path_params["comment_id"])) + if comment is None: + return _error(404, "Not Found") + ops.delete_comment(session, comment) + return Response(status_code=204) + + +# Pull requests + +async def list_pulls_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + state = request.query_params.get("state", "open") + q = session.query(Issue).filter( + Issue.repository_id == repo.id, Issue.is_pull_request.is_(True) + ) + if state != "all": + q = q.filter(Issue.state == state) + pulls = q.order_by(Issue.number.desc()).all() + return JSONResponse([p.to_pull_dict() for p in pulls]) + + +async def create_pull_route(request: Request) -> Response: + session = _session(request) + principal = _require_principal(request) + if isinstance(principal, Response): + return principal + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + body = await _json_body(request) + if isinstance(body, Response): + return body + title = body.get("title") + head = body.get("head") + base = body.get("base") + if not title or not head or not base: + return _error(422, "Validation Failed: title, head, base are required") + pr = ops.create_issue( + session, + repo, + title=title, + body=body.get("body"), + user=principal, + is_pull_request=True, + head_ref=head, + base_ref=base, + draft=bool(body.get("draft", False)), + ) + return JSONResponse(pr.to_pull_dict(), status_code=201) + + +async def get_pull_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + return JSONResponse(pr.to_pull_dict()) + + +async def update_pull_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + body = await _json_body(request) + if isinstance(body, Response): + return body + ops.update_issue( + session, + pr, + title=body.get("title"), + body=body.get("body"), + state=body.get("state"), + ) + if "base" in body: + pr.base_ref = body["base"] + session.flush() + return JSONResponse(pr.to_pull_dict()) + + +async def merge_pull_route(request: Request) -> Response: + session = _session(request) + principal = _require_principal(request) + if isinstance(principal, Response): + return principal + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + if pr.merged: + return _error(405, "Pull Request is already merged") + if pr.state != "open": + return _error(405, "Pull Request is not mergeable") + body = await _json_body(request) + if isinstance(body, Response): + return body + sha = body.get("sha") if isinstance(body, dict) else None + ops.merge_pull(session, pr, merger=principal, commit_sha=sha) + return JSONResponse( + { + "sha": pr.merge_commit_sha, + "merged": True, + "message": "Pull Request successfully merged", + } + ) + + +async def requested_reviewers_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + reviewers = [prr.user.to_mini_dict() for prr in pr.requested_reviewers if prr.user] + return JSONResponse({"users": reviewers, "teams": []}) + + +async def request_reviewers_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + body = await _json_body(request) + if isinstance(body, Response): + return body + users = _resolve_users(session, list(body.get("reviewers", []) or [])) + ops.set_requested_reviewers(session, pr, users) + return JSONResponse(pr.to_pull_dict(), status_code=201) + + +async def remove_reviewers_route(request: Request) -> Response: + session = _session(request) + repo = _require_repo( + session, request.path_params["owner"], request.path_params["repo"] + ) + if isinstance(repo, Response): + return repo + number = int(request.path_params["pull_number"]) + pr = _require_issue(session, repo, number, expect_pr=True) + if isinstance(pr, Response): + return pr + body = await _json_body(request) + if isinstance(body, Response): + return body + users = _resolve_users(session, list(body.get("reviewers", []) or [])) + ops.remove_requested_reviewers(session, pr, users) + return JSONResponse(pr.to_pull_dict()) + + +routes = [ + # Repository metadata + Route("/repos/{owner}/{repo}", get_repo_route, methods=["GET"]), + # Labels + Route("/repos/{owner}/{repo}/labels", list_labels_route, methods=["GET"]), + Route("/repos/{owner}/{repo}/labels", create_label_route, methods=["POST"]), + Route("/repos/{owner}/{repo}/labels/{name}", get_label_route, methods=["GET"]), + Route("/repos/{owner}/{repo}/labels/{name}", update_label_route, methods=["PATCH"]), + Route( + "/repos/{owner}/{repo}/labels/{name}", delete_label_route, methods=["DELETE"] + ), + # Issues + Route("/repos/{owner}/{repo}/issues", list_issues_route, methods=["GET"]), + Route("/repos/{owner}/{repo}/issues", create_issue_route, methods=["POST"]), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}", + get_issue_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}", + update_issue_route, + methods=["PATCH"], + ), + # Issue assignees + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/assignees", + add_assignees_route, + methods=["POST"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/assignees", + remove_assignees_route, + methods=["DELETE"], + ), + # Issue labels + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/labels", + list_issue_labels_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/labels", + add_issue_labels_route, + methods=["POST"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/labels", + set_issue_labels_route, + methods=["PUT"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/labels", + clear_issue_labels_route, + methods=["DELETE"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/labels/{name}", + remove_single_label_route, + methods=["DELETE"], + ), + # Issue comments (also used by PRs — GitHub shares the route) + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/comments", + list_issue_comments_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/issues/{issue_number:int}/comments", + create_issue_comment_route, + methods=["POST"], + ), + Route( + "/repos/{owner}/{repo}/issues/comments/{comment_id}", + get_comment_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/issues/comments/{comment_id}", + update_comment_route, + methods=["PATCH"], + ), + Route( + "/repos/{owner}/{repo}/issues/comments/{comment_id}", + delete_comment_route, + methods=["DELETE"], + ), + # Pull requests + Route("/repos/{owner}/{repo}/pulls", list_pulls_route, methods=["GET"]), + Route("/repos/{owner}/{repo}/pulls", create_pull_route, methods=["POST"]), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}", + get_pull_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}", + update_pull_route, + methods=["PATCH"], + ), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}/merge", + merge_pull_route, + methods=["PUT"], + ), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}/requested_reviewers", + requested_reviewers_route, + methods=["GET"], + ), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}/requested_reviewers", + request_reviewers_route, + methods=["POST"], + ), + Route( + "/repos/{owner}/{repo}/pulls/{pull_number:int}/requested_reviewers", + remove_reviewers_route, + methods=["DELETE"], + ), +] diff --git a/backend/src/services/github/database/__init__.py b/backend/src/services/github/database/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/src/services/github/database/base.py b/backend/src/services/github/database/base.py new file mode 100644 index 00000000..fa2b68a5 --- /dev/null +++ b/backend/src/services/github/database/base.py @@ -0,0 +1,5 @@ +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + pass diff --git a/backend/src/services/github/database/operations.py b/backend/src/services/github/database/operations.py new file mode 100644 index 00000000..fa6fe2dd --- /dev/null +++ b/backend/src/services/github/database/operations.py @@ -0,0 +1,379 @@ +"""CRUD helpers for the GitHub replica. + +Each function takes a SQLAlchemy session and returns ORM objects. Route +handlers are responsible for formatting responses. +""" + +from __future__ import annotations + +import random +from datetime import datetime +from typing import Iterable, List, Optional + +from sqlalchemy import func +from sqlalchemy.orm import Session + +from .schema import ( + Issue, + IssueAssignee, + IssueComment, + IssueLabel, + Label, + PullRequestReviewer, + Repository, + User, +) + + +_ID_RANGE = (10_000_000, 99_999_999) + + +def _new_id() -> str: + return str(random.randint(*_ID_RANGE)) + + +def get_user_by_id(session: Session, user_id: str) -> Optional[User]: + return session.query(User).filter(User.id == user_id).one_or_none() + + +def get_user_by_login(session: Session, login: str) -> Optional[User]: + return session.query(User).filter(User.login == login).one_or_none() + + +def get_user_by_email(session: Session, email: str) -> Optional[User]: + return session.query(User).filter(User.email == email).one_or_none() + + +def get_repo(session: Session, owner: str, repo: str) -> Optional[Repository]: + full_name = f"{owner}/{repo}" + return ( + session.query(Repository).filter(Repository.full_name == full_name).one_or_none() + ) + + +def list_labels(session: Session, repo: Repository) -> List[Label]: + return ( + session.query(Label) + .filter(Label.repository_id == repo.id) + .order_by(Label.name) + .all() + ) + + +def get_label(session: Session, repo: Repository, name: str) -> Optional[Label]: + return ( + session.query(Label) + .filter(Label.repository_id == repo.id, Label.name == name) + .one_or_none() + ) + + +def create_label( + session: Session, + repo: Repository, + *, + name: str, + color: str = "ededed", + description: Optional[str] = None, +) -> Label: + label = Label( + id=_new_id(), + repository_id=repo.id, + name=name, + color=color, + description=description, + ) + session.add(label) + session.flush() + return label + + +def update_label( + session: Session, + label: Label, + *, + new_name: Optional[str] = None, + color: Optional[str] = None, + description: Optional[str] = None, +) -> Label: + if new_name is not None: + label.name = new_name + if color is not None: + label.color = color + if description is not None: + label.description = description + session.flush() + return label + + +def delete_label(session: Session, label: Label) -> None: + session.query(IssueLabel).filter(IssueLabel.label_id == label.id).delete() + session.delete(label) + session.flush() + + +def _next_number(session: Session, repo: Repository) -> int: + current = ( + session.query(func.max(Issue.number)) + .filter(Issue.repository_id == repo.id) + .scalar() + ) + return (current or 0) + 1 + + +def list_issues( + session: Session, + repo: Repository, + *, + state: str = "open", + include_pulls: bool = True, +) -> List[Issue]: + q = session.query(Issue).filter(Issue.repository_id == repo.id) + if state != "all": + q = q.filter(Issue.state == state) + if not include_pulls: + q = q.filter(Issue.is_pull_request.is_(False)) + return q.order_by(Issue.number.desc()).all() + + +def get_issue(session: Session, repo: Repository, number: int) -> Optional[Issue]: + return ( + session.query(Issue) + .filter(Issue.repository_id == repo.id, Issue.number == number) + .one_or_none() + ) + + +def create_issue( + session: Session, + repo: Repository, + *, + title: str, + body: Optional[str], + user: User, + assignees: Iterable[User] = (), + labels: Iterable[Label] = (), + is_pull_request: bool = False, + head_ref: Optional[str] = None, + base_ref: Optional[str] = None, + head_sha: Optional[str] = None, + base_sha: Optional[str] = None, + draft: bool = False, +) -> Issue: + issue = Issue( + id=_new_id(), + repository_id=repo.id, + number=_next_number(session, repo), + title=title, + body=body, + state="open", + user_id=user.id, + is_pull_request=is_pull_request, + draft=draft if is_pull_request else None, + merged=False if is_pull_request else None, + head_ref=head_ref, + base_ref=base_ref, + head_sha=head_sha, + base_sha=base_sha, + ) + session.add(issue) + session.flush() + for u in assignees: + session.add(IssueAssignee(issue_id=issue.id, user_id=u.id)) + for lbl in labels: + session.add(IssueLabel(issue_id=issue.id, label_id=lbl.id)) + session.flush() + return issue + + +def update_issue( + session: Session, + issue: Issue, + *, + title: Optional[str] = None, + body: Optional[str] = None, + state: Optional[str] = None, + state_reason: Optional[str] = None, + locked: Optional[bool] = None, +) -> Issue: + if title is not None: + issue.title = title + if body is not None: + issue.body = body + if locked is not None: + issue.locked = locked + if state is not None and state != issue.state: + issue.state = state + if state == "closed": + issue.closed_at = datetime.utcnow() + if state_reason: + issue.state_reason = state_reason + elif issue.state_reason is None: + issue.state_reason = "completed" + else: + issue.closed_at = None + issue.state_reason = None + elif state_reason is not None and issue.state == "closed": + issue.state_reason = state_reason + session.flush() + return issue + + +def set_issue_assignees( + session: Session, issue: Issue, users: Iterable[User] +) -> List[User]: + existing = {ia.user_id for ia in issue.assignees} + added: List[User] = [] + for u in users: + if u.id in existing: + continue + session.add(IssueAssignee(issue_id=issue.id, user_id=u.id)) + added.append(u) + existing.add(u.id) + session.flush() + session.refresh(issue) + return added + + +def remove_issue_assignees( + session: Session, issue: Issue, users: Iterable[User] +) -> List[User]: + ids = {u.id for u in users} + to_remove = [ia for ia in issue.assignees if ia.user_id in ids] + removed_users = [ia.user for ia in to_remove if ia.user] + for ia in to_remove: + session.delete(ia) + session.flush() + session.refresh(issue) + return removed_users + + +def set_issue_labels( + session: Session, issue: Issue, labels: Iterable[Label] +) -> List[Label]: + session.query(IssueLabel).filter(IssueLabel.issue_id == issue.id).delete() + label_list = list(labels) + for lbl in label_list: + session.add(IssueLabel(issue_id=issue.id, label_id=lbl.id)) + session.flush() + session.refresh(issue) + return label_list + + +def add_issue_labels( + session: Session, issue: Issue, labels: Iterable[Label] +) -> List[Label]: + existing = {il.label_id for il in issue.labels} + for lbl in labels: + if lbl.id in existing: + continue + session.add(IssueLabel(issue_id=issue.id, label_id=lbl.id)) + existing.add(lbl.id) + session.flush() + session.refresh(issue) + return [il.label for il in issue.labels if il.label] + + +def remove_issue_label(session: Session, issue: Issue, label: Label) -> None: + session.query(IssueLabel).filter( + IssueLabel.issue_id == issue.id, IssueLabel.label_id == label.id + ).delete() + session.flush() + session.refresh(issue) + + +def clear_issue_labels(session: Session, issue: Issue) -> None: + session.query(IssueLabel).filter(IssueLabel.issue_id == issue.id).delete() + session.flush() + session.refresh(issue) + + +def list_issue_comments(session: Session, issue: Issue) -> List[IssueComment]: + return ( + session.query(IssueComment) + .filter(IssueComment.issue_id == issue.id) + .order_by(IssueComment.created_at) + .all() + ) + + +def get_comment(session: Session, comment_id: str) -> Optional[IssueComment]: + return ( + session.query(IssueComment).filter(IssueComment.id == comment_id).one_or_none() + ) + + +def create_comment( + session: Session, issue: Issue, *, user: User, body: str +) -> IssueComment: + comment = IssueComment( + id=_new_id(), + issue_id=issue.id, + user_id=user.id, + body=body, + ) + session.add(comment) + issue.comments_count = (issue.comments_count or 0) + 1 + session.flush() + return comment + + +def update_comment( + session: Session, comment: IssueComment, *, body: str +) -> IssueComment: + comment.body = body + session.flush() + return comment + + +def delete_comment(session: Session, comment: IssueComment) -> None: + if comment.issue and comment.issue.comments_count: + comment.issue.comments_count = max(comment.issue.comments_count - 1, 0) + session.delete(comment) + session.flush() + + +def set_requested_reviewers( + session: Session, pr: Issue, users: Iterable[User] +) -> List[User]: + existing = {prr.user_id for prr in pr.requested_reviewers} + added: List[User] = [] + for u in users: + if u.id in existing: + continue + session.add(PullRequestReviewer(issue_id=pr.id, user_id=u.id)) + added.append(u) + existing.add(u.id) + session.flush() + session.refresh(pr) + return added + + +def remove_requested_reviewers( + session: Session, pr: Issue, users: Iterable[User] +) -> None: + ids = {u.id for u in users} + session.query(PullRequestReviewer).filter( + PullRequestReviewer.issue_id == pr.id, + PullRequestReviewer.user_id.in_(ids), + ).delete(synchronize_session=False) + session.flush() + session.refresh(pr) + + +def merge_pull( + session: Session, + pr: Issue, + *, + merger: User, + commit_sha: Optional[str] = None, +) -> Issue: + pr.merged = True + pr.state = "closed" + pr.state_reason = "completed" + pr.merged_at = datetime.utcnow() + pr.closed_at = pr.merged_at + pr.merged_by_id = merger.id + pr.merge_commit_sha = commit_sha or _new_id() * 2 + session.flush() + return pr diff --git a/backend/src/services/github/database/schema.py b/backend/src/services/github/database/schema.py new file mode 100644 index 00000000..e14a9099 --- /dev/null +++ b/backend/src/services/github/database/schema.py @@ -0,0 +1,361 @@ +"""GitHub API replica schema. + +Follows GitHub's real data model: issues and pull requests live in the same +number space per repo. A single ``github_issues`` table stores both, with +``is_pull_request`` distinguishing them. PR-only fields are nullable. + +Only the surface needed for the bench is modeled: issues, pulls, labels, +assignees, requested reviewers, and issue/PR comments (GitHub calls them +"issue comments" even when attached to a PR). +""" + +from __future__ import annotations + +from datetime import datetime +from typing import List, Optional + +from sqlalchemy import ( + BigInteger, + Boolean, + DateTime, + ForeignKey, + Index, + Integer, + String, + Text, + UniqueConstraint, +) +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from .base import Base + + +class User(Base): + __tablename__ = "github_users" + + id: Mapped[str] = mapped_column(String(32), primary_key=True) + login: Mapped[str] = mapped_column(String(64), unique=True, index=True) + name: Mapped[Optional[str]] = mapped_column(String(128)) + email: Mapped[Optional[str]] = mapped_column(String(255), index=True) + type: Mapped[str] = mapped_column(String(16), default="User") + site_admin: Mapped[bool] = mapped_column(Boolean, default=False) + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) + + def to_mini_dict(self) -> dict: + return { + "login": self.login, + "id": int(self.id) if self.id.isdigit() else self.id, + "node_id": f"U_{self.id}", + "type": self.type, + "site_admin": self.site_admin, + "url": f"https://api.github.com/users/{self.login}", + "html_url": f"https://github.com/{self.login}", + "avatar_url": f"https://github.com/{self.login}.png", + } + + +class Repository(Base): + __tablename__ = "github_repositories" + __table_args__ = ( + UniqueConstraint("owner_id", "name", name="uq_github_repo_owner_name"), + ) + + id: Mapped[str] = mapped_column(String(32), primary_key=True) + owner_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_users.id"), index=True + ) + name: Mapped[str] = mapped_column(String(128)) + full_name: Mapped[str] = mapped_column(String(256), unique=True, index=True) + description: Mapped[Optional[str]] = mapped_column(Text) + private: Mapped[bool] = mapped_column(Boolean, default=False) + default_branch: Mapped[str] = mapped_column(String(128), default="main") + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) + + owner: Mapped["User"] = relationship("User") + issues: Mapped[List["Issue"]] = relationship("Issue", back_populates="repository") + labels: Mapped[List["Label"]] = relationship("Label", back_populates="repository") + + def to_dict(self) -> dict: + return { + "id": int(self.id) if self.id.isdigit() else self.id, + "node_id": f"R_{self.id}", + "name": self.name, + "full_name": self.full_name, + "owner": self.owner.to_mini_dict() if self.owner else None, + "private": self.private, + "description": self.description, + "default_branch": self.default_branch, + "url": f"https://api.github.com/repos/{self.full_name}", + "html_url": f"https://github.com/{self.full_name}", + } + + +class Label(Base): + __tablename__ = "github_labels" + __table_args__ = ( + UniqueConstraint("repository_id", "name", name="uq_github_label_repo_name"), + ) + + id: Mapped[str] = mapped_column(String(32), primary_key=True) + repository_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_repositories.id"), index=True + ) + name: Mapped[str] = mapped_column(String(128)) + color: Mapped[str] = mapped_column(String(8), default="ededed") + description: Mapped[Optional[str]] = mapped_column(Text) + is_default: Mapped[bool] = mapped_column(Boolean, default=False) + + repository: Mapped["Repository"] = relationship( + "Repository", back_populates="labels" + ) + + def to_dict(self) -> dict: + return { + "id": int(self.id) if self.id.isdigit() else self.id, + "node_id": f"L_{self.id}", + "name": self.name, + "color": self.color, + "description": self.description, + "default": self.is_default, + "url": ( + f"https://api.github.com/repos/{self.repository.full_name}/labels/{self.name}" + if self.repository + else None + ), + } + + +class Issue(Base): + """Unified issue + pull request row. + + When ``is_pull_request`` is true, the PR-specific fields (head_ref, + base_ref, merged, etc.) are populated and the row represents a PR. + GitHub's ``/issues`` list endpoint also returns PRs (with a + ``pull_request`` field); this mirrors that shape. + """ + + __tablename__ = "github_issues" + __table_args__ = ( + UniqueConstraint("repository_id", "number", name="uq_github_issue_repo_number"), + Index("ix_github_issues_state", "repository_id", "state"), + ) + + id: Mapped[str] = mapped_column(String(32), primary_key=True) + repository_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_repositories.id"), index=True + ) + number: Mapped[int] = mapped_column(Integer) + title: Mapped[str] = mapped_column(Text) + body: Mapped[Optional[str]] = mapped_column(Text) + state: Mapped[str] = mapped_column(String(16), default="open") + state_reason: Mapped[Optional[str]] = mapped_column(String(32)) + user_id: Mapped[Optional[str]] = mapped_column( + String(32), ForeignKey("github_users.id"), index=True + ) + locked: Mapped[bool] = mapped_column(Boolean, default=False) + comments_count: Mapped[int] = mapped_column(Integer, default=0) + + is_pull_request: Mapped[bool] = mapped_column( + Boolean, default=False, server_default="false", index=True + ) + draft: Mapped[Optional[bool]] = mapped_column(Boolean) + merged: Mapped[Optional[bool]] = mapped_column(Boolean) + merged_at: Mapped[Optional[datetime]] = mapped_column(DateTime) + merged_by_id: Mapped[Optional[str]] = mapped_column( + String(32), ForeignKey("github_users.id") + ) + merge_commit_sha: Mapped[Optional[str]] = mapped_column(String(64)) + head_ref: Mapped[Optional[str]] = mapped_column(String(255)) + base_ref: Mapped[Optional[str]] = mapped_column(String(255)) + head_sha: Mapped[Optional[str]] = mapped_column(String(64)) + base_sha: Mapped[Optional[str]] = mapped_column(String(64)) + + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) + updated_at: Mapped[datetime] = mapped_column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow + ) + closed_at: Mapped[Optional[datetime]] = mapped_column(DateTime) + + repository: Mapped["Repository"] = relationship( + "Repository", back_populates="issues" + ) + user: Mapped[Optional["User"]] = relationship("User", foreign_keys=[user_id]) + merged_by: Mapped[Optional["User"]] = relationship( + "User", foreign_keys=[merged_by_id] + ) + labels: Mapped[List["IssueLabel"]] = relationship( + "IssueLabel", back_populates="issue", cascade="all, delete-orphan" + ) + assignees: Mapped[List["IssueAssignee"]] = relationship( + "IssueAssignee", back_populates="issue", cascade="all, delete-orphan" + ) + requested_reviewers: Mapped[List["PullRequestReviewer"]] = relationship( + "PullRequestReviewer", back_populates="issue", cascade="all, delete-orphan" + ) + comments: Mapped[List["IssueComment"]] = relationship( + "IssueComment", + back_populates="issue", + cascade="all, delete-orphan", + order_by="IssueComment.created_at", + ) + + def _base_dict(self) -> dict: + full_name = self.repository.full_name if self.repository else "" + labels = [il.label.to_dict() for il in self.labels if il.label] + assignees = [ia.user.to_mini_dict() for ia in self.assignees if ia.user] + url_base = f"https://api.github.com/repos/{full_name}" + return { + "id": int(self.id) if self.id.isdigit() else self.id, + "node_id": f"I_{self.id}", + "number": self.number, + "title": self.title, + "body": self.body, + "state": self.state, + "state_reason": self.state_reason, + "locked": self.locked, + "comments": self.comments_count, + "user": self.user.to_mini_dict() if self.user else None, + "labels": labels, + "assignee": assignees[0] if assignees else None, + "assignees": assignees, + "created_at": self.created_at.isoformat() if self.created_at else None, + "updated_at": self.updated_at.isoformat() if self.updated_at else None, + "closed_at": self.closed_at.isoformat() if self.closed_at else None, + "url": f"{url_base}/issues/{self.number}", + "html_url": f"https://github.com/{full_name}/issues/{self.number}", + "repository_url": url_base, + } + + def to_issue_dict(self) -> dict: + data = self._base_dict() + if self.is_pull_request: + full_name = self.repository.full_name if self.repository else "" + data["pull_request"] = { + "url": f"https://api.github.com/repos/{full_name}/pulls/{self.number}", + "html_url": f"https://github.com/{full_name}/pull/{self.number}", + "merged_at": self.merged_at.isoformat() if self.merged_at else None, + } + return data + + def to_pull_dict(self) -> dict: + data = self._base_dict() + full_name = self.repository.full_name if self.repository else "" + data["url"] = f"https://api.github.com/repos/{full_name}/pulls/{self.number}" + data["html_url"] = f"https://github.com/{full_name}/pull/{self.number}" + reviewers = [ + prr.user.to_mini_dict() for prr in self.requested_reviewers if prr.user + ] + data.update( + { + "draft": bool(self.draft), + "merged": bool(self.merged), + "merged_at": self.merged_at.isoformat() if self.merged_at else None, + "merged_by": self.merged_by.to_mini_dict() if self.merged_by else None, + "merge_commit_sha": self.merge_commit_sha, + "head": { + "ref": self.head_ref, + "sha": self.head_sha, + "label": f"{self.repository.owner.login}:{self.head_ref}" + if self.repository and self.repository.owner and self.head_ref + else self.head_ref, + }, + "base": { + "ref": self.base_ref, + "sha": self.base_sha, + "label": f"{self.repository.owner.login}:{self.base_ref}" + if self.repository and self.repository.owner and self.base_ref + else self.base_ref, + }, + "requested_reviewers": reviewers, + "requested_teams": [], + } + ) + return data + + +class IssueLabel(Base): + __tablename__ = "github_issue_labels" + + issue_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_issues.id"), primary_key=True + ) + label_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_labels.id"), primary_key=True + ) + + issue: Mapped["Issue"] = relationship("Issue", back_populates="labels") + label: Mapped["Label"] = relationship("Label") + + +class IssueAssignee(Base): + __tablename__ = "github_issue_assignees" + + issue_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_issues.id"), primary_key=True + ) + user_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_users.id"), primary_key=True + ) + + issue: Mapped["Issue"] = relationship("Issue", back_populates="assignees") + user: Mapped["User"] = relationship("User") + + +class PullRequestReviewer(Base): + __tablename__ = "github_pull_request_reviewers" + + issue_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_issues.id"), primary_key=True + ) + user_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_users.id"), primary_key=True + ) + + issue: Mapped["Issue"] = relationship("Issue", back_populates="requested_reviewers") + user: Mapped["User"] = relationship("User") + + +class IssueComment(Base): + """Comments on an issue or pull request. + + GitHub unifies these — both surface through + ``/repos/{owner}/{repo}/issues/{number}/comments`` and PR review-thread + comments are a separate system. + """ + + __tablename__ = "github_issue_comments" + + id: Mapped[str] = mapped_column(String(32), primary_key=True) + issue_id: Mapped[str] = mapped_column( + String(32), ForeignKey("github_issues.id"), index=True + ) + user_id: Mapped[Optional[str]] = mapped_column( + String(32), ForeignKey("github_users.id") + ) + body: Mapped[str] = mapped_column(Text) + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) + updated_at: Mapped[datetime] = mapped_column( + DateTime, default=datetime.utcnow, onupdate=datetime.utcnow + ) + + issue: Mapped["Issue"] = relationship("Issue", back_populates="comments") + user: Mapped[Optional["User"]] = relationship("User") + + def to_dict(self) -> dict: + full_name = ( + self.issue.repository.full_name + if self.issue and self.issue.repository + else "" + ) + number = self.issue.number if self.issue else None + return { + "id": int(self.id) if self.id.isdigit() else self.id, + "node_id": f"IC_{self.id}", + "user": self.user.to_mini_dict() if self.user else None, + "body": self.body, + "created_at": self.created_at.isoformat() if self.created_at else None, + "updated_at": self.updated_at.isoformat() if self.updated_at else None, + "issue_url": f"https://api.github.com/repos/{full_name}/issues/{number}", + "url": f"https://api.github.com/repos/{full_name}/issues/comments/{self.id}", + "html_url": f"https://github.com/{full_name}/issues/{number}#issuecomment-{self.id}", + } diff --git a/backend/src/services/github/utils/__init__.py b/backend/src/services/github/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/tests/integration/test_github_api.py b/backend/tests/integration/test_github_api.py new file mode 100644 index 00000000..9cd35f68 --- /dev/null +++ b/backend/tests/integration/test_github_api.py @@ -0,0 +1,358 @@ +"""Integration tests for the GitHub API replica. + +These exercise the routes over an ASGI transport against a freshly cloned +``github_default`` schema. Each test gets its own environment so that +mutations don't bleed across cases. +""" + +from __future__ import annotations + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient +from starlette.applications import Starlette +from starlette.middleware import Middleware +from starlette.middleware.base import BaseHTTPMiddleware + +from src.services.github.api.routes import routes as github_routes + + +AGENT_USER_ID = "10000001" # agent-bot +ALICE_ID = "10000002" +BOB_ID = "10000003" +CAROL_ID = "10000004" +REPO = "acme/widgets" +BASE = f"/repos/{REPO}" + + +def _app_for(session_manager, env_id: str, impersonate_user_id: str) -> Starlette: + async def add_db_session(request, call_next): + with session_manager.with_session_for_environment(env_id) as session: + request.state.db_session = session + request.state.environment_id = env_id + request.state.impersonate_user_id = impersonate_user_id + request.state.impersonate_email = None + return await call_next(request) + + middleware = [Middleware(BaseHTTPMiddleware, dispatch=add_db_session)] + return Starlette(routes=github_routes, middleware=middleware) + + +@pytest_asyncio.fixture +async def github_client( + test_user_id, core_isolation_engine, session_manager, environment_handler +): + env = core_isolation_engine.create_environment( + template_schema="github_default", + ttl_seconds=3600, + created_by=test_user_id, + impersonate_user_id=AGENT_USER_ID, + ) + try: + app = _app_for(session_manager, env.environment_id, AGENT_USER_ID) + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + finally: + environment_handler.drop_schema(env.schema_name) + + +@pytest_asyncio.fixture +async def github_client_alice( + test_user_id, core_isolation_engine, session_manager, environment_handler +): + env = core_isolation_engine.create_environment( + template_schema="github_default", + ttl_seconds=3600, + created_by=test_user_id, + impersonate_user_id=ALICE_ID, + ) + try: + app = _app_for(session_manager, env.environment_id, ALICE_ID) + transport = ASGITransport(app=app) + async with AsyncClient(transport=transport, base_url="http://test") as client: + yield client + finally: + environment_handler.drop_schema(env.schema_name) + + +@pytest.mark.asyncio +class TestRepository: + async def test_get_repo(self, github_client: AsyncClient): + resp = await github_client.get(BASE) + assert resp.status_code == 200 + data = resp.json() + assert data["full_name"] == REPO + assert data["owner"]["login"] == "alice" + assert data["default_branch"] == "main" + + async def test_get_missing_repo(self, github_client: AsyncClient): + resp = await github_client.get("/repos/acme/nope") + assert resp.status_code == 404 + + +@pytest.mark.asyncio +class TestLabels: + async def test_list_labels(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/labels") + assert resp.status_code == 200 + names = {lbl["name"] for lbl in resp.json()} + assert {"bug", "enhancement", "documentation"} <= names + + async def test_create_label(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/labels", + json={"name": "security", "color": "b60205", "description": "SEC"}, + ) + assert resp.status_code == 201 + data = resp.json() + assert data["name"] == "security" + assert data["color"] == "b60205" + + # Duplicate create is 422 + dup = await github_client.post(f"{BASE}/labels", json={"name": "security"}) + assert dup.status_code == 422 + + async def test_update_label(self, github_client: AsyncClient): + resp = await github_client.patch( + f"{BASE}/labels/bug", + json={"color": "ff0000", "description": "Critical bug"}, + ) + assert resp.status_code == 200 + assert resp.json()["color"] == "ff0000" + + async def test_delete_label(self, github_client: AsyncClient): + resp = await github_client.delete(f"{BASE}/labels/needs-triage") + assert resp.status_code == 204 + missing = await github_client.get(f"{BASE}/labels/needs-triage") + assert missing.status_code == 404 + + +@pytest.mark.asyncio +class TestIssues: + async def test_list_open_issues(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/issues") + assert resp.status_code == 200 + numbers = {i["number"] for i in resp.json()} + # Both issues and PRs come back on /issues in real GitHub. + assert {1, 2, 4, 5} <= numbers + # Closed issue #3 should not appear by default. + assert 3 not in numbers + + async def test_list_all_states(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/issues", params={"state": "all"}) + numbers = {i["number"] for i in resp.json()} + assert 3 in numbers # closed + + async def test_get_issue(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/issues/1") + assert resp.status_code == 200 + data = resp.json() + assert data["number"] == 1 + assert data["title"].startswith("Widget factory") + assert any(lbl["name"] == "bug" for lbl in data["labels"]) + assert "pull_request" not in data + + async def test_create_issue(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/issues", + json={ + "title": "Widget factory leaks memory", + "body": "Leaks heap on large inputs.", + "labels": ["bug"], + "assignees": ["bob"], + }, + ) + assert resp.status_code == 201 + data = resp.json() + assert data["state"] == "open" + assert data["user"]["login"] == "agent-bot" + assert data["number"] == 6 # next after seeded 1-5 + assert any(lbl["name"] == "bug" for lbl in data["labels"]) + assert data["assignees"][0]["login"] == "bob" + + async def test_close_issue(self, github_client: AsyncClient): + resp = await github_client.patch( + f"{BASE}/issues/2", + json={"state": "closed", "state_reason": "completed"}, + ) + assert resp.status_code == 200 + data = resp.json() + assert data["state"] == "closed" + assert data["state_reason"] == "completed" + assert data["closed_at"] is not None + + async def test_update_issue_labels_replaces(self, github_client: AsyncClient): + resp = await github_client.patch( + f"{BASE}/issues/1", + json={"labels": ["documentation"]}, + ) + assert resp.status_code == 200 + names = {lbl["name"] for lbl in resp.json()["labels"]} + assert names == {"documentation"} + + +@pytest.mark.asyncio +class TestIssueLabels: + async def test_add_labels(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/issues/2/labels", json={"labels": ["needs-triage"]} + ) + assert resp.status_code == 200 + names = {lbl["name"] for lbl in resp.json()} + assert "needs-triage" in names + + async def test_set_labels_replaces(self, github_client: AsyncClient): + resp = await github_client.put( + f"{BASE}/issues/1/labels", json={"labels": ["documentation"]} + ) + assert resp.status_code == 200 + assert [lbl["name"] for lbl in resp.json()] == ["documentation"] + + async def test_remove_single_label(self, github_client: AsyncClient): + resp = await github_client.delete(f"{BASE}/issues/1/labels/needs-triage") + assert resp.status_code == 200 + names = {lbl["name"] for lbl in resp.json()} + assert "needs-triage" not in names + assert "bug" in names # still there + + async def test_clear_labels(self, github_client: AsyncClient): + resp = await github_client.delete(f"{BASE}/issues/1/labels") + assert resp.status_code == 204 + listing = await github_client.get(f"{BASE}/issues/1/labels") + assert listing.json() == [] + + +@pytest.mark.asyncio +class TestIssueAssignees: + async def test_add_assignees(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/issues/1/assignees", json={"assignees": ["bob", "carol"]} + ) + assert resp.status_code == 201 + logins = {u["login"] for u in resp.json()["assignees"]} + assert {"bob", "carol"} <= logins + + async def test_remove_assignees(self, github_client: AsyncClient): + await github_client.post( + f"{BASE}/issues/1/assignees", json={"assignees": ["bob"]} + ) + resp = await github_client.request( + "DELETE", + f"{BASE}/issues/1/assignees", + json={"assignees": ["bob"]}, + ) + assert resp.status_code == 200 + logins = {u["login"] for u in resp.json()["assignees"]} + assert "bob" not in logins + + +@pytest.mark.asyncio +class TestIssueComments: + async def test_list_comments(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/issues/1/comments") + assert resp.status_code == 200 + assert len(resp.json()) == 1 + + async def test_create_comment(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/issues/1/comments", + json={"body": "I can reproduce on 0.5.0 too."}, + ) + assert resp.status_code == 201 + data = resp.json() + assert data["body"].startswith("I can reproduce") + assert data["user"]["login"] == "agent-bot" + + # comments_count should have bumped + issue = await github_client.get(f"{BASE}/issues/1") + assert issue.json()["comments"] == 2 + + async def test_update_comment(self, github_client_alice: AsyncClient): + resp = await github_client_alice.patch( + f"{BASE}/issues/comments/50000001", + json={"body": "Reproduced on 0.4.1 and 0.5.0 — triaging."}, + ) + assert resp.status_code == 200 + assert "0.5.0" in resp.json()["body"] + + async def test_delete_comment(self, github_client_alice: AsyncClient): + resp = await github_client_alice.delete(f"{BASE}/issues/comments/50000001") + assert resp.status_code == 204 + listing = await github_client_alice.get(f"{BASE}/issues/1/comments") + assert listing.json() == [] + + +@pytest.mark.asyncio +class TestPullRequests: + async def test_list_open_pulls(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/pulls") + assert resp.status_code == 200 + numbers = {p["number"] for p in resp.json()} + assert numbers == {4, 5} + + async def test_get_pull(self, github_client: AsyncClient): + resp = await github_client.get(f"{BASE}/pulls/4") + assert resp.status_code == 200 + data = resp.json() + assert data["head"]["ref"] == "bob/fix-empty-input" + assert data["base"]["ref"] == "main" + assert data["merged"] is False + + async def test_get_pull_rejects_issue(self, github_client: AsyncClient): + # #1 is an issue, not a PR — /pulls should not find it. + resp = await github_client.get(f"{BASE}/pulls/1") + assert resp.status_code == 404 + + async def test_create_pull(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/pulls", + json={ + "title": "Docs: clarify lifecycle hooks", + "head": "agent-bot/lifecycle-docs", + "base": "main", + "body": "Expands README §3.", + }, + ) + assert resp.status_code == 201 + data = resp.json() + assert data["head"]["ref"] == "agent-bot/lifecycle-docs" + assert data["user"]["login"] == "agent-bot" + assert data["merged"] is False + + async def test_request_and_remove_reviewers(self, github_client: AsyncClient): + resp = await github_client.post( + f"{BASE}/pulls/4/requested_reviewers", + json={"reviewers": ["alice", "carol"]}, + ) + assert resp.status_code == 201 + logins = {u["login"] for u in resp.json()["requested_reviewers"]} + assert {"alice", "carol"} <= logins + + rm = await github_client.request( + "DELETE", + f"{BASE}/pulls/4/requested_reviewers", + json={"reviewers": ["alice"]}, + ) + assert rm.status_code == 200 + logins = {u["login"] for u in rm.json()["requested_reviewers"]} + assert "alice" not in logins + + async def test_merge_pull(self, github_client_alice: AsyncClient): + resp = await github_client_alice.put(f"{BASE}/pulls/4/merge", json={}) + assert resp.status_code == 200 + data = resp.json() + assert data["merged"] is True + assert data["sha"] + + detail = await github_client_alice.get(f"{BASE}/pulls/4") + detail_data = detail.json() + assert detail_data["merged"] is True + assert detail_data["state"] == "closed" + assert detail_data["merged_by"]["login"] == "alice" + + async def test_merge_already_merged_is_405(self, github_client_alice: AsyncClient): + first = await github_client_alice.put(f"{BASE}/pulls/4/merge", json={}) + assert first.status_code == 200 + again = await github_client_alice.put(f"{BASE}/pulls/4/merge", json={}) + assert again.status_code == 405 diff --git a/backend/utils/seed_github_template.py b/backend/utils/seed_github_template.py new file mode 100644 index 00000000..b749b33f --- /dev/null +++ b/backend/utils/seed_github_template.py @@ -0,0 +1,181 @@ +#!/usr/bin/env python3 +"""Seed script for GitHub template schemas. + +Creates: +- ``github_base``: empty schema (tables only), handy as a clean-slate template. +- ``github_default``: pre-populated with a small ACME widgets repo containing + issues, pull requests, labels, assignees, and comments. + +Usage: + python backend/utils/seed_github_template.py +""" + +from __future__ import annotations + +import json +import os +import sys +from pathlib import Path +from uuid import uuid4 + +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from sqlalchemy import create_engine, text + +from src.services.github.database import schema as github_schema +from src.services.github.database.base import Base + + +TABLE_ORDER = [ + "github_users", + "github_repositories", + "github_labels", + "github_issues", + "github_issue_labels", + "github_issue_assignees", + "github_pull_request_reviewers", + "github_issue_comments", +] + + +def create_schema(conn, schema_name: str) -> None: + conn.execute(text(f"DROP SCHEMA IF EXISTS {schema_name} CASCADE")) + conn.execute(text(f"CREATE SCHEMA {schema_name}")) + + +def create_tables(conn, schema_name: str) -> None: + conn_with_schema = conn.execution_options(schema_translate_map={None: schema_name}) + _ = github_schema + Base.metadata.create_all(conn_with_schema, checkfirst=True) + + +def insert_seed_data(conn, schema_name: str, seed_data: dict) -> None: + for table_name in TABLE_ORDER: + records = seed_data.get(table_name) or [] + if not records: + continue + print(f" Inserting {len(records)} {table_name}...") + for record in records: + columns = ", ".join(record.keys()) + placeholders = ", ".join(f":{k}" for k in record.keys()) + sql = ( + f"INSERT INTO {schema_name}.{table_name} ({columns}) " + f"VALUES ({placeholders})" + ) + conn.execute(text(sql), record) + + +def register_public_template( + conn, + *, + service: str, + name: str, + location: str, + description: str | None = None, + table_order: list[str] | None = None, +) -> None: + existing = conn.execute( + text( + """ + SELECT id FROM public.environments + WHERE service = :service + AND name = :name + AND version = :version + AND visibility = 'public' + AND owner_id IS NULL + LIMIT 1 + """ + ), + {"service": service, "name": name, "version": "v1"}, + ).fetchone() + + if existing: + print(f"Template {name} already exists, skipping registration") + return + + conn.execute( + text( + """ + INSERT INTO public.environments ( + id, service, name, version, visibility, description, + owner_id, kind, location, table_order, created_at, updated_at + ) VALUES ( + :id, :service, :name, :version, 'public', :description, + NULL, 'schema', :location, :table_order, NOW(), NOW() + ) + """ + ), + { + "id": str(uuid4()), + "service": service, + "name": name, + "version": "v1", + "description": description, + "location": location, + "table_order": json.dumps(table_order) if table_order else None, + }, + ) + + +def create_template(engine, template_name: str, seed_file: Path | None = None) -> None: + print(f"\n=== Creating {template_name} ===") + + with engine.begin() as conn: + create_schema(conn, template_name) + print(f"Created schema: {template_name}") + + create_tables(conn, template_name) + print(f"Created {len(Base.metadata.tables)} tables") + + if seed_file: + if not seed_file.exists(): + print(f"Seed file not found: {seed_file}") + return + with open(seed_file) as f: + seed_data = json.load(f) + insert_seed_data(conn, template_name, seed_data) + print(f"Loaded seed data from {seed_file.name}") + else: + print(f"Empty template {template_name} ready") + + description = ( + "GitHub base template without seed data" + if template_name == "github_base" + else "GitHub default template with a small widgets repo" + ) + register_public_template( + conn, + service="github", + name=template_name, + location=template_name, + description=description, + table_order=TABLE_ORDER, + ) + print(f"Registered public template: {template_name}") + + +def main() -> None: + db_url = os.environ.get("DATABASE_URL") + if not db_url: + print("ERROR: DATABASE_URL environment variable not set") + sys.exit(1) + + engine = create_engine(db_url) + + seeds_dir = Path(__file__).parent.parent / "seeds" / "github" + if not seeds_dir.exists(): + seeds_dir = ( + Path(__file__).parent.parent.parent / "examples" / "github" / "seeds" + ) + + create_template(engine, "github_base") + + seed_files = sorted(seeds_dir.glob("*.json")) + for seed_file in seed_files: + create_template(engine, seed_file.stem, seed_file) + + print(f"\n All {1 + len(seed_files)} GitHub template(s) created successfully\n") + + +if __name__ == "__main__": + main() diff --git a/examples/github/seeds/github_default.json b/examples/github/seeds/github_default.json new file mode 100644 index 00000000..0e64a1bc --- /dev/null +++ b/examples/github/seeds/github_default.json @@ -0,0 +1,202 @@ +{ + "github_users": [ + { + "id": "10000001", + "login": "agent-bot", + "name": "Agent Bot", + "email": "agent@example.com", + "type": "Bot", + "site_admin": false, + "created_at": "2024-01-10T09:00:00" + }, + { + "id": "10000002", + "login": "alice", + "name": "Alice Maintainer", + "email": "alice@example.com", + "type": "User", + "site_admin": true, + "created_at": "2023-06-01T12:00:00" + }, + { + "id": "10000003", + "login": "bob", + "name": "Bob Contributor", + "email": "bob@example.com", + "type": "User", + "site_admin": false, + "created_at": "2023-07-15T12:00:00" + }, + { + "id": "10000004", + "login": "carol", + "name": "Carol Reviewer", + "email": "carol@example.com", + "type": "User", + "site_admin": false, + "created_at": "2023-08-22T12:00:00" + } + ], + "github_repositories": [ + { + "id": "20000001", + "owner_id": "10000002", + "name": "widgets", + "full_name": "acme/widgets", + "description": "Reference widgets library", + "private": false, + "default_branch": "main", + "created_at": "2024-01-15T10:00:00" + } + ], + "github_labels": [ + { + "id": "30000001", + "repository_id": "20000001", + "name": "bug", + "color": "d73a4a", + "description": "Something isn't working", + "is_default": true + }, + { + "id": "30000002", + "repository_id": "20000001", + "name": "enhancement", + "color": "a2eeef", + "description": "New feature or request", + "is_default": true + }, + { + "id": "30000003", + "repository_id": "20000001", + "name": "documentation", + "color": "0075ca", + "description": "Improvements or additions to documentation", + "is_default": true + }, + { + "id": "30000004", + "repository_id": "20000001", + "name": "good first issue", + "color": "7057ff", + "description": "Good for newcomers", + "is_default": true + }, + { + "id": "30000005", + "repository_id": "20000001", + "name": "needs-triage", + "color": "ededed", + "description": "Awaiting maintainer triage", + "is_default": false + } + ], + "github_issues": [ + { + "id": "40000001", + "repository_id": "20000001", + "number": 1, + "title": "Widget factory panics on empty input", + "body": "Calling `build()` with no components raises an unhandled exception.", + "state": "open", + "user_id": "10000003", + "locked": false, + "comments_count": 1, + "is_pull_request": false, + "created_at": "2024-02-01T09:00:00", + "updated_at": "2024-02-01T09:00:00" + }, + { + "id": "40000002", + "repository_id": "20000001", + "number": 2, + "title": "Add async support to renderer", + "body": "We need `render_async()` to integrate with asyncio servers.", + "state": "open", + "user_id": "10000004", + "locked": false, + "comments_count": 0, + "is_pull_request": false, + "created_at": "2024-02-05T11:30:00", + "updated_at": "2024-02-05T11:30:00" + }, + { + "id": "40000003", + "repository_id": "20000001", + "number": 3, + "title": "Docs: document widget lifecycle", + "body": "The README skips over lifecycle hooks.", + "state": "closed", + "state_reason": "completed", + "user_id": "10000002", + "locked": false, + "comments_count": 0, + "is_pull_request": false, + "created_at": "2024-01-20T14:00:00", + "updated_at": "2024-01-25T09:00:00", + "closed_at": "2024-01-25T09:00:00" + }, + { + "id": "40000004", + "repository_id": "20000001", + "number": 4, + "title": "Fix widget factory panic", + "body": "Closes #1. Adds guard for empty components list.", + "state": "open", + "user_id": "10000003", + "locked": false, + "comments_count": 0, + "is_pull_request": true, + "draft": false, + "merged": false, + "head_ref": "bob/fix-empty-input", + "base_ref": "main", + "head_sha": "aaaaaaaabbbbbbbbccccccccdddddddd11111111", + "base_sha": "ffffffffeeeeeeeeddddddddcccccccc22222222", + "created_at": "2024-02-10T15:00:00", + "updated_at": "2024-02-10T15:00:00" + }, + { + "id": "40000005", + "repository_id": "20000001", + "number": 5, + "title": "Async renderer prototype", + "body": "Initial pass at `render_async`. Draft — please review API shape.", + "state": "open", + "user_id": "10000004", + "locked": false, + "comments_count": 0, + "is_pull_request": true, + "draft": true, + "merged": false, + "head_ref": "carol/async-renderer", + "base_ref": "main", + "head_sha": "1111111122222222333333334444444455555555", + "base_sha": "ffffffffeeeeeeeeddddddddcccccccc22222222", + "created_at": "2024-02-12T10:00:00", + "updated_at": "2024-02-12T10:00:00" + } + ], + "github_issue_labels": [ + {"issue_id": "40000001", "label_id": "30000001"}, + {"issue_id": "40000001", "label_id": "30000005"}, + {"issue_id": "40000002", "label_id": "30000002"}, + {"issue_id": "40000003", "label_id": "30000003"} + ], + "github_issue_assignees": [ + {"issue_id": "40000002", "user_id": "10000004"} + ], + "github_pull_request_reviewers": [ + {"issue_id": "40000004", "user_id": "10000002"} + ], + "github_issue_comments": [ + { + "id": "50000001", + "issue_id": "40000001", + "user_id": "10000002", + "body": "Reproduced on 0.4.1 — triaging.", + "created_at": "2024-02-01T10:00:00", + "updated_at": "2024-02-01T10:00:00" + } + ] +} diff --git a/examples/github/testsuites/github_bench.json b/examples/github/testsuites/github_bench.json new file mode 100644 index 00000000..842ffd63 --- /dev/null +++ b/examples/github/testsuites/github_bench.json @@ -0,0 +1,214 @@ +{ + "id": "github-bench", + "name": "GitHub Bench", + "description": "Benchmark tests for GitHub issue, pull-request, comment, and label operations.", + "service": "github", + "owner": "dev-user", + "ignore_fields": { + "global": ["created_at", "updated_at", "closed_at", "merged_at"] + }, + "tests": [ + { + "id": "issue_create_bug", + "name": "Open a new bug report", + "prompt": "Open a new issue in acme/widgets titled 'Widget factory leaks memory' describing a leak when processing large inputs. Apply the 'bug' label.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issues", + "where": { + "title": {"eq": "Widget factory leaks memory"}, + "is_pull_request": {"eq": false}, + "state": {"eq": "open"} + }, + "expected_count": 1 + }, + { + "diff_type": "added", + "entity": "github_issue_labels", + "where": {"label_id": {"eq": "30000001"}}, + "expected_count": 1 + } + ] + }, + { + "id": "issue_close_resolved", + "name": "Close issue as completed", + "prompt": "Close issue #2 in acme/widgets with state_reason 'completed' now that async renderer support has shipped.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issues", + "where": {"id": {"eq": "40000002"}}, + "changes": { + "state": {"from": "open", "to": "closed"}, + "state_reason": {"to": "completed"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_triage_label", + "name": "Add needs-triage label to issue", + "prompt": "Add the 'needs-triage' label to issue #2 in acme/widgets.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_labels", + "where": { + "issue_id": {"eq": "40000002"}, + "label_id": {"eq": "30000005"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_assign_user", + "name": "Assign issue to user", + "prompt": "Assign bob to issue #1 in acme/widgets so he can investigate the panic.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_assignees", + "where": { + "issue_id": {"eq": "40000001"}, + "user_id": {"eq": "10000003"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "issue_comment_reply", + "name": "Comment on an issue", + "prompt": "Comment on issue #1 in acme/widgets saying 'I can reproduce on 0.5.0 as well — will pick this up.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issue_comments", + "where": { + "issue_id": {"eq": "40000001"}, + "body": {"contains": "reproduce"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "comment_edit_typo", + "name": "Edit an existing comment", + "prompt": "Alice noticed a typo in comment 50000001 on issue #1 of acme/widgets. Update the comment body to 'Reproduced on 0.4.1 and 0.5.0 — triaging.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issue_comments", + "where": {"id": {"eq": "50000001"}}, + "changes": { + "body": {"to": "Reproduced on 0.4.1 and 0.5.0 — triaging."} + }, + "expected_count": 1 + } + ] + }, + { + "id": "label_create_security", + "name": "Create a security label", + "prompt": "Create a new label called 'security' in acme/widgets with color 'b60205' and description 'Security-sensitive issue'.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "added", + "entity": "github_labels", + "where": { + "repository_id": {"eq": "20000001"}, + "name": {"eq": "security"}, + "color": {"eq": "b60205"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_open_docs_fix", + "name": "Open a pull request", + "prompt": "Open a pull request in acme/widgets titled 'Docs: clarify lifecycle hooks' from branch 'agent-bot/lifecycle-docs' into 'main', body 'Expands README §3 based on #3.'", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000001", + "assertions": [ + { + "diff_type": "added", + "entity": "github_issues", + "where": { + "is_pull_request": {"eq": true}, + "title": {"eq": "Docs: clarify lifecycle hooks"}, + "head_ref": {"eq": "agent-bot/lifecycle-docs"}, + "base_ref": {"eq": "main"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_request_review", + "name": "Request reviewers on a PR", + "prompt": "Request alice and carol as reviewers on PR #4 in acme/widgets.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000003", + "assertions": [ + { + "diff_type": "added", + "entity": "github_pull_request_reviewers", + "where": { + "issue_id": {"eq": "40000004"}, + "user_id": {"eq": "10000004"} + }, + "expected_count": 1 + } + ] + }, + { + "id": "pr_merge", + "name": "Merge a pull request", + "prompt": "Merge PR #4 in acme/widgets now that review is complete.", + "type": "actionEval", + "seed_template": "github_default", + "impersonate_user_id": "10000002", + "assertions": [ + { + "diff_type": "updated", + "entity": "github_issues", + "where": {"id": {"eq": "40000004"}}, + "changes": { + "state": {"from": "open", "to": "closed"}, + "merged": {"from": false, "to": true} + }, + "expected_count": 1 + } + ] + } + ] +} diff --git a/experiments/kdd 2026/pilot_demonstration/pilot-demonstration.txt b/experiments/kdd 2026/pilot_demonstration/pilot-demonstration.txt new file mode 100644 index 00000000..64543303 --- /dev/null +++ b/experiments/kdd 2026/pilot_demonstration/pilot-demonstration.txt @@ -0,0 +1,610 @@ +╭─── Claude Code v2.1.112 ─────────────────────────────────────────────────────╮ +│ │ Tips for getting │ +│ Welcome back Hubert! │ started │ +│ │ Run /init to create a … │ +│ ▐▛███▜▌ │ ─────────────────────── │ +│ ▝▜█████▛▘ │ Recent activity │ +│ ▘▘ ▝▝ │ No recent activity │ +│ Opus 4.7 (1M context) · Claude Max · │ │ +│ hubert@uni.minerva.edu's Organization │ │ +│ ~/Documents/Github/agent-diff/ops │ │ +╰──────────────────────────────────────────────────────────────────────────────╯ + +❯ /clear + ⎿  (no content) + +❯ /login + ⎿ Login successful + +❯ Following the instructions in AGENTS.md, add a new GitHub service to the + benchmark using the OpenAPI specification at ./api.github.com. Cover issues, + pull requests, comments, and labels. Produce a seed script, example bench + suite, and pytest integration tests. The docker deamon is already running in + the background. + + Read 1 file (ctrl+o to expand) + + Read 6 files, listed 3 directories (ctrl+o to expand) + + Listed 1 directory (ctrl+o to expand) + +⏺ Bash(python3 -c " + import json…) + ⎿  /repos/{owner}/{repo}/issues ['get', 'post'] + /repos/{owner}/{repo}/issues/comments ['get'] + /repos/{owner}/{repo}/issues/comments/{comment_id} ['get', 'patch', 'd + … +44 lines (ctrl+o to expand) +⏺ Bash(ls /Users/hubertpysklo/Documents/Github/agent-diff/examples/slack/testsui + tes/slack_docs/ 2>/dev/null; cat + /Users/hubertpysklo/Documents/Github/agent-diff/exampl…) + ⎿  slack_api_full_docs.json + { + "id": "slack-bench", + … +34 lines (ctrl+o to expand) + + Searched for 1 pattern, read 5 files, listed 1 directory (ctrl+o to expand) + +⏺ Starting implementation. Creating the service directory structure and schema. + +⏺ Bash(mkdir -p /Users/hubertpysklo/Documents/Github/agent-diff/backend/src/serv + ices/github/database + /Users/hubertpysklo/Documents/Github/agent-diff/backend/src/servic…) + ⎿  Done + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/__init__.py) + ⎿  Wrote 5 lines to ../backend/src/services/github/__init__.py + 1 """GitHub API replica for agent-diff benchmarks. + 2 + 3 Covers issues, pull requests, comments, and labels — enough to ev + aluate + 4 agents that triage issues, land PRs, and manage repo labels. + 5 """ + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/database/__ini + t__.py) + ⎿  Wrote 1 lines to ../backend/src/services/github/database/__init__.py + 1 (No content) + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/database/base. + py) + ⎿  Wrote 5 lines to ../backend/src/services/github/database/base.py + 1 from sqlalchemy.orm import DeclarativeBase + 2 + 3 + 4 class Base(DeclarativeBase): + 5 pass + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/api/__init__.p + y) + ⎿  Wrote 1 lines to ../backend/src/services/github/api/__init__.py + 1 (No content) + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/utils/__init__ + .py) + ⎿  Wrote 1 lines to ../backend/src/services/github/utils/__init__.py + 1 (No content) + +⏺ Now the schema. I'll use a unified issues table (with is_pull_request + discriminator) that mirrors GitHub's own data model — issues and PRs share a + number space in a repo, so this keeps comments/labels routing natural. + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/database/schem + a.py) + ⎿  Wrote 361 lines to ../backend/src/services/github/database/schema.py + 2 """GitHub API replica schema. + 3 Follows GitHub's real data model: issues and pull requests live + 4 number space per repo. A single ``github_issues`` table stores b + oth, with + able.pull_request`` distinguishing them. PR-only fields are null + 6 + labels, surface needed for the bench is modeled: issues, pulls, + 8 assignees, requested reviewers, and issue/PR comments (GitHub ca + 9 "issue comments" even when attached to a PR). + 10 """ + … +410 lines (ctrl+o to expand) + +⏺ Now the operations layer and routes. + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/database/opera + tions.py) + ⎿  Wrote 374 lines to + ../backend/src/services/github/database/operations.py + 2 """CRUD helpers for the GitHub replica. + 3 Each function takes a SQLAlchemy session and returns ORM objects + . Route + 4 handlers are responsible for formatting responses. + 5 """ + 6 + 7 from __future__ import annotations + 8 + 9 import random + 10 from datetime import datetime + … +386 lines (ctrl+o to expand) + +⏺ Now the routes. + +⏺ Write(~/Documents/Github/agent-diff/backend/src/services/github/api/routes.py) + ⎿  Wrote 821 lines to ../backend/src/services/github/api/routes.py + 2 """GitHub REST routes. + 3 Mounted at ``/api/env/{env_id}/services/github``. Paths mirror + 4 ``https://api.github.com`` so existing GitHub clients work again + st the + 5 replica by swapping the base URL. + 6 """ + 7 + 8 from __future__ import annotations + 9 + 10 import json + … +889 lines (ctrl+o to expand) + +⏺ Update(~/Documents/Github/agent-diff/backend/src/services/github/api/routes.py + ) + ⎿  Removed 4 lines + 302 ) + 303 if "assignees" in body: + 304 assignees = _resolve_users(session, list(body.get("ass + ignees") or [])) + 305 - session.query(type(issue.assignees[0]) if issue.assign + -ees else object).filter( + 306 - False + 307 - ) # no-op to keep types calm + 308 - # Full replacement per GitHub semantics for PATCH issu + -es. + 305 for ia in list(issue.assignees): + 306 session.delete(ia) + 307 session.flush() +⏺ Now the seed data and seed script. + +⏺ Write(~/Documents/Github/agent-diff/examples/github/seeds/github_default.json) + ⎿  Wrote 202 lines to ../examples/github/seeds/github_default.json + 1 { + 2 "github_users": [ + 3 { + 4 "id": "10000001", + 5 "login": "agent-bot", + 6 "name": "Agent Bot", + 7 "email": "agent@example.com", + 8 "type": "Bot", + 9 "site_admin": false, + 10 "created_at": "2024-01-10T09:00:00" + … +195 lines (ctrl+o to expand) + +⏺ Now the seed script. I'll mirror the slack pattern. + + Read 1 file (ctrl+o to expand) + +⏺ Write(~/Documents/Github/agent-diff/backend/utils/seed_github_template.py) + ⎿  Wrote 181 lines to ../backend/utils/seed_github_template.py + 2 """Seed script for GitHub template schemas. + 3 + 4 Creates: + 5 - ``github_base``: empty schema (tables only), handy as a clean- + slate template. + po containingfault``: pre-populated with a small ACME widgets re + 7 issues, pull requests, labels, assignees, and comments. + 8 + 9 Usage: + 10 python backend/utils/seed_github_template.py + … +185 lines (ctrl+o to expand) + +⏺ Now copy seed data to backend/seeds for Docker builds. + +⏺ Bash(cp /Users/hubertpysklo/Documents/Github/agent-diff/examples/github/seeds/ + github_default.json + /Users/hubertpysklo/Documents/Github/agent-diff/backend/seeds/githu…) + ⎿  Done + +⏺ Now the example test suite with state-diff assertions. + +⏺ Write(~/Documents/Github/agent-diff/examples/github/testsuites/github_bench.js + on) + ⎿  Wrote 214 lines to ../examples/github/testsuites/github_bench.json + 2 "id": "github-bench", + 3 "name": "GitHub Bench", + 4 "description": "Benchmark tests for GitHub issue, pull-request + , comment, and label operations.", + 5 "service": "github", + 7 "ignore_fields": {", + 8 "global": ["created_at", "updated_at", "closed_at", "merged_ + at"] + 9 }, + 10 "tests": [ + … +219 lines (ctrl+o to expand) + +⏺ Bash(cp /Users/hubertpysklo/Documents/Github/agent-diff/examples/github/testsu + ites/github_bench.json + /Users/hubertpysklo/Documents/Github/agent-diff/backend/seeds/te…) + ⎿  Done + +⏺ Wiring the service into main.py and docker-compose.yml. + +⏺ Update(~/Documents/Github/agent-diff/backend/src/platform/api/main.py) + ⎿  Added 1 line + 23 from src.services.slack.api.methods import routes as slack_rout + es + 24 from src.services.calendar.api import routes as calendar_routes + 25 from src.services.box.api.routes import routes as box_routes + 26 +from src.services.github.api.routes import routes as github_rou + +tes + 27 from src.platform.logging_config import setup_logging + 28 from src.platform.isolationEngine.pool import PoolManager + 29 from src.platform.db.schema import TemplateEnvironment + +⏺ Update(~/Documents/Github/agent-diff/backend/src/platform/api/main.py) + ⎿  Added 3 lines + 127 box_router = Router(box_routes) + 128 app.mount("/api/env/{env_id}/services/box/2.0", box_router + ) + 129 + 130 + github_router = Router(github_routes) + 131 + app.mount("/api/env/{env_id}/services/github", github_rout + +er) + 132 + + 133 linear_schema_path = "src/services/linear/api/schema/Linea + r-API.graphql" + 134 linear_type_defs = load_schema_from_path(linear_schema_pat + h) + 135 linear_schema = make_executable_schema(linear_type_defs, * + bindables) + +⏺ Update(docker-compose.yml) + ⎿  Added 2 lines + 45 python utils/seed_linear_template.py && + 46 python utils/seed_box_template.py && + 47 python utils/seed_calendar_template.py && + 48 + python utils/seed_github_template.py && + 49 python utils/seed_tests.py; + 50 else + 51 echo '=== Skipping seed (use SEED=true to enable) === + '; + ... + 79 python utils/seed_linear_template.py && + 80 python utils/seed_box_template.py && + 81 python utils/seed_calendar_template.py && + 82 + python utils/seed_github_template.py && + 83 python utils/seed_tests.py; + 84 else + 85 echo '=== Skipping seed (use SEED=true to enable) === + '; + +⏺ Now the pytest integration tests. + +⏺ Write(~/Documents/Github/agent-diff/backend/tests/integration/test_github_api. + py) + ⎿  Wrote 357 lines to ../backend/tests/integration/test_github_api.py + 2 """Integration tests for the GitHub API replica. + 3 These exercise the routes over an ASGI transport against a fresh + ly cloned + 4 ``github_default`` schema. Each test gets its own environment so + that + 6 """ations don't bleed across cases. + 7 + 8 from __future__ import annotations + 9 + 10 import pytest + … +390 lines (ctrl+o to expand) +⏺ Now verify the code works end-to-end. First, spin up Postgres via + docker-compose and run the seed + tests. + +⏺ Bash(ls backend/.env 2>/dev/null; cat backend/.env 2>/dev/null | head -5; + docker ps --format '{{.Names}} {{.Ports}}' 2>&1 | head) + ⎿  (No output) + +⏺ Bash(ls backend/.env 2>&1; docker ps 2>&1 | head -10) + ⎿  ls: backend/.env: No such file or directory + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + +⏺ Bash(cd /Users/hubertpysklo/Documents/Github/agent-diff/ops && docker compose + up -d postgres 2>&1 | tail -20) + ⎿   b2a0d297a925 Extracting 18.92kB + b2a0d297a925 Pull complete 0B + 659fe7415ce8 Extracting 128B + … +17 lines (ctrl+o to expand) + +⏺ Bash(until docker exec ops-postgres-1 pg_isready -U postgres >/dev/null 2>&1; + do sleep 2; done && echo "postgres ready") + ⎿  postgres ready + +⏺ Bash(cd /Users/hubertpysklo/Documents/Github/agent-diff/backend && ls + requirements*.txt 2>/dev/null; python3 --version) + ⎿  (eval):1: no matches found: requirements*.txt + Python 3.9.6 + + Read 1 file, listed 1 directory (ctrl+o to expand) + +⏺ I'll run the backend container which will run migrations and seeds, then exec + pytest inside it. + +⏺ Bash(cd /Users/hubertpysklo/Documents/Github/agent-diff/ops && SEED=true + docker compose up --build -d backend 2>&1 | tail -30) + ⎿  #11 1.996 + sqlalchemy==2.0.49 + #11 1.996 + starlette==1.0.0 + #11 1.996 + typing-extensions==4.15.0 + … +27 lines (ctrl+o to expand) + +⏺ Bash(sleep 8 && docker compose logs backend 2>&1 | tail -80) + ⎿  backend-1 | ~~~~~~~~~~~~~~~~~~~~~~~^ + backend-1 | cursor, str_statement, effective_parameters, cont + ext + … +69 lines (ctrl+o to expand) + +⏺ default is a PostgreSQL reserved word. I'll rename the column to is_default + while keeping the API field as default. + +⏺ Update(~/Documents/Github/agent-diff/backend/src/services/github/database/sche + ma.py) + ⎿  Added 2 lines, removed 2 lines + 103 name: Mapped[str] = mapped_column(String(128)) + 104 color: Mapped[str] = mapped_column(String(8), default="ede + ded") + 105 description: Mapped[Optional[str]] = mapped_column(Text) + 106 - default: Mapped[bool] = mapped_column(Boolean, default=Fal + -se) + 106 + is_default: Mapped[bool] = mapped_column(Boolean, default= + +False) + 107 + 108 repository: Mapped["Repository"] = relationship( + 109 "Repository", back_populates="labels" + ... + 116 "name": self.name, + 117 "color": self.color, + 118 "description": self.description, + 119 - "default": self.default, + 119 + "default": self.is_default, + 120 "url": ( + 121 f"https://api.github.com/repos/{self.repositor + y.full_name}/labels/{self.name}" + 122 if self.repository + +⏺ Update(~/Documents/Github/agent-diff/examples/github/seeds/github_default.json + ) + ⎿  Added 4 lines, removed 4 lines + 56 "name": "bug", + 57 "color": "d73a4a", + 58 "description": "Something isn't working", + 59 - "default": true + 59 + "is_default": true + 60 }, + 61 { + 62 "id": "30000002", + ... + 64 "name": "enhancement", + 65 "color": "a2eeef", + 66 "description": "New feature or request", + 67 - "default": true + 67 + "is_default": true + 68 }, + 69 { + 70 "id": "30000003", + ... + 72 "name": "documentation", + 73 "color": "0075ca", + 74 "description": "Improvements or additions to documentatio + n", + 75 - "default": true + 75 + "is_default": true + 76 }, + 77 { + 78 "id": "30000004", + ... + 80 "name": "good first issue", + 81 "color": "7057ff", + 82 "description": "Good for newcomers", + 83 - "default": true + 83 + "is_default": true + 84 }, + 85 { + 86 "id": "30000005", + +⏺ Update(~/Documents/Github/agent-diff/examples/github/seeds/github_default.json + ) + ⎿  Added 1 line, removed 1 line + 88 "name": "needs-triage", + 89 "color": "ededed", + 90 "description": "Awaiting maintainer triage", + 91 - "default": false + 91 + "is_default": false + 92 } + 93 ], + 94 "github_issues": [ + +⏺ Bash(cp /Users/hubertpysklo/Documents/Github/agent-diff/examples/github/seeds/ + github_default.json + /Users/hubertpysklo/Documents/Github/agent-diff/backend/seeds/githu…) + ⎿  Done + +⏺ Bash(docker compose restart backend 2>&1 | tail -5 && sleep 10 && docker + compose logs backend 2>&1 | tail -40) + ⎿  time="2026-04-18T00:55:26+05:30" level=warning msg="The \"DATABASE_URL + \" variable is not set. Defaulting to a blank string." + Container ops-backend-1 Restarting + … +40 lines (ctrl+o to expand) + +⏺ Bash(docker compose logs backend 2>&1 | grep -E + "(Creating|All|template|seed|Error|error)" | tail -60) + ⎿  backend-1 | === Creating github_base === + backend-1 | Empty template github_base ready + backend-1 | Registered public template: github_base + … +46 lines (ctrl+o to expand) + +⏺ Seeds succeeded. The on_event error is pre-existing (Starlette 1.0 removed it) + — unrelated to my changes. My integration tests use ASGITransport directly, + not uvicorn, so they can bypass it. + +⏺ Bash(docker compose exec -T backend sh -c "cd /app && python -m pytest + tests/integration/test_github_api.py -v 2>&1" 2>&1 | tail -80) + ⎿  time="2026-04-18T00:55:53+05:30" level=warning msg="The \"DATABASE_URL + \" variable is not set. Defaulting to a blank string." + service "backend" is not running + +⏺ The backend container crashes on startup due to the pre-existing Starlette + bug. Let me run tests via a one-shot exec with the same image. + +⏺ Bash(docker compose run --rm --entrypoint /bin/sh -e DATABASE_URL="postgresql: + //postgres:postgres@postgres:5432/diff_the_universe" backend -c "cd /app + && python -m p…) + ⎿   app = _app_for(session_manager, env.environment_id, AGENT_USER_ID) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + tests/integration/test_github_api.py:36: in _app_for + … +101 lines (ctrl+o to expand) +⏺ Starlette 1.0 removed app.middleware(...). I'll use BaseHTTPMiddleware via + add_middleware instead. + +⏺ Update(~/Documents/Github/agent-diff/backend/tests/integration/test_github_api + .py) + ⎿  Added 2 lines + 11 import pytest_asyncio + 12 from httpx import ASGITransport, AsyncClient + 13 from starlette.applications import Starlette + 14 +from starlette.middleware import Middleware + 15 +from starlette.middleware.base import BaseHTTPMiddleware + 16 + 17 from src.services.github.api.routes import routes as github_rou + tes + 18 + +⏺ Update(~/Documents/Github/agent-diff/backend/tests/integration/test_github_api + .py) + ⎿  Added 2 lines, removed 3 lines + 34 request.state.impersonate_email = None + 35 return await call_next(request) + 36 + 37 - app = Starlette(routes=github_routes) + 38 - app.middleware("http")(add_db_session) + 39 - return app + 37 + middleware = [Middleware(BaseHTTPMiddleware, dispatch=add_d + +b_session)] + 38 + return Starlette(routes=github_routes, middleware=middlewar + +e) + 39 + 40 + 41 @pytest_asyncio.fixture + +⏺ Bash(docker compose run --rm --entrypoint /bin/sh -e DATABASE_URL="postgresql: + //postgres:postgres@postgres:5432/diff_the_universe" backend -c "cd /app + && python -m p…) + ⎿  ============================= test session starts ==================== + ========== + collecting ... collected 29 items + … +84 lines (ctrl+o to expand) + +⏺ The relationship cache isn't being refreshed after bulk adds. Let me fix. + +⏺ Update(~/Documents/Github/agent-diff/backend/src/services/github/database/oper + ations.py) + ⎿  Added 2 lines + 231 added.append(u) + 232 existing.add(u.id) + 233 session.flush() + 234 + session.refresh(issue) + 235 return added + 236 + 237 + ... + 243 for ia in removed: + 244 session.delete(ia) + 245 session.flush() + 246 + session.refresh(issue) + 247 return [ia.user for ia in removed if ia.user] + 248 + 249 + +⏺ Update(~/Documents/Github/agent-diff/backend/src/services/github/database/oper + ations.py) + ⎿  Added 2 lines + 344 added.append(u) + 345 existing.add(u.id) + 346 session.flush() + 347 + session.refresh(pr) + 348 return added + 349 + 350 + ... + 357 PullRequestReviewer.user_id.in_(ids), + 358 ).delete(synchronize_session=False) + 359 session.flush() + 360 + session.refresh(pr) + 361 + 362 + 363 def merge_pull( + +⏺ Bash(docker compose run --rm --entrypoint /bin/sh -e DATABASE_URL="postgresql: + //postgres:postgres@postgres:5432/diff_the_universe" backend -c "cd /app + && python -m p…) + ⎿   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + /usr/local/lib/python3.14/site-packages/sqlalchemy/orm/strategies.py:9 + 31: in _load_for_state + … +45 lines (ctrl+o to expand) + +⏺ Update(~/Documents/Github/agent-diff/backend/src/services/github/database/oper + ations.py) + ⎿  Added 4 lines, removed 3 lines + 239 session: Session, issue: Issue, users: Iterable[User] + 240 ) -> List[User]: + 241 ids = {u.id for u in users} + 242 - removed = [ia for ia in issue.assignees if ia.user_id in i + -ds] + 243 - for ia in removed: + 242 + to_remove = [ia for ia in issue.assignees if ia.user_id in + + ids] + 243 + removed_users = [ia.user for ia in to_remove if ia.user] + 244 + for ia in to_remove: + 245 session.delete(ia) + 246 session.flush() + 247 session.refresh(issue) + 247 - return [ia.user for ia in removed if ia.user] + 248 + return removed_users + 249 + 250 + 251 def set_issue_labels( + +⏺ Bash(docker compose run --rm --entrypoint /bin/sh -e DATABASE_URL="postgresql: + //postgres:postgres@postgres:5432/diff_the_universe" backend -c "cd /app + && python -m p…) + ⎿  tests/integration/test_github_api.py::TestIssues::test_list_open_issue + s PASSED [ 24%] + tests/integration/test_github_api.py::TestIssues::test_list_all_states + … +49 lines (ctrl+o to expand) + +⏺ All 29 tests pass. Cleaning up the container. + +⏺ Bash(docker compose stop 2>&1 | tail -5) + ⎿  time="2026-04-18T00:57:20+05:30" level=warning msg="The \"DATABASE_URL + \" variable is not set. Defaulting to a blank string." + Container ops-backend-1 Stopping + … +3 lines (ctrl+o to expand) + +⏺ All 29 integration tests pass. Summary of what was added: + + Service code (backend/src/services/github/) + - database/base.py, database/schema.py — ORM models: User, Repository, Label, + Issue (unified issue/PR row via is_pull_request), IssueLabel, IssueAssignee, + PullRequestReviewer, IssueComment. + - database/operations.py — session-taking CRUD helpers. + - api/routes.py — ~30 Starlette routes covering /repos/{owner}/{repo}/issues, + /pulls, /issues/{n}/comments, /issues/comments/{id}, /labels, assignee and + requested-reviewer management, and PR merge. + + Seed (backend/utils/seed_github_template.py, + examples/github/seeds/github_default.json, mirrored to backend/seeds/github/) + - Creates github_base and github_default templates; default seeds an + acme/widgets repo with 4 users, 5 labels, 3 issues, 2 PRs, an assignee, a + requested reviewer, and a comment. + + Bench suite (examples/github/testsuites/github_bench.json, mirrored to + backend/seeds/testsuites/) + - 10 tests with state-diff assertions: create/close/triage/assign/comment + issues, edit comments, create labels, open/review/merge PRs. + + Wiring + - backend/src/platform/api/main.py mounts the service at + /api/env/{env_id}/services/github. + - ops/docker-compose.yml runs seed_github_template.py alongside the other + seeders. + + Tests (backend/tests/integration/test_github_api.py) + - 29 tests across TestRepository, TestLabels, TestIssues, TestIssueLabels, + TestIssueAssignees, TestIssueComments, TestPullRequests — all passing against + a freshly cloned github_default schema. + +✻ Cogitated for 12m 20s \ No newline at end of file diff --git a/ops/docker-compose.yml b/ops/docker-compose.yml index 5ae86052..a2de50ac 100644 --- a/ops/docker-compose.yml +++ b/ops/docker-compose.yml @@ -45,6 +45,7 @@ services: python utils/seed_linear_template.py && python utils/seed_box_template.py && python utils/seed_calendar_template.py && + python utils/seed_github_template.py && python utils/seed_tests.py; else echo '=== Skipping seed (use SEED=true to enable) ==='; @@ -78,6 +79,7 @@ services: python utils/seed_linear_template.py && python utils/seed_box_template.py && python utils/seed_calendar_template.py && + python utils/seed_github_template.py && python utils/seed_tests.py; else echo '=== Skipping seed (use SEED=true to enable) ===';