Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codespy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ signatures:
# ============================================================================
output_format: markdown # OUTPUT_FORMAT (markdown | json)
output_stdout: true # OUTPUT_STDOUT (enable stdout output)
- output_git: true # OUTPUT_GIT (post review to GitHub PR or GitLab MR)
+ output_git: false # OUTPUT_GIT (post review to GitHub PR or GitLab MR)
cache_dir: ~/.cache/codespy # CACHE_DIR

# ============================================================================
Expand Down
60 changes: 40 additions & 20 deletions src/codespy/agents/reviewer/modules/scope_identifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,18 @@ class ScopeIdentifierSignature(dspy.Signature):
→ Candidate scope: company/backend/services/user-api
4. Group files by their longest common directory prefix that contains a scope indicator

- STEP 2 - CLONE THE REPOSITORY:
- Clone using clone_repository tool:
- 1. Use the repo_owner, repo_name, and head_sha provided in the inputs
- 2. Clone to the target_repo_path provided
- 3. Derive sparse_paths from candidate scopes identified in STEP 1:
- - Include each candidate scope directory
- - Example: ["mono/svc/my-service-v1/", "libs/common/"]
- 4. Use depth=1 for fastest clone (single commit)
+ STEP 2 - ACCESS THE REPOSITORY:
+ - If is_local is True:
+ The repository is ALREADY available at target_repo_path. Do NOT clone.
+ Skip directly to STEP 3 and use filesystem tools to explore the repo.
+ - If is_local is False:
+ Clone using clone_repository tool:
+ 1. Use the repo_owner, repo_name, and head_sha provided in the inputs
+ 2. Clone to the target_repo_path provided
+ 3. Derive sparse_paths from candidate scopes identified in STEP 1:
+ - Include each candidate scope directory
+ - Example: ["mono/svc/my-service-v1/", "libs/common/"]
+ 4. Use depth=1 for fastest clone (single commit)

STEP 3 - VERIFY SCOPES WITH PACKAGE MANIFESTS:
For each candidate scope from STEP 1:
Expand Down Expand Up @@ -141,14 +145,17 @@ class ScopeIdentifierSignature(dspy.Signature):
changed_files: list[str] = dspy.InputField(
desc="List of changed file paths from the MR. Use these to derive sparse_paths for efficient cloning."
)
- repo_owner: str = dspy.InputField(desc="Repository owner/namespace (e.g., 'facebook' or 'group/subgroup')")
- repo_name: str = dspy.InputField(desc="Repository name (e.g., 'react')")
- head_sha: str = dspy.InputField(desc="Git commit SHA to checkout")
+ repo_owner: str = dspy.InputField(desc="Repository owner/namespace (e.g., 'facebook' or 'group/subgroup'). Used for cloning (remote only).")
+ repo_name: str = dspy.InputField(desc="Repository name (e.g., 'react'). Used for cloning (remote only).")
+ head_sha: str = dspy.InputField(desc="Git commit SHA to checkout. Used for cloning (remote only).")
target_repo_path: str = dspy.InputField(
- desc="Absolute path where repository should be cloned. Clone here before exploring."
+ desc="Absolute path to the repository. For remote reviews, clone here. For local reviews, the repo is already here."
)
mr_title: str = dspy.InputField(desc="MR title for additional context")
mr_description: str = dspy.InputField(desc="MR description for additional context")
+ is_local: bool = dspy.InputField(
+ desc="Whether the repository is already available locally at target_repo_path. If True, skip cloning and go directly to filesystem exploration."
+ )

scopes: list[ScopeAssignment] = dspy.OutputField(
desc="Identified scopes. Every changed file must appear in exactly one scope. Use concise reasons (<2 sentences)."
Expand All @@ -168,8 +175,13 @@ def __init__(self) -> None:
self._cost_tracker = get_cost_tracker()
self._settings = get_settings()

- async def _create_mcp_tools(self, repo_path: Path) -> tuple[list[Any], list[Any]]:
- """Create DSPy tools from MCP servers."""
+ async def _create_mcp_tools(self, repo_path: Path, is_local: bool = False) -> tuple[list[Any], list[Any]]:
+ """Create DSPy tools from MCP servers.
+ 
+ Args:
+ repo_path: Path to the repository root
+ is_local: If True, skip git server (repo already on disk, no cloning needed)
+ """
tools: list[Any] = []
contexts: list[Any] = []
tools_dir = Path(__file__).parent.parent.parent.parent / "tools"
Expand All @@ -178,11 +190,18 @@ async def _create_mcp_tools(self, repo_path: Path) -> tuple[list[Any], list[Any]
tools.extend(await connect_mcp_server(tools_dir / "filesystem" / "server.py", [repo_path_str], contexts, caller))
tools.extend(await connect_mcp_server(tools_dir / "parsers" / "ripgrep" / "server.py", [repo_path_str], contexts, caller))
tools.extend(await connect_mcp_server(tools_dir / "parsers" / "treesitter" / "server.py", [repo_path_str], contexts, caller))
- tools.extend(await connect_mcp_server(tools_dir / "git" / "server.py", [], contexts, caller))
+ if not is_local:
+ tools.extend(await connect_mcp_server(tools_dir / "git" / "server.py", [], contexts, caller))
return tools, contexts

- async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult]:
- """Identify scopes in the repository for the given MR."""
+ async def aforward(self, mr: MergeRequest, repo_path: Path, is_local: bool = False) -> list[ScopeResult]:
+ """Identify scopes in the repository for the given MR.
+ 
+ Args:
+ mr: The merge request to analyze
+ repo_path: Path to the repository root
+ is_local: If True, repo is already on disk (skip cloning)
+ """
# Get excluded directories from settings
excluded_dirs = self._settings.excluded_directories

Expand Down Expand Up @@ -211,7 +230,7 @@ async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult]
changed_files=reviewable_files,
reason="Scope identification disabled - fallback to single scope",
)]
- tools, contexts = await self._create_mcp_tools(repo_path)
+ tools, contexts = await self._create_mcp_tools(repo_path, is_local=is_local)
changed_file_paths = [f.filename for f in reviewable_files]
# Build map from filename to ChangedFile for post-processing
changed_files_map: dict[str, ChangedFile] = {f.filename: f for f in reviewable_files}
Expand All @@ -238,6 +257,7 @@ async def aforward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult]
target_repo_path=str(repo_path),
mr_title=mr.title or "No title",
mr_description=mr.body or "No description",
+ is_local=is_local,
)
scope_assignments: list[ScopeAssignment] = result.scopes
# Ensure we got valid scopes
Expand Down Expand Up @@ -301,6 +321,6 @@ def _convert_assignments_to_results(
))
return results

- def forward(self, mr: MergeRequest, repo_path: Path) -> list[ScopeResult]:
+ def forward(self, mr: MergeRequest, repo_path: Path, is_local: bool = False) -> list[ScopeResult]:
  """Identify scopes (sync wrapper)."""
- return asyncio.run(self.aforward(mr, repo_path))
+ return asyncio.run(self.aforward(mr, repo_path, is_local=is_local))
3 changes: 2 additions & 1 deletion src/codespy/agents/reviewer/reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,9 @@ def forward(self, config: ReviewConfig) -> ReviewResult:
raise ValueError(f"Invalid config type: {type(config)}")

# Identify scopes (the module internally checks if signature is enabled)
+ is_local = isinstance(config, LocalReviewConfig)
  logger.info("Identifying code scopes...")
- scopes = self.scope_identifier(mr, repo_path)
+ scopes = self.scope_identifier(mr, repo_path, is_local=is_local)
for scope in scopes:
logger.info(f" Scope: {scope.subroot} ({scope.scope_type.value}) - {len(scope.changed_files)} files")
if scope.package_manifest:
Expand Down