Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 47 additions & 6 deletions backend/app/main.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import asyncio
import os
import re
import tempfile
Expand Down Expand Up @@ -67,17 +68,44 @@ def _scan_repo_dir(repo_dir: Path):
return semgrep, osv, gitleaks, findings


def github_zip_url(repo_url: str, ref: str = "main") -> str:
def github_zip_url(repo_url: str, ref: str = "main"):
repo_url = repo_url.strip()
m = re.match(
r"^https?://github\.com/([^/]+)/([^/]+?)(?:\.git)?/?$", repo_url, re.IGNORECASE
)
if not m:
raise HTTPException(
status_code=400, detail="Only GitHub repo URLs are supported right now."
raise ValueError(
"Invalid GitHub URL format. Expected: https://github.com/owner/repo"
)

owner, repo = m.group(1), m.group(2)
return f"https://github.com/{owner}/{repo}/archive/refs/heads/{ref}.zip"
return (
f"https://github.com/{owner}/{repo}/archive/refs/heads/{ref}.zip",
owner,
repo,
)


async def check_repo_reachable(owner: str, repo: str) -> None:
url = f"https://github.com/{owner}/{repo}"
try:
async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as client:
r = await client.head(url, follow_redirects=True)
if r.status_code == 404:
raise HTTPException(
status_code=422,
detail="Repository not found or is private. Check the URL and try again.",
)
if r.status_code != 200:
raise HTTPException(
status_code=422,
detail=f"Repository not reachable (HTTP {r.status_code}).",
)
except httpx.TimeoutException:
raise HTTPException(
status_code=422,
detail="Could not reach GitHub — check your network connection.",
)


async def download_to_path(url: str, dest_path: Path) -> None:
Expand Down Expand Up @@ -163,11 +191,24 @@ async def scan_url(
repo_dir = job_dir / "repo"
ensure_dir(repo_dir)

zip_url = github_zip_url(repo_url, ref=ref)
try:
zip_url, owner, repo = github_zip_url(repo_url, ref=ref)
except ValueError as e:
safe_rmtree(job_dir)
raise HTTPException(status_code=422, detail=str(e))

try:
await check_repo_reachable(owner, repo)
except HTTPException:
safe_rmtree(job_dir)
raise

try:
await download_to_path(zip_url, archive_path)
await asyncio.wait_for(download_to_path(zip_url, archive_path), timeout=30.0)
unzip_to_dir(archive_path, repo_dir)
except asyncio.TimeoutError:
safe_rmtree(job_dir)
raise HTTPException(status_code=504, detail="Repository clone timed out.")
except HTTPException:
safe_rmtree(job_dir)
raise
Expand Down
3 changes: 2 additions & 1 deletion frontend/src/app/lib/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ export async function scanRepoUrl(
});

if (!res.ok) {
throw new Error(await res.text());
const err = await res.json().catch(() => null);
throw new Error(err?.detail ?? "Import from URL failed");
}

return (await res.json()) as ScanResponse;
Expand Down
Loading