diff --git a/src/dispatch.ts b/src/dispatch.ts index f2c7da0..5b4dd72 100644 --- a/src/dispatch.ts +++ b/src/dispatch.ts @@ -9,9 +9,29 @@ import { applyBridgeToProcessEnv, buildBridgeValues, syncGradleProperties } from import { startRails, type RailsHandle } from "./rails-lifecycle.js"; import { isStub } from "./stub.js"; import { trace } from "./trace.js"; +import { buildRunReport, writeReport, type ReportFormat, type ReportPaths } from "./report/collect.js"; +import { readPackageVersion } from "./version.js"; +import type { RunReport } from "./report/model.js"; import type { JudgeResult } from "./agents/types.js"; -export async function dispatch(spec: string): Promise { +export type DispatchReportOptions = { + enabled?: boolean; + format?: ReportFormat; + embed?: boolean; + dir?: string; +}; + +export type DispatchOptions = { + report?: DispatchReportOptions; +}; + +export type DispatchResult = JudgeResult & { + report: RunReport; + reportPaths: ReportPaths; +}; + +export async function dispatch(spec: string, options: DispatchOptions = {}): Promise { + const startedAt = Date.now(); const domain = await runPlanner(spec); // Mirror the substrate's NATIVEAPPTEMPLATE_API_* config to the @@ -98,8 +118,9 @@ export async function dispatch(spec: string): Promise { trace("dispatch", `rails-lifecycle: live at ${railsServer.url} for Stage 2`); } + let judge: JudgeResult; try { - return await runJudge({ + judge = await runJudge({ domain, rails, ios, @@ -115,4 +136,33 @@ export async function dispatch(spec: string): Promise { }); } } + + const report = buildRunReport({ + spec, + domain, + judge, + reviewer, + agentVersion: readPackageVersion(), + judgeModel: "claude-opus-4-7", + visualLevel: visualLevel as 0 | 1 | 2, + startedAt, + finishedAt: Date.now(), + }); + + // Default off in stub mode so the test suite never writes into ./out. + const reportOpts = options.report ?? {}; + const reportEnabled = reportOpts.enabled ?? 
!isStub("dispatch"); + let reportPaths: ReportPaths = {}; + if (reportEnabled) { + const dir = reportOpts.dir ?? resolve(process.cwd(), "out", domain.slug); + reportPaths = await writeReport(report, { + dir, + ...(reportOpts.format !== undefined ? { format: reportOpts.format } : {}), + ...(reportOpts.embed !== undefined ? { embed: reportOpts.embed } : {}), + }); + const written = Object.values(reportPaths).filter(Boolean); + if (written.length > 0) trace("dispatch", `report: wrote ${written.join(", ")}`); + } + + return { ...judge, report, reportPaths }; } diff --git a/src/index.ts b/src/index.ts index ac05942..71c2cc2 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,15 +1,40 @@ #!/usr/bin/env node import { realpathSync } from "node:fs"; import { fileURLToPath } from "node:url"; -import { dispatch } from "./dispatch.js"; +import { spawn } from "node:child_process"; +import { dispatch, type DispatchReportOptions } from "./dispatch.js"; import { loadDotenvIfPresent } from "./env.js"; loadDotenvIfPresent(); +type ParsedArgs = { spec: string; report: DispatchReportOptions; open: boolean }; + +function parseArgs(argv: readonly string[]): ParsedArgs { + const specParts: string[] = []; + const report: DispatchReportOptions = {}; + let open = false; + for (const arg of argv) { + if (arg === "--no-report") report.enabled = false; + else if (arg === "--report-open") open = true; + else if (arg.startsWith("--report-format=")) { + const value = arg.slice("--report-format=".length); + if (value === "html" || value === "json" || value === "both") report.format = value; + } else if (arg.startsWith("--report-embed=")) { + report.embed = arg.slice("--report-embed=".length) !== "false"; + } else { + specParts.push(arg); + } + } + return { spec: specParts.join(" ").trim(), report, open }; +} + export async function main(spec?: string): Promise { - const input = spec ?? 
process.argv.slice(2).join(" ").trim(); + const parsed = parseArgs(process.argv.slice(2)); + const input = (spec ?? parsed.spec).trim(); if (!input) { - console.error('Usage: nativeapptemplate-agent "your spec here"'); + console.error( + 'Usage: nativeapptemplate-agent "your spec here" [--no-report] [--report-format=html|json|both] [--report-embed=true|false] [--report-open]', + ); process.exitCode = 1; return; } @@ -17,12 +42,20 @@ export async function main(spec?: string): Promise { console.log(`nativeapptemplate-agent: received spec: ${input}`); console.log('(tail tmp/trace/*.log in a tiled view via scripts/demo-tmux.sh)'); - const result = await dispatch(input); + const result = await dispatch(input, { report: parsed.report }); console.log(''); console.log('=== run complete ==='); console.log(`result: ${result.summary}`); console.log(`overall: ${result.overallPass ? 'PASS' : 'FAIL'}`); + if (result.reportPaths.htmlPath) { + console.log(`report: file://${result.reportPaths.htmlPath}`); + if (parsed.open && process.platform === 'darwin') { + spawn('open', [result.reportPaths.htmlPath], { stdio: 'ignore', detached: true }).unref(); + } + } else if (result.reportPaths.jsonPath) { + console.log(`report: ${result.reportPaths.jsonPath}`); + } } // Entry guard: run main() when this file is the program entry point. 
Resolve diff --git a/src/mcp.ts b/src/mcp.ts index 4c4f1d4..35c997d 100644 --- a/src/mcp.ts +++ b/src/mcp.ts @@ -1,12 +1,12 @@ #!/usr/bin/env node -import { realpathSync, readFileSync } from "node:fs"; +import { realpathSync } from "node:fs"; import { fileURLToPath } from "node:url"; -import { dirname, resolve } from "node:path"; import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; import { dispatch } from "./dispatch.js"; import { loadDotenvIfPresent } from "./env.js"; +import { readPackageVersion } from "./version.js"; // MCP surface (MONETIZATION.md §"MCP as a distribution surface"): // thin wrapper around dispatch() so any MCP-compatible AI assistant @@ -44,6 +44,9 @@ export function createMcpServer(): McpServer { overallPass: result.overallPass, summary: result.summary, ...(result.visual ? { visual: result.visual } : {}), + report: result.report, + ...(result.reportPaths.htmlPath ? { reportHtmlPath: result.reportPaths.htmlPath } : {}), + ...(result.reportPaths.jsonPath ? { reportJsonPath: result.reportPaths.jsonPath } : {}), }, isError: !result.overallPass, }; @@ -60,16 +63,6 @@ export async function main(): Promise { await server.connect(transport); } -function readPackageVersion(): string { - try { - const here = dirname(fileURLToPath(import.meta.url)); - const pkg = JSON.parse(readFileSync(resolve(here, "..", "package.json"), "utf8")); - return typeof pkg.version === "string" ? 
pkg.version : "0.0.0"; - } catch { - return "0.0.0"; - } -} - if (isEntryPoint()) { main().catch((err) => { console.error(err); diff --git a/src/report/collect.ts b/src/report/collect.ts new file mode 100644 index 0000000..5a2e6d1 --- /dev/null +++ b/src/report/collect.ts @@ -0,0 +1,146 @@ +import { copyFile, mkdir, readFile, writeFile } from "node:fs/promises"; +import { basename, isAbsolute, join, resolve } from "node:path"; +import type { DomainSpec, JudgeResult, ReviewerResult } from "../agents/types.js"; +import { renderReport } from "./render.js"; +import type { AssetMap, RunReport } from "./model.js"; + +export type ReportFormat = "html" | "json" | "both"; + +export type BuildRunReportInput = { + spec: string; + domain: DomainSpec; + judge: JudgeResult; + reviewer: ReviewerResult; + agentVersion: string; + judgeModel: string; + visualLevel: 0 | 1 | 2; + startedAt: number; + finishedAt: number; +}; + +// Pure assembly: fold the run's pieces into the single RunReport +// aggregate. No I/O — writeReport handles disk. +export function buildRunReport(input: BuildRunReportInput): RunReport { + return { + meta: { + spec: input.spec, + slug: input.domain.slug, + displayName: input.domain.displayName, + agentVersion: input.agentVersion, + judgeModel: input.judgeModel, + visualLevel: input.visualLevel, + startedAt: new Date(input.startedAt).toISOString(), + finishedAt: new Date(input.finishedAt).toISOString(), + durationMs: input.finishedAt - input.startedAt, + }, + overallPass: input.judge.overallPass, + summary: input.judge.summary, + platforms: input.judge.platforms ?? [], + reviewer: { + contractParity: input.reviewer.contractParity, + diffs: input.reviewer.diffs, + }, + domain: { + renamePlan: input.domain.renamePlan.map((r) => ({ from: r.from, to: r.to })), + entities: input.domain.entities.map((e) => ({ + name: e.name, + replaces: e.replaces, + fields: e.fields.map((f) => ({ + name: f.name, + type: f.type, + ...(f.references !== undefined ? 
{ references: f.references } : {}), + })), + ...(e.states !== undefined ? { states: e.states } : {}), + })), + }, + }; +} + +export type WriteReportOptions = { + dir: string; + format?: ReportFormat; + embed?: boolean; +}; + +export type ReportPaths = { + jsonPath?: string; + htmlPath?: string; +}; + +export async function writeReport(report: RunReport, options: WriteReportOptions): Promise { + const format = options.format ?? "both"; + const embed = options.embed ?? true; + await mkdir(options.dir, { recursive: true }); + + const paths: ReportPaths = {}; + + if (format === "json" || format === "both") { + const jsonPath = join(options.dir, "report.json"); + await writeFile(jsonPath, `${JSON.stringify(report, null, 2)}\n`, "utf8"); + paths.jsonPath = jsonPath; + } + + if (format === "html" || format === "both") { + const assets = await resolveAssets(report, options.dir, embed); + const htmlPath = join(options.dir, "validation-report.html"); + await writeFile(htmlPath, renderReport(report, assets), "utf8"); + paths.htmlPath = htmlPath; + } + + return paths; +} + +// Every screenshot file path referenced anywhere in the report, +// de-duplicated and in stable order. +export function collectScreenshotPaths(report: RunReport): string[] { + const seen = new Set(); + const add = (p: string | undefined): void => { + if (p && !seen.has(p)) seen.add(p); + }; + for (const platform of report.platforms) { + const l3 = platform.layer3; + if (!l3) continue; + add(l3.screenshotPath); + const s2 = l3.stage2; + if (s2) { + add(s2.representativeScreenshot); + for (const s of s2.screenshots) add(s); + } + } + return [...seen]; +} + +// Turn screenshot file paths into render-ready values. With +// embed=true each PNG becomes a base64 data: URI (single self-contained +// file). With embed=false PNGs are copied to /report-assets/ and +// referenced by relative path. Unreadable paths are skipped — the +// renderer shows a placeholder for any path missing from the map. 
+async function resolveAssets(report: RunReport, dir: string, embed: boolean): Promise { + const paths = collectScreenshotPaths(report); + if (paths.length === 0) return {}; + + const map: AssetMap = {}; + const assetsDir = join(dir, "report-assets"); + if (!embed) await mkdir(assetsDir, { recursive: true }); + + await Promise.all( + paths.map(async (p) => { + const abs = isAbsolute(p) ? p : resolve(process.cwd(), p); + try { + if (embed) { + const buf = await readFile(abs); + map[p] = `data:image/png;base64,${buf.toString("base64")}`; + } else { + const name = basename(abs); + await copyFile(abs, join(assetsDir, name)); + map[p] = `report-assets/${name}`; + } + } catch { + // Leave unmapped — renderer renders a "screenshot unavailable" + // placeholder rather than a broken image. + } + }), + ); + + return map; +} diff --git a/src/report/model.ts b/src/report/model.ts new file mode 100644 index 0000000..6deff11 --- /dev/null +++ b/src/report/model.ts @@ -0,0 +1,56 @@ +import type { PlatformDetail } from "../agents/types.js"; + +// The single aggregate the validation report renders from. Assembled by +// dispatch (src/report/collect.ts#buildRunReport) and serialized to +// report.json; renderReport is a pure function of it. See +// docs/validation-report.md. +export type RunReport = { + meta: RunMeta; + overallPass: boolean; + summary: string; + platforms: readonly PlatformDetail[]; + reviewer: { + contractParity: "pass" | "fail"; + diffs: readonly string[]; + }; + domain: { + renamePlan: readonly { from: string; to: string }[]; + entities: readonly RunReportEntity[]; + }; + // Populated once the self-repair loop is wired (CLAUDE.md ≤5 cap). + // Rendered only when present. 
+ repairAttempts?: readonly RepairAttempt[]; +}; + +export type RunMeta = { + spec: string; + slug: string; + displayName: string; + agentVersion: string; + judgeModel: string; + visualLevel: 0 | 1 | 2; + startedAt: string; + finishedAt: string; + durationMs: number; +}; + +export type RunReportEntity = { + name: string; + replaces: string; + fields: readonly { name: string; type: string; references?: string }[]; + states?: readonly string[]; +}; + +export type RepairAttempt = { + iteration: number; + failingLayer: "layer1" | "layer2" | "layer3" | "reviewer"; + platform?: "rails" | "ios" | "android"; + action: string; + resolved: boolean; +}; + +// Maps an original screenshot file path to a render-ready +// value — a `data:` URI when embedded, or a relative path when +// externalized to report-assets/. Built by the collector (I/O) and +// passed to the pure renderer so render.ts never touches the filesystem. +export type AssetMap = Record; diff --git a/src/report/render.ts b/src/report/render.ts new file mode 100644 index 0000000..b648a0b --- /dev/null +++ b/src/report/render.ts @@ -0,0 +1,261 @@ +import type { PlatformDetail } from "../agents/types.js"; +import type { AssetMap, RunReport } from "./model.js"; +import { REPORT_CSS } from "./theme.js"; + +// Pure: RunReport (+ a resolved screenshot AssetMap) -> a complete, +// self-contained HTML document string. No filesystem, no network, no +// clock — every dynamic value comes from the inputs, so the output is +// deterministic and golden-file testable. +export function renderReport(report: RunReport, assets: AssetMap = {}): string { + const body = [ + head(report), + gates(report), + matrix(report), + layer1Section(report), + layer2Section(report), + layer3Section(report, assets), + reviewerSection(report), + repairSection(report), + domainSection(report), + footer(report), + ].join("\n"); + + return ` + + + + +Validation report — ${esc(report.meta.displayName)} + + + +
+${body} +
+ +`; +} + +function head(report: RunReport): string { + const m = report.meta; + const badge = report.overallPass + ? `✓ Pass` + : `✗ Fail`; + const visual = m.visualLevel === 0 ? "off" : `level ${m.visualLevel}`; + return `
+${badge} +

${esc(m.displayName)}

+

spec: ${esc(m.spec)}

+
+slug: ${esc(m.slug)} +agent v${esc(m.agentVersion)} +judge: ${esc(m.judgeModel)} +visual: ${visual} +${fmtDuration(m.durationMs)} +${esc(m.finishedAt)} +
+
`; +} + +type Gate = { label: string; value: string; cls: "pass" | "fail" | "muted" }; + +function computeGates(report: RunReport): Gate[] | null { + const plats = report.platforms; + if (plats.length === 0) return null; + const l1 = plats.filter((p) => p.layer1.pass).length; + const l2 = plats.filter((p) => p.layer2.pass).length; + const l3plats = plats.filter((p) => p.layer3 !== undefined); + const l3 = l3plats.filter((p) => p.layer3!.pass).length; + const reviewerPass = report.reviewer.contractParity === "pass"; + return [ + gateOf("Layer 1 · structural", l1, plats.length), + gateOf("Layer 2 · runtime", l2, plats.length), + l3plats.length > 0 + ? gateOf("Layer 3 · semantic", l3, l3plats.length) + : { label: "Layer 3 · semantic", value: "skipped", cls: "muted" as const }, + { label: "Reviewer · contract", value: reviewerPass ? "Pass" : "Fail", cls: reviewerPass ? "pass" : "fail" }, + ]; +} + +function gateOf(label: string, pass: number, total: number): Gate { + return { label, value: `${pass}/${total} pass`, cls: pass === total ? "pass" : "fail" }; +} + +function gates(report: RunReport): string { + const gs = computeGates(report); + const cards = gs + ? gs.map((g) => `
${esc(g.label)}
${esc(g.value)}
`).join("") + // Stub / detail-less runs: fall back to the one-line summary. + : `
Summary
${esc(report.summary)}
`; + return `

Gates

${cards}
`; +} + +function matrix(report: RunReport): string { + if (report.platforms.length === 0) return ""; + const rows = report.platforms + .map((p) => { + const l3 = p.platform === "rails" ? markNa() : p.layer3 ? mark(p.layer3.pass) : markNa(); + return `${esc(p.platform)}${mark(p.layer1.pass)}${mark(p.layer2.pass)}${l3}`; + }) + .join(""); + return `

Platform × layer

+ + +${rows} +
PlatformLayer 1Layer 2Layer 3
`; +} + +function layer1Section(report: RunReport): string { + if (report.platforms.length === 0) return ""; + const cards = report.platforms.map((p) => { + const findings = p.layer1.findings; + const inner = findings.length === 0 + ? `

No leftover substrate tokens.

` + : ` + +${findings.map((f) => ``).join("")} +
TokenLocationLine
${esc(f.token)}${esc(f.file)}:${f.line}${esc(f.text)}
`; + return card(p.platform, p.layer1.pass, inner); + }).join(""); + return `

Layer 1 — structural (leftover token scan)

${cards}
`; +} + +function layer2Section(report: RunReport): string { + if (report.platforms.length === 0) return ""; + const cards = report.platforms.map((p) => { + const l2 = p.layer2; + const kv = `

command: ${esc(l2.command)}
mode: ${esc(l2.mode)} · exit: ${l2.exitCode === null ? "—" : l2.exitCode} · ${fmtDuration(l2.durationMs)}

`; + const stderr = l2.stderrTail && !l2.pass + ? `
stderr tail
${esc(l2.stderrTail)}
` + : ""; + return card(p.platform, l2.pass, kv + stderr); + }).join(""); + return `

Layer 2 — runtime (toolchain build/boot)

${cards}
`; +} + +function layer3Section(report: RunReport, assets: AssetMap): string { + const plats = report.platforms.filter((p): p is PlatformDetail & { layer3: NonNullable } => p.layer3 !== undefined); + if (plats.length === 0) return ""; + const cards = plats.map((p) => { + const l3 = p.layer3; + const parts: string[] = []; + if (l3.error) parts.push(`

error: ${esc(l3.error)}

`); + if (l3.screenshotPath) parts.push(`
${shot(l3.screenshotPath, "home screen", assets)}
`); + if (l3.scores && l3.scores.length > 0) { + parts.push(scoreTable("Stage 1 rubric (median of 3 samples)", l3.scores)); + } + if (l3.stage2) { + const s2 = l3.stage2; + parts.push(`

Stage 2: ${esc(s2.scenarioName)} · steps ${s2.stepsPassed}/${s2.stepCount}

`); + if (s2.error) parts.push(`

error: ${esc(s2.error)}

`); + if (s2.screenshots.length > 0) { + parts.push(`
${s2.screenshots.map((sp) => shot(sp, capOf(sp), assets)).join("")}
`); + } + if (s2.layer3Scores && s2.layer3Scores.length > 0) { + parts.push(scoreTable("Stage 2 rubric (post-toggle screen)", s2.layer3Scores)); + } + } + return card(p.platform, l3.pass, parts.join("\n")); + }).join(""); + return `

Layer 3 — semantic (Opus 4.7 vision judge)

${cards}
`; +} + +function reviewerSection(report: RunReport): string { + const pass = report.reviewer.contractParity === "pass"; + const diffs = report.reviewer.diffs; + const inner = diffs.length === 0 + ? `

No contract drift across Rails ↔ iOS ↔ Android.

` + : `
${diffs.length} contract difference(s)
${esc(diffs.join("\n"))}
`; + return `

Reviewer — contract parity

${card("Rails ↔ iOS ↔ Android", pass, inner)}
`; +} + +function repairSection(report: RunReport): string { + const attempts = report.repairAttempts; + if (!attempts || attempts.length === 0) return ""; + const rows = attempts.map((a) => + `${a.iteration}${esc(a.failingLayer)}${a.platform ? ` (${esc(a.platform)})` : ""}${esc(a.action)}${a.resolved ? `` : ``}`, + ).join(""); + return `

Self-repair (≤5 iterations)

+ + +${rows} +
#Failing layerActionResolved
`; +} + +function domainSection(report: RunReport): string { + const { renamePlan, entities } = report.domain; + const rename = renamePlan.length === 0 + ? `

No rename pairs.

` + : `${renamePlan.map((r) => ``).join("")}
${esc(r.from)}${esc(r.to)}
`; + const ents = entities.length === 0 + ? "" + : entities.map((e) => { + const fields = e.fields.map((f) => `${esc(f.name)}:${esc(f.type)}${f.references ? `→${esc(f.references)}` : ""}`).join(" · "); + const states = e.states && e.states.length > 0 ? `
states: ${e.states.map((s) => esc(s)).join(" ↔ ")}` : ""; + return `

${esc(e.name)} (replaces ${esc(e.replaces)})

${fields || "no fields"}

${states}
`; + }).join(""); + return `

Domain plan

+

Rename plan

${rename}
+${ents}
`; +} + +function footer(report: RunReport): string { + const visualPrefix = report.meta.visualLevel > 0 ? `NATIVEAPPTEMPLATE_VISUAL=${report.meta.visualLevel} ` : ""; + const cmd = `${visualPrefix}npx nativeapptemplate-agent ${JSON.stringify(report.meta.spec)}`; + return `
+

Reproduce this run:

+
${esc(cmd)}
+

Raw per-agent logs: tmp/trace/*.log · Generated by nativeapptemplate-agent v${esc(report.meta.agentVersion)}.

+
`; +} + +// --- helpers --- + +function card(title: string, pass: boolean, inner: string): string { + const pill = pass ? `pass` : `fail`; + return `

${esc(title)} ${pill}

${inner}
`; +} + +function scoreTable(title: string, scores: readonly { criterionId: string; pass: boolean; rationale: string }[]): string { + const rows = scores.map((s) => + `${esc(s.criterionId)}${s.pass ? `` : ``}${esc(s.rationale)}`, + ).join(""); + return `${rows}
${esc(title)}
CriterionVerdictRationale
`; +} + +function shot(path: string, caption: string, assets: AssetMap): string { + const src = assets[path]; + if (!src) { + return `
screenshot unavailable
${esc(caption)}
`; + } + return `
${esc(caption)}
${esc(caption)}
`; +} + +function mark(pass: boolean): string { + return pass ? `` : ``; +} + +function markNa(): string { + return ``; +} + +function capOf(path: string): string { + const name = path.split("/").pop() ?? path; + return name.replace(/\.png$/i, ""); +} + +function fmtDuration(ms: number): string { + if (ms < 1000) return `${ms}ms`; + const s = ms / 1000; + if (s < 60) return `${s.toFixed(1)}s`; + const m = Math.floor(s / 60); + return `${m}m ${Math.round(s % 60)}s`; +} + +function esc(value: string): string { + return value + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} diff --git a/src/report/theme.ts b/src/report/theme.ts new file mode 100644 index 0000000..5e1c0fa --- /dev/null +++ b/src/report/theme.ts @@ -0,0 +1,114 @@ +// Inline stylesheet for the validation report. Palette matches +// docs/social-preview.svg (light-blue vivid) for brand coherence. +// No web fonts, no external assets — the report is a single portable +// file. Kept as a plain string so render.ts stays pure. 
+export const REPORT_CSS = ` +:root { + --bg: #1f2933; + --bg-2: #0b69a3; + --panel: #243441; + --panel-2: #1b2a38; + --border: #3e4c59; + --text: #f5f7fa; + --muted: #9aa5b1; + --accent: #40c3f7; + --accent-2: #2bb0ed; + --pass: #34d399; + --fail: #f87171; + --na: #52606d; +} +* { box-sizing: border-box; } +body { + margin: 0; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; + background: linear-gradient(160deg, var(--bg) 0%, #14202a 60%, #0e1820 100%); + color: var(--text); + line-height: 1.5; + padding: 32px 20px 80px; +} +.wrap { max-width: 1100px; margin: 0 auto; } +a { color: var(--accent); } +header.report-head { + border-bottom: 1px solid var(--border); + padding-bottom: 20px; + margin-bottom: 28px; +} +.badge { + display: inline-block; + font-size: 13px; + font-weight: 700; + letter-spacing: 1px; + padding: 6px 14px; + border-radius: 20px; + text-transform: uppercase; +} +.badge.pass { background: rgba(52,211,153,0.16); color: var(--pass); border: 1px solid rgba(52,211,153,0.4); } +.badge.fail { background: rgba(248,113,113,0.16); color: var(--fail); border: 1px solid rgba(248,113,113,0.4); } +h1 { font-size: 30px; font-weight: 800; letter-spacing: -0.5px; margin: 14px 0 4px; } +.spec { color: var(--muted); font-size: 16px; margin: 0 0 12px; } +.spec b { color: var(--text); font-weight: 600; } +.meta { color: var(--muted); font-size: 13px; font-family: "SF Mono", Menlo, monospace; } +.meta span { margin-right: 16px; white-space: nowrap; } +section { margin: 30px 0; } +h2 { + font-size: 13px; text-transform: uppercase; letter-spacing: 1.2px; + color: var(--accent); margin: 0 0 14px; font-weight: 700; +} +.gates { display: flex; flex-wrap: wrap; gap: 12px; } +.gate { + flex: 1 1 160px; background: var(--panel); border: 1px solid var(--border); + border-radius: 12px; padding: 14px 16px; +} +.gate .label { font-size: 12px; color: var(--muted); text-transform: uppercase; letter-spacing: 0.6px; } +.gate .value { font-size: 22px; 
font-weight: 800; margin-top: 4px; } +.gate.pass .value { color: var(--pass); } +.gate.fail .value { color: var(--fail); } +.gate.muted .value { color: var(--muted); font-size: 15px; font-weight: 600; } +table { width: 100%; border-collapse: collapse; font-size: 14px; } +.matrix th, .matrix td { padding: 10px 12px; text-align: center; border: 1px solid var(--border); } +.matrix th:first-child, .matrix td:first-child { text-align: left; font-weight: 600; } +.matrix thead th { background: var(--panel-2); color: var(--muted); font-weight: 600; text-transform: uppercase; font-size: 12px; letter-spacing: 0.5px; } +.mark { font-weight: 800; font-size: 16px; } +.mark.pass { color: var(--pass); } +.mark.fail { color: var(--fail); } +.mark.na { color: var(--na); } +.card { + background: var(--panel); border: 1px solid var(--border); + border-radius: 12px; padding: 16px 18px; margin-bottom: 14px; +} +.card h3 { margin: 0 0 10px; font-size: 16px; display: flex; align-items: center; gap: 10px; } +.pill { font-size: 11px; font-weight: 700; padding: 2px 9px; border-radius: 10px; text-transform: uppercase; letter-spacing: 0.5px; } +.pill.pass { background: rgba(52,211,153,0.16); color: var(--pass); } +.pill.fail { background: rgba(248,113,113,0.16); color: var(--fail); } +.findings th, .findings td { padding: 7px 10px; text-align: left; border-bottom: 1px solid var(--border); font-size: 13px; vertical-align: top; } +.findings th { color: var(--muted); font-weight: 600; } +.findings code, code.inline { font-family: "SF Mono", Menlo, monospace; font-size: 12px; color: var(--accent); } +.kv { font-family: "SF Mono", Menlo, monospace; font-size: 13px; color: var(--muted); } +.kv b { color: var(--text); font-weight: 600; } +.empty { color: var(--muted); font-style: italic; font-size: 13px; } +details { margin-top: 10px; } +summary { cursor: pointer; color: var(--accent); font-size: 13px; } +pre { + background: #0d1820; border: 1px solid var(--border); border-radius: 8px; + padding: 
12px; overflow-x: auto; font-size: 12px; color: #cbd2d9; margin: 10px 0 0; +} +.shots { display: flex; flex-wrap: wrap; gap: 14px; margin: 12px 0; } +.shot { border: 1px solid var(--border); border-radius: 10px; overflow: hidden; background: var(--panel-2); } +.shot img { display: block; max-width: 240px; height: auto; } +.shot .cap { font-size: 11px; color: var(--muted); padding: 6px 8px; font-family: "SF Mono", Menlo, monospace; } +.shot.missing { padding: 24px; color: var(--muted); font-size: 12px; font-style: italic; max-width: 240px; } +.scores th, .scores td { padding: 7px 10px; text-align: left; border-bottom: 1px solid var(--border); font-size: 13px; vertical-align: top; } +.scores th { color: var(--muted); font-weight: 600; } +.rename td { padding: 7px 10px; border-bottom: 1px solid var(--border); font-size: 13px; } +.rename .arrow { color: var(--muted); padding: 0 8px; } +footer { margin-top: 48px; padding-top: 20px; border-top: 1px solid var(--border); color: var(--muted); font-size: 13px; } +footer pre { color: var(--text); } +@media (max-width: 640px) { + .gate { flex-basis: 100%; } + .shot img { max-width: 100%; } +} +@media print { + body { background: #fff; color: #111; } + .card, .gate { break-inside: avoid; } +} +`; diff --git a/src/version.ts b/src/version.ts new file mode 100644 index 0000000..bba56dd --- /dev/null +++ b/src/version.ts @@ -0,0 +1,16 @@ +import { readFileSync } from "node:fs"; +import { fileURLToPath } from "node:url"; +import { dirname, resolve } from "node:path"; + +// Read the agent's version from the published package.json. Resolves +// relative to the compiled module (dist/version.js → ../package.json), +// so it works under `npx`, global installs, and local dev alike. +export function readPackageVersion(): string { + try { + const here = dirname(fileURLToPath(import.meta.url)); + const pkg = JSON.parse(readFileSync(resolve(here, "..", "package.json"), "utf8")); + return typeof pkg.version === "string" ? 
pkg.version : "0.0.0"; + } catch { + return "0.0.0"; + } +} diff --git a/tests/smoke.test.ts b/tests/smoke.test.ts index 781192f..fa7193a 100644 --- a/tests/smoke.test.ts +++ b/tests/smoke.test.ts @@ -4,6 +4,12 @@ import { runLayer1, runLayer2, runLayer3, captureScreenshot, installAndLaunch, r import { dispatch } from "../src/dispatch.js"; import { runReviewer } from "../src/agents/reviewer.js"; import { canonicalizeEndpoint, diffContracts } from "../src/agents/contract-extract.js"; +import { renderReport } from "../src/report/render.js"; +import { buildRunReport, writeReport, collectScreenshotPaths } from "../src/report/collect.js"; +import type { DomainSpec, JudgeResult, ReviewerResult } from "../src/agents/types.js"; +import { mkdtempSync, writeFileSync, readFileSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; test("validation layers are exported as functions", () => { assert.equal(typeof runLayer1, "function"); @@ -1095,10 +1101,198 @@ test("createMcpServer registers generate_app and routes through dispatch", async arguments: { spec: "a walk-in clinic queue for small veterinary practices" }, }); assert.equal(call.isError, false); - const sc = call.structuredContent as { overallPass?: boolean; summary?: string }; + const sc = call.structuredContent as { overallPass?: boolean; summary?: string; report?: { meta?: { slug?: string } } }; assert.equal(sc.overallPass, true); assert.match(sc.summary ?? "", /PASS/); + // Step 7: the RunReport is surfaced in structuredContent. 
// Tail of the preceding test (its opening lies before this section).
  assert.ok(sc.report, "expected report in structuredContent");
  assert.equal(typeof sc.report?.meta?.slug, "string");

  await client.close();
  await server.close();
});

// --- HTML validation report (docs/validation-report.md) ---

/** Domain fixture shared by the report tests: one entity and a two-step rename plan. */
const reportDomain: DomainSpec = {
  slug: "vet-clinic",
  displayName: "Vet Clinic",
  entities: [
    { name: "Patient", replaces: "ItemTag", fields: [{ name: "name", type: "string" }], states: ["Idled", "Completed"] },
  ],
  renamePlan: [
    { from: "Shop", to: "Clinic" },
    { from: "Shopkeeper", to: "Vet" },
  ],
  jsonApiContract: {},
};

/**
 * Builds a failing judge result with a mix of passing and failing layers
 * across three platforms (rails, ios, android).
 *
 * @param iosScreenshot When provided, set as `layer3.screenshotPath` on the
 *   ios platform; when omitted, the key is left off entirely.
 * @returns A `JudgeResult` whose summary matches the per-layer pass counts
 *   of the platforms below (2/3, 2/3, 1/2).
 */
function mixedJudge(iosScreenshot?: string): JudgeResult {
  return {
    overallPass: false,
    summary: "Layer 1 2/3 pass · Layer 2 2/3 pass · Layer 3 1/2 pass · reviewer FAIL",
    platforms: [
      {
        platform: "rails",
        layer1: { pass: true, findings: [] },
        layer2: { pass: true, command: "bin/rails runner", mode: "build", exitCode: 0, durationMs: 4200 },
      },
      {
        platform: "ios",
        // The finding text deliberately contains angle brackets so the
        // renderer's HTML escaping can be asserted on.
        layer1: { pass: false, findings: [{ token: "Shop", file: "ios/Foo.swift", line: 12, text: "var s: Shop<Tag>" }] },
        layer2: { pass: true, command: "xcodebuild build", mode: "build", exitCode: 0, durationMs: 61000 },
        layer3: {
          pass: true,
          ...(iosScreenshot !== undefined ? { screenshotPath: iosScreenshot } : {}),
          scores: [{ criterionId: "no-substrate-leak", pass: true, rationale: "No Shop tokens visible." }],
        },
      },
      {
        platform: "android",
        layer1: { pass: true, findings: [] },
        layer2: { pass: false, command: "./gradlew assembleDebug", mode: "build", exitCode: 1, durationMs: 30000, stderrTail: "e: Unresolved reference: Shopkeeper" },
        layer3: { pass: false, error: "launch failed" },
      },
    ],
  };
}

/** Reviewer fixture with a failing contract-parity verdict and a single diff line. */
const failReviewer: ReviewerResult = { contractParity: "fail", diffs: ["iOS calls DELETE /clinics/{id}/reset not in Rails"] };

test("buildRunReport assembles meta + platforms + domain from the run pieces", () => {
  const report = buildRunReport({
    spec: "a vet clinic queue",
    domain: reportDomain,
    judge: mixedJudge(),
    reviewer: failReviewer,
    agentVersion: "9.9.9",
    judgeModel: "claude-opus-4-7",
    visualLevel: 1,
    startedAt: 1000,
    finishedAt: 4000,
  });
  assert.equal(report.meta.slug, "vet-clinic");
  // durationMs is expected to be finishedAt - startedAt (4000 - 1000).
  assert.equal(report.meta.durationMs, 3000);
  assert.equal(report.meta.agentVersion, "9.9.9");
  assert.equal(report.platforms.length, 3);
  assert.equal(report.reviewer.contractParity, "fail");
  assert.deepEqual(report.domain.renamePlan, [
    { from: "Shop", to: "Clinic" },
    { from: "Shopkeeper", to: "Vet" },
  ]);
});

test("renderReport surfaces findings, stderr, reviewer diff, rename plan, and overall verdict", () => {
  const report = buildRunReport({
    spec: "a vet clinic queue",
    domain: reportDomain,
    // A screenshot path the (empty) asset map can't resolve → exercises
    // the "screenshot unavailable" placeholder branch.
    judge: mixedJudge("/tmp/nonexistent/ios-home.png"),
    reviewer: failReviewer,
    agentVersion: "9.9.9",
    judgeModel: "claude-opus-4-7",
    visualLevel: 1,
    startedAt: 1000,
    finishedAt: 4000,
  });
  const html = renderReport(report);

  // NOTE(review): the original pattern was lost (the literal read `//i`,
  // which is a line comment, not a regex). A doctype check is the presumed
  // intent — confirm against renderReport's actual output.
  assert.match(html, /<!doctype html>/i);
  assert.ok(html.includes("Vet Clinic"));
  assert.ok(html.includes("✗ Fail"), "overall fail badge");
  assert.ok(html.includes("2/3 pass"), "layer 1 gate count");
  // Layer 1 finding details, HTML-escaped (the fixture text has angle brackets).
  // NOTE(review): assumes the renderer escapes with the named entities
  // &lt; / &gt; — confirm against its escape helper.
  assert.ok(html.includes("ios/Foo.swift:12"));
  assert.ok(html.includes("Shop&lt;Tag&gt;"), "finding text is HTML-escaped");
  assert.ok(!html.includes("Shop<Tag>"), "no unescaped angle brackets leak through");
  // Layer 2 stderr tail.
  assert.ok(html.includes("Unresolved reference: Shopkeeper"));
  // Reviewer diff.
  assert.ok(html.includes("DELETE /clinics/{id}/reset"));
  // Domain rename plan.
  assert.ok(html.includes("Clinic") && html.includes("Vet"));
  // No screenshot provided in assets → placeholder, not a broken image.
  assert.ok(html.includes("screenshot unavailable"));
});

test("renderReport falls back to the summary line when platforms are empty (stub run)", () => {
  const report = buildRunReport({
    spec: "x",
    domain: reportDomain,
    // No `platforms` key at all: only the verdict and the summary line.
    judge: { overallPass: true, summary: "Layer 1/2/3 PASS" },
    reviewer: { contractParity: "pass", diffs: [] },
    agentVersion: "1.0.0",
    judgeModel: "claude-opus-4-7",
    visualLevel: 0,
    startedAt: 0,
    finishedAt: 10,
  });
  const html = renderReport(report);
  assert.ok(html.includes("Layer 1/2/3 PASS"));
  assert.ok(html.includes("✓ Pass"));
});

test("writeReport emits a self-contained report.json + HTML with embedded screenshot", async () => {
  const tmp = mkdtempSync(join(tmpdir(), "report-test-"));
  const shotPath = join(tmp, "ios-home.png");
  // Minimal valid-ish PNG header bytes — enough to base64-embed.
  writeFileSync(shotPath, Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]));

  const report = buildRunReport({
    spec: "a vet clinic queue",
    domain: reportDomain,
    judge: mixedJudge(shotPath),
    reviewer: failReviewer,
    agentVersion: "1.0.0",
    judgeModel: "claude-opus-4-7",
    visualLevel: 1,
    startedAt: 0,
    finishedAt: 1000,
  });

  assert.ok(collectScreenshotPaths(report).includes(shotPath));

  const dir = join(tmp, "out");
  const paths = await writeReport(report, { dir, embed: true });

  assert.ok(paths.jsonPath, "json path");
  assert.ok(paths.htmlPath, "html path");

  const json = JSON.parse(readFileSync(paths.jsonPath!, "utf8")) as { overallPass: boolean; meta: { slug: string } };
  assert.equal(json.overallPass, false);
  assert.equal(json.meta.slug, "vet-clinic");

  const html = readFileSync(paths.htmlPath!, "utf8");
  assert.ok(html.includes("data:image/png;base64,"), "screenshot embedded as data URI");
  // Portability guarantees: no ephemeral tmp paths, no network/asset URLs.
  assert.ok(!html.includes(shotPath), "must not leak the raw tmp/ screenshot path");
  assert.ok(!/https?:\/\//.test(html), "must not reference any external URL");
});

test("writeReport with embed=false externalizes screenshots to report-assets/", async () => {
  const tmp = mkdtempSync(join(tmpdir(), "report-ext-"));
  const shotPath = join(tmp, "ios-home.png");
  writeFileSync(shotPath, Buffer.from([0x89, 0x50, 0x4e, 0x47]));

  const report = buildRunReport({
    spec: "x",
    domain: reportDomain,
    judge: mixedJudge(shotPath),
    reviewer: { contractParity: "pass", diffs: [] },
    agentVersion: "1.0.0",
    judgeModel: "claude-opus-4-7",
    visualLevel: 1,
    startedAt: 0,
    finishedAt: 1,
  });

  const dir = join(tmp, "out");
  const paths = await writeReport(report, { dir, embed: false, format: "html" });
  assert.equal(paths.jsonPath, undefined, "format=html skips json");
  const html = readFileSync(paths.htmlPath!, "utf8");
  assert.ok(html.includes("report-assets/ios-home.png"), "relative asset reference");
  assert.ok(!html.includes("data:image/png"), "no embedded data URI when embed=false");
  // The copied asset exists on disk.
  assert.ok(readFileSync(join(dir, "report-assets", "ios-home.png")).length > 0);
});