From 0162348cca3c421f909589fe0058a7884a5b08cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alexander=20K=C3=B6lnberger?= <159939812+ProfRandom92@users.noreply.github.com> Date: Thu, 14 May 2026 16:45:40 +0200 Subject: [PATCH] Harden replay showcase artifact data --- showcase/app/scripts/validate-static.mjs | 87 ++- showcase/app/src/data/benchmarkArtifacts.ts | 60 ++ showcase/app/src/main.tsx | 640 +++++++------------- showcase/app/src/styles.css | 262 ++++---- 4 files changed, 455 insertions(+), 594 deletions(-) create mode 100644 showcase/app/src/data/benchmarkArtifacts.ts diff --git a/showcase/app/scripts/validate-static.mjs b/showcase/app/scripts/validate-static.mjs index 1376d2b..d500218 100644 --- a/showcase/app/scripts/validate-static.mjs +++ b/showcase/app/scripts/validate-static.mjs @@ -3,40 +3,79 @@ import { dirname, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; const root = resolve(dirname(fileURLToPath(import.meta.url)), '..'); +const repoRoot = resolve(root, '..', '..'); const app = await readFile(resolve(root, 'src', 'main.tsx'), 'utf8'); const css = await readFile(resolve(root, 'src', 'styles.css'), 'utf8'); +const data = await readFile(resolve(root, 'src', 'data', 'benchmarkArtifacts.ts'), 'utf8'); const packageJson = await readFile(resolve(root, 'package.json'), 'utf8'); +const paperArtifact = JSON.parse(await readFile(resolve(repoRoot, 'artifacts', 'paper_replay_results.json'), 'utf8')); +const agentArtifact = JSON.parse(await readFile(resolve(repoRoot, 'artifacts', 'agent_trace_replay_results.json'), 'utf8')); const requiredPhrases = [ - 'Raw Corpus Ingest', - 'Tokenizer Pipeline', - 'Compression Engine', - 'Validation Runner', - 'Replay Executor', - 'Drift Assertions', - 'Artifact Registry', - 'Enterprise Review Dashboard', - 'RUN-2026-05-13-8842', - 'ART-CTX7-2F91A', - 'REP-CONSISTENCY-441', - 'GH-ACT-551882', - 'Input Tokens', - 'Compressed Tokens', - 'Reduction Ratio', - 'Replay Drift Delta', - 'Validation Status', - 'OpenTelemetry trace', - 'Artifact registry', - 'CI execution history', - 'Reviewer Walkthrough' + 'Deterministic operational replay validation for long-horizon AI agents.', + 'Comptextv7 turns noisy context into compact operational state', + 'Deterministic Replay', + 'Operational Continuity', + 'CI Audit Artifacts', + 'Paper Replay Benchmark', + 'Agent Trace Replay Benchmark', + 'Raw Context / Agent Trace', + 'Operational State Extraction', + 'Compact Replay State', + 'Replay Reconstruction', + 'Deterministic Validation', + 'CI Artifact', + 'No LLM Judging', + 'No Embeddings', + 'No External APIs', + 'Deterministic JSON', + 'CI Reproducible', + 'Audit Friendly', + 'Agent trace replay is currently near-lossless because fixtures are structured', + 'artifacts/paper_replay_results.json', + 'artifacts/agent_trace_replay_results.json', + 'reports/replay_continuity/validation_report.md', + 'docs/benchmarks/paper_replay.md', + 'docs/benchmarks/agent_trace_replay.md' ]; -const missing = requiredPhrases.filter((phrase) => !app.includes(phrase)); +const source = `${app}\n${data}`; +const missing = requiredPhrases.filter((phrase) => !source.includes(phrase)); if (missing.length > 0) { - console.error(`Missing required enterprise showcase copy: ${missing.join(', ')}`); + console.error(`Missing required replay showcase copy: ${missing.join(', ')}`); process.exit(1); } +function formatAggregateRate(value) { + return value.toFixed(6); +} + +function escapeRegExp(value) { + return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +function assertMetricValue(label, value) { + const pattern = new RegExp(`label:\\s*['\"]${escapeRegExp(label)}['\"][^}]*value:\\s*['\"]${escapeRegExp(value)}['\"]`, 's'); + if (!pattern.test(data)) { + console.error(`Static benchmark data is missing derived artifact metric: ${label} = ${value}`); + process.exit(1); + } +} + +const expectedArtifactMetrics = [ + ['Paper count', String(paperArtifact.aggregate.paper_count)], + ['Trace count', String(agentArtifact.aggregate.trace_count)], + ['Avg compression ratio', formatAggregateRate(paperArtifact.aggregate.avg_compression_ratio)], + ['Avg compression ratio', formatAggregateRate(agentArtifact.aggregate.avg_compression_ratio)], + ['Replay consistency', formatAggregateRate(paperArtifact.aggregate.avg_replay_consistency)], + ['Replay consistency', formatAggregateRate(agentArtifact.aggregate.avg_replay_consistency)], + ['Operational drift', formatAggregateRate(agentArtifact.aggregate.avg_operational_drift_rate)] +]; + +for (const [label, value] of expectedArtifactMetrics) { + assertMetricValue(label, value); +} + if (!packageJson.includes('vite') || !packageJson.includes('react')) { console.error('React/Vite showcase dependencies are missing.'); process.exit(1); @@ -47,4 +86,4 @@ if (!css.includes('@media (max-width: 760px)')) { process.exit(1); } -console.log('Enterprise infrastructure showcase validation passed.'); +console.log('Deterministic replay showcase validation passed.'); diff --git a/showcase/app/src/data/benchmarkArtifacts.ts b/showcase/app/src/data/benchmarkArtifacts.ts new file mode 100644 index 0000000..5b33fe9 --- /dev/null +++ b/showcase/app/src/data/benchmarkArtifacts.ts @@ -0,0 +1,60 @@ +export const repoBaseUrl = 'https://github.com/ProfRandom92/Comptextv7/blob/main/'; + +export type BenchmarkMetric = { + label: string; + value: string; + detail?: string; +}; + +export type BenchmarkArtifact = { + title: string; + benchmark: string; + artifactPath: string; + methodPath: string; + badges: string[]; + note: string; + metrics: BenchmarkMetric[]; +}; + +// Static dashboard values copied from committed deterministic replay artifacts: +// - artifacts/paper_replay_results.json +// - artifacts/agent_trace_replay_results.json +// Keep these values in sync with committed artifact JSON; do not replace them with +// generated, model-judged, or externally fetched metrics. +export const benchmarkArtifacts: BenchmarkArtifact[] = [ + { + title: 'Paper Replay Benchmark', + benchmark: 'paper_replay_bench', + artifactPath: 'artifacts/paper_replay_results.json', + methodPath: 'docs/benchmarks/paper_replay.md', + badges: ['Deterministic', 'No LLM judging'], + note: 'Dense technical paper fixtures preserve entities, limitations, metrics, and section structure with measurable replay loss.', + metrics: [ + { label: 'Paper count', value: '3', detail: 'dense technical papers' }, + { label: 'Avg compression ratio', value: '1.347063', detail: 'original ÷ compact tokens' }, + { label: 'Replay consistency', value: '0.791667', detail: 'deterministic validator' } + ] + }, + { + title: 'Agent Trace Replay Benchmark', + benchmark: 'agent_trace_replay_bench', + artifactPath: 'artifacts/agent_trace_replay_results.json', + methodPath: 'docs/benchmarks/agent_trace_replay.md', + badges: ['Deterministic', 'Structured fixture baseline'], + note: 'Agent trace replay is currently near-lossless because fixtures are structured; iterative degradation pressure is the next validation target.', + metrics: [ + { label: 'Trace count', value: '3', detail: 'multi-step workflows' }, + { label: 'Avg compression ratio', value: '1.773954', detail: 'original ÷ compact tokens' }, + { label: 'Replay consistency', value: '1.000000', detail: 'deterministic validator' }, + { label: 'Operational drift', value: '0.000000', detail: 'required field loss' } + ] + } +]; + +export const artifactLinks = [ + 'artifacts/paper_replay_results.json', + 'artifacts/agent_trace_replay_results.json', + 'reports/replay_continuity/validation_report.md', + 'docs/benchmarks/paper_replay.md', + 'docs/benchmarks/agent_trace_replay.md' +]; diff --git a/showcase/app/src/main.tsx b/showcase/app/src/main.tsx index 138f445..764e30e 100644 --- a/showcase/app/src/main.tsx +++ b/showcase/app/src/main.tsx @@ -1,491 +1,259 @@ -import { StrictMode, useEffect, useId, useState } from 'react'; +import { StrictMode } from 'react'; import { createRoot } from 'react-dom/client'; import { - Activity, - Archive, + ArrowRight, + Boxes, CheckCircle2, ClipboardCheck, - Code2, - Database, + FileJson, + Gauge, GitBranch, - GitCommitHorizontal, - GitPullRequestArrow, + Link2, + LockKeyhole, Network, - PanelTop, - PlayCircle, - Radar, ShieldCheck, - SplitSquareHorizontal, - TimerReset, - Workflow + Workflow, + Zap } from 'lucide-react'; +import { artifactLinks, benchmarkArtifacts, repoBaseUrl } from './data/benchmarkArtifacts'; import './styles.css'; -type StatusTone = 'pass' | 'warn' | 'info' | 'neutral'; - -type Kpi = { - label: string; - value: string; - detail: string; - tone?: StatusTone; -}; - -type TimelineItem = { - time: string; - title: string; - detail: string; - status: string; -}; - -type Span = { - name: string; - id: string; - duration: string; - width: number; - offset: number; - depth: number; -}; - -const kpis: Kpi[] = [ - { label: 'Input Tokens', value: '1,284,221', detail: 'golden corpus batch', tone: 'neutral' }, - { label: 'Compressed Tokens', value: '382,118', detail: 'KVTC-V7 artifact frame', tone: 'info' }, - { label: 'Reduction Ratio', value: '70.2%', detail: 'deterministic replay retained', tone: 'info' }, - { label: 'Replay Drift Delta', value: '0.000', detail: 'REP-CONSISTENCY-441', tone: 'pass' }, - { label: 'Validation Pass Rate', value: '99.82%', detail: '2,184 CI validation runs', tone: 'pass' }, - { label: 'Replay Determinism', value: '100%', detail: 'pinned replay contract', tone: 'pass' }, - { label: 'Artifact Count', value: '441', detail: 'registry evidence objects', tone: 'neutral' }, - { label: 'CI Validation Runs', value: '2,184', detail: 'contract executions indexed', tone: 'info' } -]; - -const architectureNodes = [ - 'Raw Corpus Ingest', - 'Tokenizer Pipeline', - 'Compression Engine', - 'Validation Runner', - 'Replay Executor', - 'Drift Assertions', - 'Artifact Registry', - 'Enterprise Dashboard' -]; - -const mermaidDiagrams = [ +const valueCards = [ { - title: 'Deterministic Replay Pipeline', - chart: `flowchart LR - A[Raw Corpus Ingest] - B[Tokenizer Pipeline] - C[Compression Engine] - D[Validation Runner] - E[Replay Executor] - F[Drift Assertions] - G[Artifact Registry] - H[Enterprise Dashboard] - - A --> B --> C --> D --> E --> F --> G --> H` + title: 'Deterministic Replay', + body: 'Replay checks compare required operational fields with deterministic validators instead of subjective model scoring.', + icon: