Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 63 additions & 24 deletions showcase/app/scripts/validate-static.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -3,40 +3,79 @@ import { dirname, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';

// Showcase app directory: the parent of the directory that contains this script.
const root = resolve(dirname(fileURLToPath(import.meta.url)), '..');
// Two directory levels above the app directory; the artifacts/ JSON files are resolved from here.
const repoRoot = resolve(root, '..', '..');
// Everything below is loaded as raw UTF-8 text; the checks in this script search that text.
const app = await readFile(resolve(root, 'src', 'main.tsx'), 'utf8');
const css = await readFile(resolve(root, 'src', 'styles.css'), 'utf8');
const data = await readFile(resolve(root, 'src', 'data', 'benchmarkArtifacts.ts'), 'utf8');
// Kept as text (not parsed): the dependency check only looks for substrings.
const packageJson = await readFile(resolve(root, 'package.json'), 'utf8');
// Replay artifacts are parsed so their `aggregate` fields can be compared with the static dashboard data.
// NOTE(review): the parsed JSON is used without shape validation; a missing `aggregate` key would throw later.
const paperArtifact = JSON.parse(await readFile(resolve(repoRoot, 'artifacts', 'paper_replay_results.json'), 'utf8'));
const agentArtifact = JSON.parse(await readFile(resolve(repoRoot, 'artifacts', 'agent_trace_replay_results.json'), 'utf8'));

// Copy that must appear verbatim in the showcase source searched by the check
// that follows. The list covers the headline copy, benchmark titles, pipeline
// stage names, guarantee badges, the agent-trace caveat, and the repo-relative
// artifact/report/doc paths the page links to.
//
// Entries from the earlier enterprise-infrastructure copy were dropped: they
// were the removed side of the change and left the array literal unparseable
// (a string entry without a trailing comma directly before the next entry).
const requiredPhrases = [
  'Deterministic operational replay validation for long-horizon AI agents.',
  'Comptextv7 turns noisy context into compact operational state',
  'Deterministic Replay',
  'Operational Continuity',
  'CI Audit Artifacts',
  'Paper Replay Benchmark',
  'Agent Trace Replay Benchmark',
  'Raw Context / Agent Trace',
  'Operational State Extraction',
  'Compact Replay State',
  'Replay Reconstruction',
  'Deterministic Validation',
  'CI Artifact',
  'No LLM Judging',
  'No Embeddings',
  'No External APIs',
  'Deterministic JSON',
  'CI Reproducible',
  'Audit Friendly',
  'Agent trace replay is currently near-lossless because fixtures are structured',
  'artifacts/paper_replay_results.json',
  'artifacts/agent_trace_replay_results.json',
  'reports/replay_continuity/validation_report.md',
  'docs/benchmarks/paper_replay.md',
  'docs/benchmarks/agent_trace_replay.md'
];

// Required copy may live in either the page component or the static benchmark
// data module, so both sources are searched as one string.
// (The earlier app-only `missing` declaration and its enterprise-worded error
// were the removed side of the change; keeping both redeclared `missing`.)
const source = `${app}\n${data}`;
const missing = requiredPhrases.filter((phrase) => !source.includes(phrase));
if (missing.length > 0) {
  // Report every absent phrase at once, then fail the run with a non-zero exit code.
  console.error(`Missing required replay showcase copy: ${missing.join(', ')}`);
  process.exit(1);
}

/**
 * Renders an aggregate rate in fixed-point notation with six digits after the
 * decimal point, e.g. `1` becomes `'1.000000'`.
 *
 * @param value Numeric aggregate to format.
 * @returns The fixed-point string produced by `toFixed`.
 */
function formatAggregateRate(value) {
  const fractionDigits = 6;
  const formatted = value.toFixed(fractionDigits);
  return formatted;
}

/**
 * Escapes regular-expression metacharacters in `value` so the result can be
 * embedded in a `RegExp` source and match the original text literally.
 *
 * @param value Text to escape.
 * @returns `value` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash.
  return value.replace(metaCharacters, '\\$&');
}

/**
 * Fails the validation run unless the static benchmark data source contains a
 * metric entry whose `label` and `value` string literals match the given pair.
 *
 * The check is textual: it searches the raw text held in `data` rather than
 * importing the module. On a miss it logs the expected pair and exits the
 * process with code 1.
 *
 * @param label Metric label expected in the data source.
 * @param value Expected metric value, already formatted as a string.
 */
function assertMetricValue(label, value) {
  // `[^}]*` between the two keys stops at the first closing brace, so `value`
  // has to follow `label` inside the same object literal. Both inputs are
  // escaped so characters such as `.` match literally.
  const pattern = new RegExp(`label:\\s*['\"]${escapeRegExp(label)}['\"][^}]*value:\\s*['\"]${escapeRegExp(value)}['\"]`, 's');
  if (!pattern.test(data)) {
    console.error(`Static benchmark data is missing derived artifact metric: ${label} = ${value}`);
    process.exit(1);
  }
}
Comment thread
ProfRandom92 marked this conversation as resolved.

// Label/value pairs the static dashboard data must contain, derived from the
// `aggregate` section of each committed artifact. Counts are stringified as-is;
// rates go through formatAggregateRate.
// NOTE(review): 'Avg compression ratio' and 'Replay consistency' are listed for
// both artifacts, and each pair is matched against the whole data file, so a
// value placed under the wrong benchmark would still pass — confirm that is acceptable.
const expectedArtifactMetrics = [
  ['Paper count', String(paperArtifact.aggregate.paper_count)],
  ['Trace count', String(agentArtifact.aggregate.trace_count)],
  ['Avg compression ratio', formatAggregateRate(paperArtifact.aggregate.avg_compression_ratio)],
  ['Avg compression ratio', formatAggregateRate(agentArtifact.aggregate.avg_compression_ratio)],
  ['Replay consistency', formatAggregateRate(paperArtifact.aggregate.avg_replay_consistency)],
  ['Replay consistency', formatAggregateRate(agentArtifact.aggregate.avg_replay_consistency)],
  ['Operational drift', formatAggregateRate(agentArtifact.aggregate.avg_operational_drift_rate)]
];

// Checked in list order; the first mismatch ends the run.
expectedArtifactMetrics.forEach(([metricLabel, metricValue]) => {
  assertMetricValue(metricLabel, metricValue);
});

if (!packageJson.includes('vite') || !packageJson.includes('react')) {
console.error('React/Vite showcase dependencies are missing.');
process.exit(1);
Expand All @@ -47,4 +86,4 @@ if (!css.includes('@media (max-width: 760px)')) {
process.exit(1);
}

// Single success line for the replay showcase checks; the earlier
// enterprise-infrastructure message was the removed side of the change.
console.log('Deterministic replay showcase validation passed.');
60 changes: 60 additions & 0 deletions showcase/app/src/data/benchmarkArtifacts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/**
 * URL prefix for files on the `main` branch of the Comptextv7 GitHub repository.
 * Ends with a trailing slash.
 * NOTE(review): presumably prefixed to the repo-relative paths exported from this
 * module to build links — usage is outside this file; confirm in the consumer.
 */
export const repoBaseUrl = 'https://github.com/ProfRandom92/Comptextv7/blob/main/';
Comment thread
ProfRandom92 marked this conversation as resolved.

/** One labelled figure shown for a benchmark. */
export type BenchmarkMetric = {
/** Metric name, e.g. 'Replay consistency'. */
label: string;
/** Metric value stored as pre-formatted text (e.g. '0.791667'), not as a number. */
value: string;
/** Optional short qualifier for the value, e.g. 'deterministic validator'. */
detail?: string;
};

/** Static description of one replay benchmark and the metrics copied from its artifact. */
export type BenchmarkArtifact = {
/** Human-readable benchmark title. */
title: string;
/** Benchmark identifier, e.g. 'paper_replay_bench'. */
benchmark: string;
/** Repo-relative path of the committed results JSON. */
artifactPath: string;
/** Repo-relative path of the benchmark's method documentation. */
methodPath: string;
/** Short tags describing the benchmark, e.g. 'Deterministic'. */
badges: string[];
/** One-sentence caveat or summary for the benchmark. */
note: string;
/** Figures reported for this benchmark. */
metrics: BenchmarkMetric[];
};

// Static dashboard values copied from committed deterministic replay artifacts:
// - artifacts/paper_replay_results.json
// - artifacts/agent_trace_replay_results.json
// Keep these values in sync with committed artifact JSON; do not replace them with
// generated, model-judged, or externally fetched metrics.
//
// scripts/validate-static.mjs reads this file as plain text and pattern-matches each
// metric's label and value literals against the artifact JSON. Keep every metric as a
// single object literal with the label key before the value key, both as quoted string
// literals, and keep rate values at six decimal places so they match the validator's
// fixed-point formatting.
export const benchmarkArtifacts: BenchmarkArtifact[] = [
{
title: 'Paper Replay Benchmark',
benchmark: 'paper_replay_bench',
artifactPath: 'artifacts/paper_replay_results.json',
methodPath: 'docs/benchmarks/paper_replay.md',
badges: ['Deterministic', 'No LLM judging'],
note: 'Dense technical paper fixtures preserve entities, limitations, metrics, and section structure with measurable replay loss.',
metrics: [
{ label: 'Paper count', value: '3', detail: 'dense technical papers' },
{ label: 'Avg compression ratio', value: '1.347063', detail: 'original ÷ compact tokens' },
{ label: 'Replay consistency', value: '0.791667', detail: 'deterministic validator' }
]
},
{
title: 'Agent Trace Replay Benchmark',
benchmark: 'agent_trace_replay_bench',
artifactPath: 'artifacts/agent_trace_replay_results.json',
methodPath: 'docs/benchmarks/agent_trace_replay.md',
badges: ['Deterministic', 'Structured fixture baseline'],
// The validator requires the opening clause of this note verbatim; edit both together.
note: 'Agent trace replay is currently near-lossless because fixtures are structured; iterative degradation pressure is the next validation target.',
metrics: [
{ label: 'Trace count', value: '3', detail: 'multi-step workflows' },
{ label: 'Avg compression ratio', value: '1.773954', detail: 'original ÷ compact tokens' },
{ label: 'Replay consistency', value: '1.000000', detail: 'deterministic validator' },
{ label: 'Operational drift', value: '0.000000', detail: 'required field loss' }
]
}
];

/**
 * Repo-relative paths of the committed result artifacts, the validation report,
 * and the benchmark method docs.
 * scripts/validate-static.mjs requires each of these exact strings to appear in the
 * showcase source, so renaming a path here means updating its phrase list too.
 * NOTE(review): presumably joined with `repoBaseUrl` when rendered — confirm in the consumer.
 */
export const artifactLinks = [
'artifacts/paper_replay_results.json',
'artifacts/agent_trace_replay_results.json',
'reports/replay_continuity/validation_report.md',
'docs/benchmarks/paper_replay.md',
'docs/benchmarks/agent_trace_replay.md'
];
Loading
Loading