From 7d12906e6ca974d7d52dbdce531ca9bf24b9eec6 Mon Sep 17 00:00:00 2001
From: adibarra <93070681+adibarra@users.noreply.github.com>
Date: Fri, 17 Apr 2026 16:40:15 -0500
Subject: [PATCH] feat: add Pareto uplift panel for PR regression tracking

---
 .../components/inference/ui/ChartDisplay.tsx  |  25 ++
 .../inference/ui/ParetoUpliftPanel.test.tsx   | 234 ++++++++++
 .../inference/ui/ParetoUpliftPanel.tsx        | 418 ++++++++++++++++++
 packages/app/src/lib/pareto-uplift.test.ts    | 262 +++++++++++
 packages/app/src/lib/pareto-uplift.ts         | 235 ++++++++++
 packages/app/vitest.config.ts                 |   2 +-
 6 files changed, 1175 insertions(+), 1 deletion(-)
 create mode 100644 packages/app/src/components/inference/ui/ParetoUpliftPanel.test.tsx
 create mode 100644 packages/app/src/components/inference/ui/ParetoUpliftPanel.tsx
 create mode 100644 packages/app/src/lib/pareto-uplift.test.ts
 create mode 100644 packages/app/src/lib/pareto-uplift.ts
diff --git a/packages/app/src/components/inference/ui/ChartDisplay.tsx b/packages/app/src/components/inference/ui/ChartDisplay.tsx
index 2ecc93b2..fb9b859a 100644
--- a/packages/app/src/components/inference/ui/ChartDisplay.tsx
+++ b/packages/app/src/components/inference/ui/ChartDisplay.tsx
@@ -43,6 +43,7 @@ import ComparisonChangelog from './ComparisonChangelog';
 import CustomCosts from './CustomCosts';
 import CustomPowers from './CustomPowers';
 import GPUGraph from './GPUGraph';
+import ParetoUpliftPanel from './ParetoUpliftPanel';
 import TrendChart from './TrendChart';
 
 const ModelArchitectureDiagram = dynamic(() => import('./ModelArchitectureDiagram'), {
@@ -144,6 +145,7 @@ export default function ChartDisplay() {
     activeHwTypes,
     activeDates,
     setSelectedE2eXAxisMetric,
+    hardwareConfig,
   } = useInference();
 
   const {
@@ -501,6 +503,29 @@ export default function ChartDisplay() {
                     </div>
                   );
                 })()}
+                <ParetoUpliftPanel
+                  data={graph.data}
+                  chartDefinition={graph.chartDefinition}
+                  selectedYAxisMetric={selectedYAxisMetric}
+                  hardwareConfig={hardwareConfig}
+                  activeHwTypes={activeHwTypes}
+                  activeDates={activeDates}
+                  selectedPrecisions={selectedPrecisions}
+                  selectedRunDate={selectedRunDate}
+                  selectedDates={selectedDates}
+                  selectedDateRange={selectedDateRange}
+                  isTimelineMode={Boolean(
+                    selectedDateRange.startDate &&
+                    selectedDateRange.endDate &&
+                    selectedGPUs.length > 0,
+                  )}
+                  overlayData={
+                    graph.chartDefinition.chartType === 'e2e'
+                      ? (overlayDataByChartType.e2e ?? undefined)
+                      : (overlayDataByChartType.interactivity ?? undefined)
+                  }
+                  chartType={graph.chartDefinition.chartType}
+                />
               </Card>
             </figure>
           </section>
diff --git a/packages/app/src/components/inference/ui/ParetoUpliftPanel.test.tsx b/packages/app/src/components/inference/ui/ParetoUpliftPanel.test.tsx
new file mode 100644
index 00000000..4b97f2b5
--- /dev/null
+++ b/packages/app/src/components/inference/ui/ParetoUpliftPanel.test.tsx
@@ -0,0 +1,234 @@
+// @vitest-environment jsdom
+import React, { act } from 'react';
+import { createRoot, type Root } from 'react-dom/client';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import type {
+  ChartDefinition,
+  HardwareConfig,
+  InferenceData,
+  OverlayData,
+} from '@/components/inference/types';
+
+vi.mock('@/lib/constants', () => ({
+  getModelSortIndex: () => 0,
+}));
+
+import ParetoUpliftPanel from './ParetoUpliftPanel';
+
+let container: HTMLDivElement;
+let root: Root;
+
+function renderUi(ui: React.ReactNode) { // Renders `ui` into the per-test React root.
+  act(() => root.render(ui)); // act() lets React apply the render before the caller asserts on the DOM
+}
+
+beforeEach(() => { // Give every test its own DOM container and React root.
+  container = document.createElement('div');
+  document.body.append(container);
+  root = createRoot(container); // NOTE(review): React warns about act() unless IS_REACT_ACT_ENVIRONMENT is true — confirm the vitest setup sets it.
+});
+
+afterEach(() => { // Unmount the root and detach the container after each test.
+  act(() => root.unmount());
+  container.remove();
+});
+
+interface PtOpts { // Optional scalar stats accepted by pt(); omitted keys stay undefined on the point.
+  ttft?: number; // copied to median_ttft
+  p99_ttft?: number; // copied to p99_ttft
+  tpot?: number; // copied to median_tpot
+  e2el?: number; // copied to median_e2el
+  intvty?: number; // copied to median_intvty
+}
+
+function pt(x: number, y: number, hwKey: string, date: string, opts: PtOpts = {}): InferenceData {
+  return { // Fixture point: fixed tp/conc/precision; every y-metric variant below mirrors `y`.
+    date,
+    x,
+    y,
+    tp: 1,
+    conc: 1,
+    hwKey,
+    precision: 'fp8', // matches baseProps.selectedPrecisions so points pass the panel's precision filter
+    tpPerGpu: { y, roof: false },
+    tpPerMw: { y, roof: false },
+    costh: { y, roof: false },
+    costn: { y, roof: false },
+    costr: { y, roof: false },
+    costhi: { y, roof: false },
+    costni: { y, roof: false },
+    costri: { y, roof: false },
+    median_ttft: opts.ttft, // scalar stats stay undefined unless supplied through opts
+    p99_ttft: opts.p99_ttft,
+    median_tpot: opts.tpot,
+    median_e2el: opts.e2el,
+    median_intvty: opts.intvty,
+  };
+}
+
+const interactivityChartDef: ChartDefinition = { // Chart definition fixture that defines a roofline direction for y_tpPerGpu.
+  chartType: 'interactivity',
+  heading: 'vs. Interactivity',
+  x: 'median_intvty',
+  x_label: 'Interactivity (tok/s/user)',
+  y: 'tput_per_gpu',
+  y_tpPerGpu: 'tpPerGpu.y',
+  y_tpPerGpu_label: 'Token Throughput per GPU',
+  y_tpPerGpu_title: 'Token Throughput per GPU', // the panel uses `${selectedYAxisMetric}_title` as the primary row label
+  y_tpPerGpu_roofline: 'upper_left', // the panel reads `${selectedYAxisMetric}_roofline` to enable the Pareto row
+};
+
+const hardwareConfig: HardwareConfig = { // Keys match the hwKey values the tests pass to pt().
+  h100: { name: 'h100', label: 'H100', suffix: '(TRT)', gpu: 'H100 TRT' },
+  b200: { name: 'b200', label: 'B200', suffix: '(TRT)', gpu: 'B200 TRT' },
+};
+
+const baseProps = { // Defaults shared by all tests: no overlay, no comparison dates, non-timeline mode.
+  chartDefinition: interactivityChartDef,
+  selectedYAxisMetric: 'y_tpPerGpu', // selects the y_tpPerGpu_* keys of the chart definition
+  hardwareConfig,
+  activeHwTypes: new Set(['h100', 'b200']),
+  activeDates: new Set<string>(),
+  selectedPrecisions: ['fp8'],
+  selectedRunDate: '2025-04-17', // points on this date form the reference set when no overlay is passed
+  selectedDates: [] as string[],
+  selectedDateRange: { startDate: '', endDate: '' },
+  isTimelineMode: false,
+  chartType: 'interactivity',
+};
+
+describe('ParetoUpliftPanel', () => {
+  it('renders nothing when there are no comparison dates and no overlay', () => {
+    const data = [pt(10, 300, 'h100', '2025-04-17'), pt(20, 200, 'h100', '2025-04-17')];
+    renderUi(<ParetoUpliftPanel {...baseProps} data={data} />); // baseProps has no selectedDates and an empty range, so no columns exist
+    expect(container.querySelector('[data-testid="pareto-uplift-panel"]')).toBeNull();
+  });
+
+  it('renders a GPU × metric table with a primary Pareto row plus scalar time-stat rows', () => {
+    // Reference (main date): Pareto-valid upper_left front with 2× throughput and halved latencies.
+    const data = [
+      pt(10, 300, 'h100', '2025-04-17', {
+        ttft: 0.05,
+        p99_ttft: 0.1,
+        tpot: 0.02,
+        e2el: 0.5,
+        intvty: 50,
+      }),
+      pt(20, 200, 'h100', '2025-04-17', {
+        ttft: 0.04,
+        p99_ttft: 0.08,
+        tpot: 0.015,
+        e2el: 0.4,
+        intvty: 60,
+      }),
+      pt(10, 150, 'h100', '2025-04-10', {
+        ttft: 0.1,
+        p99_ttft: 0.2,
+        tpot: 0.04,
+        e2el: 1,
+        intvty: 25,
+      }),
+      pt(20, 100, 'h100', '2025-04-10', {
+        ttft: 0.08,
+        p99_ttft: 0.16,
+        tpot: 0.03,
+        e2el: 0.8,
+        intvty: 30,
+      }),
+    ];
+    renderUi(<ParetoUpliftPanel {...baseProps} data={data} selectedDates={['2025-04-10']} />); // 2025-04-10 becomes the single comparison column
+
+    const panel = container.querySelector('[data-testid="pareto-uplift-panel"]');
+    expect(panel).not.toBeNull();
+
+    const headers = [...container.querySelectorAll('thead th')].map((th) => th.textContent);
+    expect(headers[0]).toBe('GPU');
+    expect(headers[1]).toBe('Metric');
+    expect(headers[2]).toContain('2025-04-10');
+
+    const rows = [...container.querySelectorAll('tbody tr')];
+    // 1 primary (Pareto) + 5 scalar metrics (Median TTFT, P99 TTFT, Median TPOT, Median E2EL, Interactivity)
+    expect(rows).toHaveLength(6);
+
+    // First row: GPU name, "Token Throughput per GPU" metric label, +% cell (Pareto).
+    const firstCells = rows[0].querySelectorAll('td');
+    expect(firstCells[0].textContent).toContain('H100');
+    expect(firstCells[1].textContent).toContain('Token Throughput per GPU');
+    expect(firstCells[2].textContent).toMatch(/\+/);
+
+    // Subsequent rows carry no GPU label (only first row does).
+    expect(rows[1].querySelectorAll('td')[0].textContent).toBe('');
+    // Time-stat metrics present in order.
+    const metricLabels = rows.map((r) => r.querySelectorAll('td')[1].textContent);
+    expect(metricLabels).toEqual([
+      'Token Throughput per GPU',
+      'Median TTFT',
+      'P99 TTFT',
+      'Median TPOT',
+      'Median E2EL',
+      'Interactivity',
+    ]);
+
+    // TTFT row (lower is better): ref mean 0.045, hist mean 0.09 → normalized ratio 2 → "+100.0%"
+    const ttftCell = rows[1].querySelectorAll('td')[2];
+    expect(ttftCell.textContent).toContain('+100.0%');
+
+    // Interactivity (higher is better): ref mean 55, hist mean 27.5 → ratio 2 → "+100.0%"
+    const intvtyCell = rows[5].querySelectorAll('td')[2];
+    expect(intvtyCell.textContent).toContain('+100.0%');
+  });
+
+  it('uses overlay data as the reference when an unofficial PR run is present', () => {
+    const overlayData: OverlayData = {
+      data: [
+        pt(10, 600, 'h100', '2025-04-17', { ttft: 0.025, intvty: 100 }),
+        pt(20, 400, 'h100', '2025-04-17', { ttft: 0.02, intvty: 120 }),
+      ],
+      hardwareConfig,
+      label: 'feat/new-kernel',
+    };
+    const data = [
+      pt(10, 300, 'h100', '2025-04-17', { ttft: 0.05, intvty: 50 }),
+      pt(20, 200, 'h100', '2025-04-17', { ttft: 0.04, intvty: 60 }),
+    ];
+    renderUi(<ParetoUpliftPanel {...baseProps} data={data} overlayData={overlayData} />); // with an overlay, the main run date itself becomes a comparison column
+    const panel = container.querySelector('[data-testid="pareto-uplift-panel"]');
+    expect(panel).not.toBeNull();
+    expect(panel?.textContent).toContain('feat/new-kernel');
+
+    const headers = [...container.querySelectorAll('thead th')].map((th) => th.textContent);
+    expect(headers.some((h) => h?.includes('2025-04-17'))).toBe(true);
+  });
+
+  it('renders scalar rows even when the primary Pareto row is not computable', () => {
+    // Only 1 point on the reference side — too few for a Pareto front.
+    const data = [
+      pt(10, 300, 'h100', '2025-04-17', { ttft: 0.05 }),
+      pt(10, 150, 'h100', '2025-04-10', { ttft: 0.1 }),
+    ];
+    renderUi(<ParetoUpliftPanel {...baseProps} data={data} selectedDates={['2025-04-10']} />);
+    const rows = [...container.querySelectorAll('tbody tr')];
+    const labels = rows.map((r) => r.querySelectorAll('td')[1].textContent);
+    expect(labels).not.toContain('Token Throughput per GPU');
+    expect(labels).toContain('Median TTFT');
+  });
+
+  it('hides rows whose hwKey has no usable historical overlap', () => {
+    const data = [
+      pt(10, 300, 'h100', '2025-04-17', { ttft: 0.05 }),
+      pt(20, 200, 'h100', '2025-04-17', { ttft: 0.04 }),
+      // b200 has main-date data but no historical date data.
+      pt(10, 300, 'b200', '2025-04-17', { ttft: 0.05 }),
+      pt(20, 200, 'b200', '2025-04-17', { ttft: 0.04 }),
+      pt(10, 150, 'h100', '2025-04-10', { ttft: 0.1 }),
+      pt(20, 100, 'h100', '2025-04-10', { ttft: 0.08 }),
+    ];
+    renderUi(<ParetoUpliftPanel {...baseProps} data={data} selectedDates={['2025-04-10']} />);
+    const gpuLabels = [...container.querySelectorAll('tbody tr')]
+      .map((r) => r.querySelectorAll('td')[0].textContent)
+      .filter((s) => s && s.length > 0); // only the first row of each GPU group carries a label
+    expect(gpuLabels).toContain('H100 (TRT)');
+    expect(gpuLabels).not.toContain('B200 (TRT)');
+  });
+});
diff --git a/packages/app/src/components/inference/ui/ParetoUpliftPanel.tsx b/packages/app/src/components/inference/ui/ParetoUpliftPanel.tsx
new file mode 100644
index 00000000..289f5f2e
--- /dev/null
+++ b/packages/app/src/components/inference/ui/ParetoUpliftPanel.tsx
@@ -0,0 +1,418 @@
+'use client';
+
+import { Info } from 'lucide-react';
+import { useMemo } from 'react';
+
+import type {
+  ChartDefinition,
+  HardwareConfig,
+  InferenceData,
+  OverlayData,
+} from '@/components/inference/types';
+import {
+  TooltipContent,
+  TooltipProvider,
+  TooltipRoot,
+  TooltipTrigger,
+} from '@/components/ui/tooltip';
+import {
+  computeMeanUplift,
+  computeUplift,
+  formatUpliftPercent,
+  type MeanUpliftResult,
+  type RooflineDirection,
+  type UpliftResult,
+} from '@/lib/pareto-uplift';
+import { getModelSortIndex } from '@/lib/constants';
+import { getDisplayLabel } from '@/lib/utils';
+
+interface ParetoUpliftPanelProps {
+  /** Chart data (already filtered by useChartData — includes main + comparison-date points). */
+  data: InferenceData[];
+  chartDefinition: ChartDefinition; // looked up with `${selectedYAxisMetric}_roofline` and `${selectedYAxisMetric}_title`
+  selectedYAxisMetric: string; // key prefix for the chartDefinition lookups above
+  hardwareConfig: HardwareConfig; // indexed by hwKey to build the GPU display label
+  activeHwTypes: Set<string>; // hwKeys kept outside timeline mode; also filters overlay points
+  activeDates: Set<string>; // timeline mode only: entries are matched as `${date}_${hwKey}`
+  selectedPrecisions: string[]; // precision filter applied outside timeline mode
+  /** Main (official) run date. Used as the reference when no unofficial overlay is present. */
+  selectedRunDate: string;
+  /** Discrete comparison dates selected by the user. */
+  selectedDates: string[];
+  /** Range endpoints, used when the user picks a date range instead of individual dates. */
+  selectedDateRange: { startDate: string; endDate: string };
+  isTimelineMode: boolean; // switches the data filter from hardware/precision to activeDates
+  /** When an unofficial PR run is active, these points become the reference instead of the main date. */
+  overlayData?: OverlayData;
+  chartType: string; // only emitted as the data-chart-type attribute on the panel root
+}
+
+interface ColumnSpec { // One comparison-date column of the uplift table.
+  id: string; // set to the date; used as the React key and as the cell's columnId
+  label: string; // header text, rendered as "vs {label}"
+  hint?: string; // optional header suffix: 'main', 'range start' or 'range end'
+  date: string; // date used to look up historical points for this column
+}
+
+/** Scalar time / interactivity stats added beneath each GPU's primary Pareto row. */
+interface ScalarMetric {
+  id: string; // suffix of the row key (`${hwKey}|${id}`)
+  label: string; // text shown in the "Metric" column
+  field: keyof InferenceData; // property passed to computeMeanUplift as the field to average
+  higherIsBetter: boolean; // forwarded to computeMeanUplift for direction normalization
+  /** Unit shown in the tooltip; 's' additionally enables µs/ms/s scaling in formatValue. */
+  unit: string;
+}
+
+const SCALAR_METRICS: ScalarMetric[] = [ // Array order is the order of the scalar rows rendered under each GPU.
+  {
+    id: 'median_ttft',
+    label: 'Median TTFT',
+    field: 'median_ttft',
+    higherIsBetter: false,
+    unit: 's',
+  },
+  { id: 'p99_ttft', label: 'P99 TTFT', field: 'p99_ttft', higherIsBetter: false, unit: 's' },
+  {
+    id: 'median_tpot',
+    label: 'Median TPOT',
+    field: 'median_tpot',
+    higherIsBetter: false,
+    unit: 's',
+  },
+  {
+    id: 'median_e2el',
+    label: 'Median E2EL',
+    field: 'median_e2el',
+    higherIsBetter: false,
+    unit: 's',
+  },
+  {
+    id: 'median_intvty',
+    label: 'Interactivity',
+    field: 'median_intvty',
+    higherIsBetter: true, // the only metric in this list where a larger value is an improvement
+    unit: 'tok/s/user',
+  },
+];
+
+type CellValue = // Content of one table cell; UpliftCell switches on `kind`.
+  | { kind: 'pareto'; uplift: UpliftResult } // displays uplift.geomean
+  | { kind: 'mean'; uplift: MeanUpliftResult; metric: ScalarMetric } // displays uplift.ratio; metric.unit formats the tooltip
+  | null; // no comparable data — rendered as "—"
+
+interface Row {
+  rowKey: string; // `${hwKey}|__primary` or `${hwKey}|${metric.id}`; used as the React key
+  /** GPU display label shown only on the first row of each group. */
+  gpuLabel: string;
+  /** Metric label shown in the "Metric" column. */
+  metricLabel: string;
+  /** True for the first row of each GPU group (adds a top border and shows the GPU label). */
+  isFirstInGroup: boolean;
+  cells: { columnId: string; value: CellValue }[]; // one entry per column, in column order
+}
+
+/**
+ * Historical uplift table: rows = (GPU × metric), columns = comparison dates.
+ *
+ * Per GPU: one "primary" row for the chart's Pareto-curve uplift plus one scalar row per time
+ * stat (Median/P99 TTFT, Median TPOT, Median E2EL, Median Interactivity). Scalar cells compare
+ * the arithmetic mean of each metric between the reference and the historical date, normalized
+ * so >1 always reads "reference is better".
+ *
+ * Reference = unofficial PR overlay when present, else the main run date. Built for the PR
+ * review workflow — "does my branch regress H100 Dynamo-TRT vs last Friday's main?"
+ */
+export default function ParetoUpliftPanel({
+  data,
+  chartDefinition,
+  selectedYAxisMetric,
+  hardwareConfig,
+  activeHwTypes,
+  activeDates,
+  selectedPrecisions,
+  selectedRunDate,
+  selectedDates,
+  selectedDateRange,
+  isTimelineMode,
+  overlayData,
+  chartType,
+}: ParetoUpliftPanelProps) {
+  const rooflineDir = chartDefinition[
+    `${selectedYAxisMetric}_roofline` as keyof ChartDefinition
+  ] as RooflineDirection | undefined; // undefined when the chart defines no roofline for this metric; the Pareto row is then skipped
+
+  const primaryLabel = useMemo(() => {
+    const titleKey = `${selectedYAxisMetric}_title` as keyof ChartDefinition;
+    const title = chartDefinition[titleKey];
+    return typeof title === 'string' && title.length > 0 ? title : 'Primary'; // fall back when no non-empty title string exists
+  }, [chartDefinition, selectedYAxisMetric]);
+
+  const hasUnofficial = Boolean(overlayData && overlayData.data.length > 0); // an overlay with zero points counts as "no overlay"
+
+  const visibleData = useMemo(
+    () =>
+      data.filter((d) => {
+        if (isTimelineMode) return activeDates.has(`${d.date}_${d.hwKey}`); // timeline mode: keep active date/hardware pairs only
+        return activeHwTypes.has(d.hwKey) && selectedPrecisions.includes(d.precision ?? ''); // otherwise: active hardware + selected precision (missing precision compared as '')
+      }),
+    [data, isTimelineMode, activeDates, activeHwTypes, selectedPrecisions],
+  );
+
+  const columns = useMemo<ColumnSpec[]>(() => {
+    const seen = new Set<string>();
+    const cols: ColumnSpec[] = [];
+    const push = (date: string, hint?: string) => { // adds a column once per date; empty dates are ignored
+      if (!date || seen.has(date)) return;
+      seen.add(date);
+      cols.push({ id: date, date, label: date, hint });
+    };
+    if (hasUnofficial && selectedRunDate) push(selectedRunDate, 'main'); // with an overlay, the main run date is itself a comparison column
+    if (selectedDateRange.startDate) push(selectedDateRange.startDate, 'range start');
+    if (selectedDateRange.endDate) push(selectedDateRange.endDate, 'range end');
+    for (const d of selectedDates) push(d);
+    return cols.toSorted((a, b) => b.date.localeCompare(a.date)); // descending string order — newest first assuming ISO YYYY-MM-DD dates (TODO confirm)
+  }, [hasUnofficial, selectedRunDate, selectedDates, selectedDateRange]);
+
+  const { referenceByHw, historyByKey } = useMemo(() => {
+    const refByHw = new Map<string, InferenceData[]>(); // hwKey → reference points
+    const histByKey = new Map<string, InferenceData[]>(); // `${hwKey}|${date}` → historical points
+
+    if (hasUnofficial && overlayData) {
+      for (const p of overlayData.data) {
+        if (!activeHwTypes.has(p.hwKey)) continue; // NOTE(review): overlay points are filtered by hardware only, not by precision or activeDates — confirm intended
+        const arr = refByHw.get(p.hwKey);
+        if (arr) arr.push(p);
+        else refByHw.set(p.hwKey, [p]);
+      }
+    }
+
+    for (const p of visibleData) {
+      if (!hasUnofficial && p.date === selectedRunDate) { // no overlay: main-run-date points form the reference
+        const arr = refByHw.get(p.hwKey);
+        if (arr) arr.push(p);
+        else refByHw.set(p.hwKey, [p]);
+      }
+      const isHistorical = hasUnofficial ? true : p.date !== selectedRunDate; // with an overlay, every visible point (main date included) is historical
+      if (isHistorical) {
+        const key = `${p.hwKey}|${p.date}`;
+        const arr = histByKey.get(key);
+        if (arr) arr.push(p);
+        else histByKey.set(key, [p]);
+      }
+    }
+    return { referenceByHw: refByHw, historyByKey: histByKey };
+  }, [visibleData, overlayData, hasUnofficial, activeHwTypes, selectedRunDate]);
+
+  const rows = useMemo<Row[]>(() => {
+    if (columns.length === 0 || referenceByHw.size === 0) return []; // nothing to compare
+
+    const sortedHwKeys = [...referenceByHw.keys()].toSorted(
+      (a, b) => getModelSortIndex(a) - getModelSortIndex(b) || a.localeCompare(b), // NOTE(review): getModelSortIndex is called with hardware keys — confirm that is the intended helper
+    );
+
+    const result: Row[] = [];
+    for (const hwKey of sortedHwKeys) {
+      const refPoints = referenceByHw.get(hwKey)!; // key comes from the map itself, so the entry exists
+      const cfg = hardwareConfig[hwKey];
+      const gpuLabel = cfg ? getDisplayLabel({ label: cfg.label, suffix: cfg.suffix }) : hwKey; // raw hwKey when the hardware is not configured
+
+      const groupRows: Row[] = [];
+
+      // Primary Pareto-uplift row (only when the chart has a roofline direction AND enough ref pts).
+      if (rooflineDir && refPoints.length >= 2) {
+        const cells = columns.map((col) => {
+          const histPoints = historyByKey.get(`${hwKey}|${col.date}`);
+          if (!histPoints || histPoints.length < 2) { // the historical side also needs at least two points
+            return { columnId: col.id, value: null };
+          }
+          const uplift = computeUplift(histPoints, refPoints, rooflineDir); // historical points are the baseline, reference points the candidate
+          return {
+            columnId: col.id,
+            value:
+              Number.isFinite(uplift.geomean) && uplift.samples.length > 0
+                ? ({ kind: 'pareto', uplift } as const)
+                : null,
+          };
+        });
+        if (cells.some((c) => c.value !== null)) { // drop the row when no column produced a value
+          groupRows.push({
+            rowKey: `${hwKey}|__primary`,
+            gpuLabel,
+            metricLabel: primaryLabel,
+            isFirstInGroup: true,
+            cells,
+          });
+        }
+      }
+
+      // Scalar rows: arithmetic-mean ratio per metric.
+      for (const metric of SCALAR_METRICS) {
+        const cells = columns.map((col) => {
+          const histPoints = historyByKey.get(`${hwKey}|${col.date}`);
+          if (!histPoints || histPoints.length === 0) {
+            return { columnId: col.id, value: null };
+          }
+          const uplift = computeMeanUplift(
+            histPoints,
+            refPoints,
+            metric.field,
+            metric.higherIsBetter,
+          );
+          return {
+            columnId: col.id,
+            value: Number.isFinite(uplift.ratio)
+              ? ({ kind: 'mean', uplift, metric } as const)
+              : null,
+          };
+        });
+        if (cells.some((c) => c.value !== null)) {
+          groupRows.push({
+            rowKey: `${hwKey}|${metric.id}`,
+            gpuLabel,
+            metricLabel: metric.label,
+            isFirstInGroup: groupRows.length === 0, // the first scalar row leads the group when the Pareto row was skipped
+            cells,
+          });
+        }
+      }
+
+      if (groupRows.length > 0) result.push(...groupRows);
+    }
+    return result;
+  }, [referenceByHw, historyByKey, columns, rooflineDir, hardwareConfig, primaryLabel]);
+
+  if (!hasUnofficial && columns.length === 0) return null; // early returns stay below every hook call
+  if (rows.length === 0) return null;
+
+  const referenceLabel =
+    hasUnofficial && overlayData
+      ? `Reference: PR · ${overlayData.label}`
+      : `Reference: ${selectedRunDate || 'current'}`;
+
+  return (
+    <div
+      data-testid="pareto-uplift-panel"
+      data-chart-type={chartType}
+      className="mt-3 rounded-md border border-border/40 bg-muted/20 p-3 text-sm"
+    >
+      <div className="mb-2 flex flex-wrap items-center gap-x-3 gap-y-1">
+        <div className="flex items-center gap-1.5">
+          <span className="font-medium">Performance uplift</span>
+          <TooltipProvider>
+            <TooltipRoot delayDuration={150}>
+              <TooltipTrigger asChild>
+                <button
+                  type="button"
+                  className="text-muted-foreground hover:text-foreground transition-colors cursor-help"
+                  aria-label="About this metric"
+                >
+                  <Info className="size-3.5" />
+                </button>
+              </TooltipTrigger>
+              <TooltipContent className="max-w-xs">
+                <p className="text-xs leading-relaxed">
+                  How the reference set (PR overlay or current run) compares to each historical
+                  date, per GPU+framework. The first row per GPU is the Pareto-curve uplift for the
+                  chart's y-metric (geomean of per-SLA ratios across the x-overlap). The following
+                  rows compare the arithmetic mean of each time stat. All cells are
+                  direction-normalized so &gt;1 = reference is better.
+                </p>
+              </TooltipContent>
+            </TooltipRoot>
+          </TooltipProvider>
+        </div>
+        <span className="text-xs text-muted-foreground">{referenceLabel}</span>
+      </div>
+      <div className="overflow-x-auto">
+        <table className="w-full border-collapse text-xs">
+          <thead>
+            <tr className="border-b border-border/40">
+              <th className="sticky left-0 bg-muted/20 text-left font-medium text-muted-foreground py-1 pr-3">
+                GPU
+              </th>
+              <th className="text-left font-medium text-muted-foreground py-1 pr-3">Metric</th>
+              {columns.map((col) => (
+                <th
+                  key={col.id}
+                  className="text-right font-medium text-muted-foreground py-1 px-2 whitespace-nowrap"
+                >
+                  vs {col.label}
+                  {col.hint && <span className="ml-1 text-[10px] opacity-70">({col.hint})</span>}
+                </th>
+              ))}
+            </tr>
+          </thead>
+          <tbody>
+            {rows.map((row) => (
+              <tr
+                key={row.rowKey}
+                className={row.isFirstInGroup ? 'border-t border-border/40' : ''}
+              >
+                <td className="sticky left-0 bg-muted/20 py-1 pr-3 truncate max-w-[14rem]">
+                  {row.isFirstInGroup ? row.gpuLabel : ''}
+                </td>
+                <td className="py-1 pr-3 text-muted-foreground whitespace-nowrap">
+                  {row.metricLabel}
+                </td>
+                {row.cells.map((cell) => (
+                  <UpliftCell key={cell.columnId} value={cell.value} />
+                ))}
+              </tr>
+            ))}
+          </tbody>
+        </table>
+      </div>
+    </div>
+  );
+}
+
+function UpliftCell({ value }: { value: CellValue }) { // Renders one table cell: a colored percentage with an explanatory tooltip.
+  if (!value) { // no comparable data for this GPU/metric/date
+    return <td className="py-1 px-2 text-right text-muted-foreground">—</td>;
+  }
+  const ratio = value.kind === 'pareto' ? value.uplift.geomean : value.uplift.ratio; // both are direction-normalized ratios
+  const pct = formatUpliftPercent(ratio);
+  const isBetter = ratio > 1.0005; // ratios within ±0.0005 of 1 are shown in the neutral color
+  const isWorse = ratio < 0.9995;
+  const color = isBetter ? 'text-emerald-500' : isWorse ? 'text-red-500' : 'text-muted-foreground';
+
+  const tooltip =
+    value.kind === 'pareto' ? (
+      <p className="text-xs">
+        Pareto geomean across {value.uplift.samples.length} SLA samples, covering{' '}
+        {Math.round(value.uplift.coverage * 100)}% of the union x-range.
+        {value.uplift.coverage < 0.5 && ' ⚠ Narrow overlap — inspect the curves.'}
+      </p>
+    ) : (
+      <p className="text-xs">
+        Mean reference: {formatValue(value.uplift.meanCandidate, value.metric.unit)} · Mean
+        historical: {formatValue(value.uplift.meanBaseline, value.metric.unit)}
+        <br />
+        Based on {value.uplift.countCandidate} ref / {value.uplift.countBaseline} historical points.
+      </p>
+    );
+
+  return (
+    <td className="py-1 px-2 text-right">
+      <TooltipProvider>
+        <TooltipRoot delayDuration={150}>
+          <TooltipTrigger asChild>
+            <span className={`font-mono tabular-nums ${color} cursor-help`}>{pct}</span>
+          </TooltipTrigger>
+          <TooltipContent className="max-w-xs">{tooltip}</TooltipContent>
+        </TooltipRoot>
+      </TooltipProvider>
+    </td>
+  );
+}
+
+function formatValue(v: number, unit: string): string { // Formats a mean for the tooltip; seconds are scaled to µs / ms / s.
+  if (!Number.isFinite(v)) return '—'; // NaN and ±Infinity have no meaningful display value
+  if (unit === 's') {
+    if (v < 0.001) return `${(v * 1_000_000).toFixed(0)} µs`; // NOTE(review): zero and negative values also take this branch
+    if (v < 1) return `${(v * 1000).toFixed(1)} ms`; // NOTE(review): values just below a threshold can round up to "1000 µs" / "1000.0 ms"
+    return `${v.toFixed(2)} s`;
+  }
+  return `${v.toFixed(1)} ${unit}`; // any other unit: one decimal place followed by the unit text
+}
diff --git a/packages/app/src/lib/pareto-uplift.test.ts b/packages/app/src/lib/pareto-uplift.test.ts
new file mode 100644
index 00000000..cf01fca2
--- /dev/null
+++ b/packages/app/src/lib/pareto-uplift.test.ts
@@ -0,0 +1,262 @@
+import { describe, expect, it } from 'vitest';
+
+import type { InferenceData } from '@/components/inference/types';
+
+import {
+  computeMeanUplift,
+  computeUplift,
+  formatUpliftPercent,
+  interpolateY,
+  sampleSLAs,
+  yHigherIsBetter,
+} from './pareto-uplift';
+
+function pt(x: number, y: number, hwKey = 'h100', date = '2025-01-01'): InferenceData {
+  return { // Minimal fixture point: only x and y vary; every y-metric variant mirrors `y`.
+    date,
+    x,
+    y,
+    tp: 1,
+    conc: 1,
+    hwKey,
+    precision: 'fp8',
+    tpPerGpu: { y, roof: false },
+    tpPerMw: { y, roof: false },
+    costh: { y, roof: false },
+    costn: { y, roof: false },
+    costr: { y, roof: false },
+    costhi: { y, roof: false },
+    costni: { y, roof: false },
+    costri: { y, roof: false }, // no scalar stats (median_ttft etc.) are set by this builder
+  };
+}
+
+describe('interpolateY', () => { // Curves in these cases are {x, y} arrays listed in ascending x.
+  it('returns null for empty curve', () => {
+    expect(interpolateY([], 1)).toBeNull();
+  });
+
+  it('returns null when x is out of range', () => {
+    const curve = [
+      { x: 1, y: 10 },
+      { x: 2, y: 20 },
+    ];
+    expect(interpolateY(curve, 0.5)).toBeNull(); // below the first x
+    expect(interpolateY(curve, 2.5)).toBeNull(); // above the last x — no extrapolation
+  });
+
+  it('returns the exact y at endpoints', () => {
+    const curve = [
+      { x: 1, y: 10 },
+      { x: 2, y: 20 },
+    ];
+    expect(interpolateY(curve, 1)).toBe(10);
+    expect(interpolateY(curve, 2)).toBe(20);
+  });
+
+  it('linearly interpolates between two points', () => {
+    const curve = [
+      { x: 0, y: 0 },
+      { x: 10, y: 100 },
+    ];
+    expect(interpolateY(curve, 5)).toBe(50);
+    expect(interpolateY(curve, 2.5)).toBe(25);
+  });
+
+  it('interpolates across multi-segment curves', () => {
+    const curve = [
+      { x: 0, y: 0 },
+      { x: 10, y: 100 },
+      { x: 20, y: 110 },
+    ];
+    expect(interpolateY(curve, 5)).toBe(50); // first segment
+    expect(interpolateY(curve, 15)).toBe(105); // second segment: midpoint of 100 and 110
+  });
+});
+
+describe('sampleSLAs', () => { // Calls below use the argument order (min, max, n).
+  it('returns 5 log-spaced points over a 100x range', () => {
+    const slas = sampleSLAs(1, 100, 5);
+    expect(slas).toHaveLength(5);
+    expect(slas[0]).toBeCloseTo(1);
+    expect(slas[4]).toBeCloseTo(100);
+    expect(slas[2]).toBeCloseTo(10); // middle sample is the geometric midpoint of 1 and 100
+  });
+
+  it('returns linear-spaced points when the range is <10x', () => {
+    const slas = sampleSLAs(10, 50, 5);
+    expect(slas).toHaveLength(5);
+    expect(slas[0]).toBeCloseTo(10);
+    expect(slas[2]).toBeCloseTo(30); // middle sample is the arithmetic midpoint of 10 and 50
+    expect(slas[4]).toBeCloseTo(50);
+  });
+
+  it('returns [] for invalid ranges', () => {
+    expect(sampleSLAs(0, 10, 5)).toEqual([]); // zero lower bound
+    expect(sampleSLAs(10, 10, 5)).toEqual([]); // min equals max
+    expect(sampleSLAs(-1, 10, 5)).toEqual([]); // negative lower bound
+  });
+
+  it('returns geometric midpoint for n=1', () => {
+    expect(sampleSLAs(1, 100, 1)).toEqual([10]);
+  });
+});
+
+describe('yHigherIsBetter', () => { // Only the upper_/lower_ part of the direction decides the expected result here.
+  it('is true for upper_* directions', () => {
+    expect(yHigherIsBetter('upper_left')).toBe(true);
+    expect(yHigherIsBetter('upper_right')).toBe(true);
+  });
+
+  it('is false for lower_* directions', () => {
+    expect(yHigherIsBetter('lower_left')).toBe(false);
+    expect(yHigherIsBetter('lower_right')).toBe(false);
+  });
+});
+
+describe('computeUplift', () => { // Calls below use the argument order (baseline, candidate, direction).
+  it('returns 1.0 geomean for identical curves', () => {
+    const a = [pt(10, 100), pt(20, 200), pt(30, 250)];
+    const b = [pt(10, 100), pt(20, 200), pt(30, 250)];
+    const r = computeUplift(a, b, 'upper_right');
+    expect(r.geomean).toBeCloseTo(1, 5);
+    expect(r.coverage).toBe(1); // identical x-ranges: the overlap equals the union
+    expect(r.samples.length).toBeGreaterThan(0);
+  });
+
+  it('returns ~2.0 for a candidate uniformly 2x baseline throughput (upper_right)', () => {
+    const baseline = [pt(10, 100), pt(20, 150), pt(30, 180)];
+    const candidate = [pt(10, 200), pt(20, 300), pt(30, 360)];
+    const r = computeUplift(baseline, candidate, 'upper_right');
+    expect(r.geomean).toBeCloseTo(2, 2);
+  });
+
+  it('inverts ratio for lower-is-better metrics (cost)', () => {
+    // lower_left front shape: y decreases as x increases. Candidate has half the y everywhere
+    // → ratio (yBaseline / yCandidate) = 2, meaning candidate is 2x "better".
+    const baseline = [pt(1, 4), pt(2, 2), pt(3, 1)];
+    const candidate = [pt(1, 2), pt(2, 1), pt(3, 0.5)];
+    const r = computeUplift(baseline, candidate, 'lower_left');
+    expect(r.geomean).toBeCloseTo(2, 2);
+  });
+
+  it('returns NaN geomean and coverage=0 for disjoint x-ranges', () => {
+    const a = [pt(1, 10), pt(2, 20)];
+    const b = [pt(10, 100), pt(20, 200)];
+    const r = computeUplift(a, b, 'upper_right');
+    expect(Number.isNaN(r.geomean)).toBe(true);
+    expect(r.coverage).toBe(0);
+    expect(r.samples).toHaveLength(0);
+  });
+
+  it('reports partial coverage for partially-overlapping curves', () => {
+    // baseline spans [1, 10]; candidate spans [5, 20]. Overlap = [5, 10], union = [1, 20].
+    const baseline = [pt(1, 10), pt(5, 50), pt(10, 100)];
+    const candidate = [pt(5, 50), pt(10, 100), pt(20, 150)];
+    const r = computeUplift(baseline, candidate, 'upper_right');
+    expect(r.coverage).toBeCloseTo(5 / 19, 3); // overlap width 5 over union width 19
+    expect(r.geomean).toBeCloseTo(1, 2);
+    expect(r.overlapRange).toEqual({ min: 5, max: 10 });
+  });
+
+  it('handles empty inputs', () => {
+    const r = computeUplift([], [pt(1, 1)], 'upper_right');
+    expect(r.baselineFrontSize).toBe(0);
+    expect(Number.isNaN(r.geomean)).toBe(true);
+  });
+
+  it('does not mutate input arrays', () => {
+    const a = [pt(30, 300), pt(10, 100), pt(20, 200)]; // deliberately unsorted so an in-place sort would be detected
+    const aSnapshot = a.map((p) => ({ x: p.x, y: p.y }));
+    const b = [pt(10, 120), pt(20, 220), pt(30, 320)];
+    computeUplift(a, b, 'upper_right');
+    expect(a.map((p) => ({ x: p.x, y: p.y }))).toEqual(aSnapshot);
+  });
+
+  it('filters non-dominated points before sampling (picks Pareto front only)', () => {
+    // Baseline has a dominated point at (15, 50) that should not affect the curve.
+    const baseline = [pt(10, 100), pt(15, 50), pt(20, 150), pt(30, 180)];
+    const candidate = [pt(10, 200), pt(20, 300), pt(30, 360)];
+    const r = computeUplift(baseline, candidate, 'upper_right');
+    expect(r.geomean).toBeCloseTo(2, 2);
+    expect(r.baselineFrontSize).toBeLessThan(baseline.length); // the dominated point was dropped from the front
+  });
+});
+
+describe('computeMeanUplift', () => {
+  // Fixture factories: each returns the default pt(1, 1) point with exactly one metric
+  // overridden, so a test only spells out the value it cares about.
+  const withTtft = (ttft: number): InferenceData => ({
+    ...pt(1, 1),
+    median_ttft: ttft,
+  });
+  // Counterpart for the interactivity metric.
+  const withIntvty = (intvty: number) => ({ ...pt(1, 1), median_intvty: intvty });
+
+  it('returns 1.0 for identical averages', () => {
+    // Same values on both sides: both means are 0.75, so the ratio is exactly 1.
+    const baseline = [withTtft(0.5), withTtft(1)];
+    const candidate = [withTtft(0.5), withTtft(1)];
+    const result = computeMeanUplift(baseline, candidate, 'median_ttft', false);
+    expect(result.ratio).toBe(1);
+    expect(result.meanBaseline).toBe(0.75);
+    expect(result.meanCandidate).toBe(0.75);
+  });
+
+  it('inverts ratio for lower-is-better fields (TTFT)', () => {
+    // higherIsBetter = false: halving the mean TTFT (1.0 -> 0.5) must read as a 2x uplift,
+    // i.e. the ratio is baseline / candidate.
+    const baseline = [withTtft(1), withTtft(1)];
+    const candidate = [withTtft(0.5), withTtft(0.5)];
+    const result = computeMeanUplift(baseline, candidate, 'median_ttft', false);
+    expect(result.ratio).toBe(2);
+  });
+
+  it('keeps ratio unflipped for higher-is-better fields (interactivity)', () => {
+    // Means are 15 (baseline) and 30 (candidate); with higherIsBetter = true the ratio is
+    // candidate / baseline = 30 / 15.
+    const baseline = [withIntvty(10), withIntvty(20)];
+    const candidate = [withIntvty(30), withIntvty(30)];
+    const result = computeMeanUplift(baseline, candidate, 'median_intvty', true);
+    expect(result.ratio).toBe(2);
+  });
+
+  it('skips non-finite / non-positive values when computing the mean', () => {
+    // NaN and -5 must be ignored, leaving 1 and 3 on the baseline side: two usable values
+    // with mean 2, which matches the candidate.
+    const baseline = [1, Number.NaN, -5, 3].map((ttft) => withTtft(ttft));
+    const candidate = [withTtft(1), withTtft(3)];
+    const result = computeMeanUplift(baseline, candidate, 'median_ttft', false);
+    expect(result.countBaseline).toBe(2);
+    expect(result.meanBaseline).toBe(2);
+    expect(result.ratio).toBe(1);
+  });
+
+  it('returns NaN ratio when either side has no usable values', () => {
+    // A lone NaN leaves the candidate with nothing usable, so no ratio can be formed.
+    const baseline = [withTtft(1)];
+    const candidate = [withTtft(Number.NaN)];
+    const result = computeMeanUplift(baseline, candidate, 'median_ttft', false);
+    expect(Number.isNaN(result.ratio)).toBe(true);
+  });
+});
+
+describe('formatUpliftPercent', () => {
+  it('formats positive uplift', () => {
+    expect(formatUpliftPercent(1.173)).toBe('+17.3%'); // (1.173 - 1) * 100, one decimal place
+  });
+
+  it('formats negative uplift with a minus sign', () => {
+    expect(formatUpliftPercent(0.83)).toBe('−17.0%'); // typographic minus, not an ASCII hyphen
+  });
+
+  it('returns "parity" for ratios near 1', () => {
+    expect(formatUpliftPercent(1)).toBe('parity');
+    expect(formatUpliftPercent(0.9999)).toBe('parity'); // a -0.01% delta is below the cut-off
+  });
+
+  it('returns em dash for non-finite values', () => {
+    expect(formatUpliftPercent(Number.NaN)).toBe('—');
+    expect(formatUpliftPercent(Number.POSITIVE_INFINITY)).toBe('—');
+  });
+});
diff --git a/packages/app/src/lib/pareto-uplift.ts b/packages/app/src/lib/pareto-uplift.ts
new file mode 100644
index 00000000..f9e1da47
--- /dev/null
+++ b/packages/app/src/lib/pareto-uplift.ts
@@ -0,0 +1,235 @@
+import type { InferenceData } from '@/components/inference/types';
+
+import {
+  paretoFrontLowerLeft,
+  paretoFrontLowerRight,
+  paretoFrontUpperLeft,
+  paretoFrontUpperRight,
+} from './chart-utils';
+
+export type RooflineDirection = 'upper_left' | 'upper_right' | 'lower_left' | 'lower_right'; // 'upper_*': higher y is better; 'lower_*': lower y is better
+
+export interface ParetoPoint {
+  x: number; // position on the axis that fronts are sorted by and SLAs are sampled on
+  y: number; // value interpolated at a given x and compared between the two fronts
+}
+
+export interface UpliftSample {
+  x: number; // sampled SLA position inside the x-overlap of the two fronts
+  yBaseline: number; // baseline front's y, linearly interpolated at x
+  yCandidate: number; // candidate front's y, linearly interpolated at x
+  ratio: number; // yCandidate / yBaseline when higher y is better, else yBaseline / yCandidate
+}
+
+export interface UpliftResult {
+  /**
+   * Geometric mean of per-SLA ratios, direction-normalized so >1 always means "candidate
+   * better" and <1 always means "baseline better", regardless of whether y is higher-is-better
+   * (throughput) or lower-is-better (cost, energy). NaN when no SLA could be compared.
+   */
+  geomean: number;
+  samples: UpliftSample[]; // one entry per SLA at which both fronts produced a positive y
+  /** x-range where both Pareto fronts overlap and SLAs are sampled; null when there is none. */
+  overlapRange: { min: number; max: number } | null;
+  /** overlap / union of the two x-ranges. 0 = disjoint, 1 = identical bounds. */
+  coverage: number;
+  baselineFrontSize: number; // number of finite points on the baseline Pareto front
+  candidateFrontSize: number; // number of finite points on the candidate Pareto front
+}
+
+const FRONT_FNS: Record<RooflineDirection, (pts: InferenceData[]) => InferenceData[]> = {
+  lower_left: paretoFrontLowerLeft, // each direction maps to the selector named after it
+  lower_right: paretoFrontLowerRight,
+  upper_left: paretoFrontUpperLeft,
+  upper_right: paretoFrontUpperRight,
+};
+
+export function yHigherIsBetter(dir: RooflineDirection): boolean {
+  return ['upper_left', 'upper_right'].includes(dir); // only "upper" fronts treat larger y as better
+}
+
+/** Piecewise-linear y at x on a curve sorted ascending by x; null when x lies outside the curve. */
+export function interpolateY(curve: ParetoPoint[], x: number): number | null {
+  const n = curve.length;
+  // A lone point only covers its own x; an empty curve covers nothing.
+  if (n <= 1) return n === 1 && curve[0].x === x ? curve[0].y : null;
+  // Never extrapolate beyond the curve's end points.
+  if (x < curve[0].x || x > curve[n - 1].x) return null;
+  // The first segment [lo, hi] that brackets x decides the result.
+  for (let k = 1; k < n; k++) {
+    const lo = curve[k - 1];
+    const hi = curve[k];
+    if (!(x >= lo.x && x <= hi.x)) continue;
+    // Zero-width segment (duplicate x): there is no slope, so return the midpoint of the two ys.
+    if (hi.x === lo.x) return (lo.y + hi.y) / 2;
+    return lo.y + ((x - lo.x) / (hi.x - lo.x)) * (hi.y - lo.y);
+  }
+  return null; // no segment bracketed x (e.g. NaN coordinates or an unsorted curve)
+}
+
+/**
+ * Sample n points across [min, max] (min > 0): log-spaced when the range spans >10×, else linear;
+ * n === 1 gives the geometric midpoint. Samples are clamped so rounding cannot leave [min, max].
+ */
+export function sampleSLAs(min: number, max: number, n: number): number[] {
+  if (!(min > 0) || !(max > 0) || min >= max || n < 1) return [];
+  if (n === 1) return [Math.sqrt(min * max)];
+  const useLog = Math.log10(max / min) > 1;
+  const pts: number[] = [];
+  for (let i = 0; i < n; i++) {
+    const raw = useLog ? min * (max / min) ** (i / (n - 1)) : min + (i / (n - 1)) * (max - min);
+    pts.push(Math.min(max, Math.max(min, raw))); // e.g. min * (max / min) may round above max
+  }
+  return pts;
+}
+
+/** Reduce raw points to their Pareto front for `direction`, as finite {x, y} pairs ordered by x. */
+function computeFront(points: InferenceData[], direction: RooflineDirection): ParetoPoint[] {
+  if (points.length === 0) return [];
+  // Hand the selector a shallow copy: paretoFront* sorts in place and must not reorder `points`.
+  const finite: ParetoPoint[] = [];
+  for (const { x, y } of FRONT_FNS[direction]([...points])) {
+    if (Number.isFinite(x) && Number.isFinite(y)) finite.push({ x, y });
+  }
+  return finite.toSorted((p, q) => p.x - q.x);
+}
+
+/**
+ * Summarise how a candidate Pareto curve compares with a baseline as one geomean uplift ratio.
+ *
+ * Steps: reduce each point set to its Pareto front for `direction`, intersect the two x-ranges,
+ * spread `slaCount` SLA positions across that overlap, interpolate y on both fronts at each
+ * SLA and take the geometric mean of the per-SLA ratios. A ratio is candidate/baseline when
+ * higher y is better and baseline/candidate otherwise, so >1 always means "candidate better".
+ */
+export function computeUplift(
+  baselinePts: InferenceData[],
+  candidatePts: InferenceData[],
+  direction: RooflineDirection,
+  slaCount = 5,
+): UpliftResult {
+  const baseFront = computeFront(baselinePts, direction);
+  const candFront = computeFront(candidatePts, direction);
+
+  // Shape shared by every "nothing to compare" exit: NaN geomean, no samples, and whatever
+  // overlap metadata is known by the time we bail out.
+  const emptyResult = (
+    coverage: number,
+    overlapRange: UpliftResult['overlapRange'],
+  ): UpliftResult => ({
+    geomean: NaN,
+    samples: [],
+    overlapRange,
+    coverage,
+    baselineFrontSize: baseFront.length,
+    candidateFrontSize: candFront.length,
+  });
+
+  if (baseFront.length === 0 || candFront.length === 0) return emptyResult(0, null);
+
+  // computeFront returns points sorted by x, so the first/last entries bound each curve.
+  const baseLo = baseFront[0].x;
+  const baseHi = baseFront[baseFront.length - 1].x;
+  const candLo = candFront[0].x;
+  const candHi = candFront[candFront.length - 1].x;
+
+  const lo = Math.max(baseLo, candLo);
+  const hi = Math.min(baseHi, candHi);
+  const unionLo = Math.min(baseLo, candLo);
+  const unionHi = Math.max(baseHi, candHi);
+  // Shared part of the combined x-extent; a negative overlap (disjoint ranges) clamps to 0.
+  const coverage = unionHi > unionLo ? Math.max(0, (hi - lo) / (unionHi - unionLo)) : 0;
+
+  // Disjoint ranges, or ranges that meet at a single x, leave no interval to sample.
+  if (lo >= hi) return emptyResult(coverage, null);
+
+  const overlapRange = { min: lo, max: hi };
+  const candidateOverBaseline = yHigherIsBetter(direction);
+  const samples: UpliftSample[] = [];
+  for (const x of sampleSLAs(lo, hi, slaCount)) {
+    const yBaseline = interpolateY(baseFront, x);
+    const yCandidate = interpolateY(candFront, x);
+    if (yBaseline === null || yCandidate === null) continue;
+    // The log-based geometric mean below needs strictly positive values on both sides.
+    if (yBaseline <= 0 || yCandidate <= 0) continue;
+    const ratio = candidateOverBaseline ? yCandidate / yBaseline : yBaseline / yCandidate;
+    samples.push({ x, yBaseline, yCandidate, ratio });
+  }
+
+  if (samples.length === 0) return emptyResult(coverage, overlapRange);
+
+  // Geometric mean computed as exp(mean(ln ratio)).
+  const logSum = samples.reduce((acc, sample) => acc + Math.log(sample.ratio), 0);
+  return {
+    geomean: Math.exp(logSum / samples.length),
+    samples,
+    overlapRange,
+    coverage,
+    baselineFrontSize: baseFront.length,
+    candidateFrontSize: candFront.length,
+  };
+}
+
+export interface MeanUpliftResult {
+  /** Direction-normalized ratio (>1 = candidate is better on this metric); NaN if not comparable. */
+  ratio: number;
+  meanBaseline: number; // mean of the usable (finite, > 0) baseline values; NaN when there are none
+  meanCandidate: number; // mean of the usable (finite, > 0) candidate values; NaN when there are none
+  countBaseline: number; // how many baseline values were usable
+  countCandidate: number; // how many candidate values were usable
+}
+
+/**
+ * Contrast the arithmetic mean of one numeric field across two point sets. Only finite,
+ * strictly positive values of `field` count towards a mean. The returned ratio is
+ * direction-normalized (candidate/baseline when `higherIsBetter`, baseline/candidate otherwise)
+ * so >1 always reads "candidate outperformed baseline"; it is NaN when a side has no usable value.
+ *
+ * Used for the time-stat rows in the uplift table: a simpler "how did the average move?"
+ * signal that does not need a Pareto curve or SLA sampling.
+ */
+export function computeMeanUplift(
+  baselinePts: InferenceData[],
+  candidatePts: InferenceData[],
+  field: keyof InferenceData,
+  higherIsBetter: boolean,
+): MeanUpliftResult {
+  // Non-numeric, non-finite and non-positive entries are ignored rather than treated as zero.
+  const isUsable = (v: unknown): v is number =>
+    typeof v === 'number' && Number.isFinite(v) && v > 0;
+
+  // Mean (NaN when nothing is usable) and count of the usable `field` values in one point set.
+  const summarize = (pts: InferenceData[]): { mean: number; count: number } => {
+    const usable = pts.map((p) => p[field]).filter(isUsable);
+    const total = usable.reduce((acc, v) => acc + v, 0);
+    const count = usable.length;
+    return { count, mean: count > 0 ? total / count : NaN };
+  };
+
+  const base = summarize(baselinePts);
+  const cand = summarize(candidatePts);
+
+  // A ratio is only meaningful when both means are finite and positive; otherwise report NaN
+  // while still returning the per-side means and counts.
+  const comparable = [base.mean, cand.mean].every((m) => Number.isFinite(m) && m > 0);
+  let ratio = NaN;
+  if (comparable) {
+    ratio = higherIsBetter ? cand.mean / base.mean : base.mean / cand.mean;
+  }
+  return {
+    ratio,
+    meanBaseline: base.mean,
+    meanCandidate: cand.mean,
+    countBaseline: base.count,
+    countCandidate: cand.count,
+  };
+}
+
+/** Render a ratio as a signed percent ("+17.3%", "−4.1%"), "parity" near 1, "—" if non-finite. */
+export function formatUpliftPercent(ratio: number): string {
+  if (!Number.isFinite(ratio)) return '—';
+  const delta = (ratio - 1) * 100;
+  const magnitude = Math.abs(delta);
+  if (magnitude < 0.05) return 'parity'; // would otherwise print as a meaningless 0.0%
+  return (delta > 0 ? '+' : '−') + magnitude.toFixed(1) + '%';
+}
diff --git a/packages/app/vitest.config.ts b/packages/app/vitest.config.ts
index 374c66e5..85e6fbc6 100644
--- a/packages/app/vitest.config.ts
+++ b/packages/app/vitest.config.ts
@@ -4,7 +4,7 @@ import path from 'path';
 export default defineConfig({
   test: {
     environment: 'node',
-    include: ['src/**/*.test.ts'],
+    include: ['src/**/*.test.{ts,tsx}'],
     coverage: {
       provider: 'v8',
       include: ['src/lib/**/*.ts', 'src/scripts/**/*.ts', 'src/app/api/**/*.ts'],