diff --git a/.gitignore b/.gitignore
index 92aa0ee8..a03616d2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -31,3 +31,5 @@ dist/
 
 # Eval results
 tests/eval-results/
+.next/
+.react-router/
diff --git a/skills/workos-authkit-nextjs/SKILL.md b/skills/workos-authkit-nextjs/SKILL.md
index fb9ff394..215ce099 100644
--- a/skills/workos-authkit-nextjs/SKILL.md
+++ b/skills/workos-authkit-nextjs/SKILL.md
@@ -55,6 +55,45 @@ Next.js version?
 
 Middleware/proxy code: See README for `authkitMiddleware()` export pattern.
 
+### Existing Middleware (IMPORTANT)
+
+If `middleware.ts` (or `proxy.ts`) already exists with custom logic (rate limiting, logging, headers, etc.), use the **`authkit()` composable function** instead of `authkitMiddleware`.
+
+**Pattern for composing with existing middleware:**
+
+```typescript
+import { NextRequest, NextResponse } from 'next/server';
+import { authkit, handleAuthkitHeaders } from '@workos-inc/authkit-nextjs';
+
+export default async function middleware(request: NextRequest) {
+  // 1. Get auth session and headers from AuthKit
+  const { session, headers, authorizationUrl } = await authkit(request);
+  const { pathname } = request.nextUrl;
+
+  // 2. === YOUR EXISTING MIDDLEWARE LOGIC ===
+  // Rate limiting, logging, custom headers, etc.
+  const rateLimitResult = checkRateLimit(request);
+  if (!rateLimitResult.allowed) {
+    return new NextResponse('Too Many Requests', { status: 429 });
+  }
+
+  // 3. Protect routes - redirect to auth if needed
+  if (pathname.startsWith('/dashboard') && !session.user && authorizationUrl) {
+    return handleAuthkitHeaders(request, headers, { redirect: authorizationUrl });
+  }
+
+  // 4. Continue with AuthKit headers properly handled
+  return handleAuthkitHeaders(request, headers);
+}
+```
+
+**Key functions:**
+- `authkit(request)` - Returns `{ session, headers, authorizationUrl }` for composition
+- `handleAuthkitHeaders(request, headers, options?)` - Ensures AuthKit headers pass through correctly
+- For rewrites, use `partitionAuthkitHeaders()` and `applyResponseHeaders()` (see README)
+
+**Critical:** Always return via `handleAuthkitHeaders()` to ensure `withAuth()` works in pages.
+
 ## Step 5: Create Callback Route
 
 Parse `NEXT_PUBLIC_WORKOS_REDIRECT_URI` to determine route path:
@@ -78,33 +117,57 @@ export const GET = handleAuth();
 
 Check README for exact usage. If build fails with "cookies outside request scope", the handler is likely missing async/await.
 
-## Step 6: Provider Setup
+## Step 6: Provider Setup (REQUIRED)
+
+**CRITICAL:** You MUST wrap the app in `AuthKitProvider` in `app/layout.tsx`.
+
+This is required for:
+- Client-side auth state via `useAuth()` hook
+- Consistent auth UX across client/server boundaries
+- Proper migration from Auth0 (which uses client-side auth)
+
+```tsx
+// app/layout.tsx
+import { AuthKitProvider } from '@workos-inc/authkit-nextjs';
+
+export default function RootLayout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <body>
+        <AuthKitProvider>{children}</AuthKitProvider>
+      </body>
+    </html>
+  );
+}
+```
+
+Check README for exact import path - it may be a subpath export like `@workos-inc/authkit-nextjs/components`.
 
-Wrap app in `AuthKitProvider` in `app/layout.tsx`. See README for import path.
+**Do NOT skip this step** even if using server-side auth patterns elsewhere.
 
 ## Step 7: UI Integration
 
 Add auth UI to `app/page.tsx` using SDK functions. See README for `getUser`, `getSignInUrl`, `signOut` usage.
 
-## Verification Checklist
+## Verification Checklist (ALL MUST PASS)
 
-Run these commands to confirm integration:
+Run these commands to confirm integration. **Do not mark complete until all pass:**
 
 ```bash
-# Check middleware/proxy exists (one should match)
+# 1. Check middleware/proxy exists (one should match)
 ls proxy.ts middleware.ts src/proxy.ts src/middleware.ts 2>/dev/null
 
-# Check provider is wrapped
-grep -l "AuthKitProvider" app/layout.tsx
+# 2. CRITICAL: Check AuthKitProvider is in layout (REQUIRED)
+grep "AuthKitProvider" app/layout.tsx || echo "FAIL: AuthKitProvider missing from layout"
 
-# Check callback route exists
+# 3. Check callback route exists
 find app -name "route.ts" -path "*/callback/*"
 
-# Build succeeds
+# 4. Build succeeds
 npm run build
 ```
 
-All checks must pass before marking complete.
+**If check #2 fails:** Go back to Step 6 and add AuthKitProvider. This is not optional.
 
 ## Error Recovery
 
diff --git a/skills/workos-authkit-tanstack-start/SKILL.md b/skills/workos-authkit-tanstack-start/SKILL.md
index 5d0e531e..c83ed2d5 100644
--- a/skills/workos-authkit-tanstack-start/SKILL.md
+++ b/skills/workos-authkit-tanstack-start/SKILL.md
@@ -43,6 +43,7 @@ From README, extract:
 ## Directory Structure Detection
 
 **Modern TanStack Start (v1.132+)** uses `src/`:
+
 ```
 src/
 ├── start.ts              # Middleware config (CRITICAL)
@@ -54,6 +55,7 @@ src/
 ```
 
 **Legacy (vinxi-based)** uses `app/`:
+
 ```
 app/
 ├── start.ts or router.tsx
@@ -62,6 +64,7 @@ app/
 ```
 
 **Detection:**
+
 ```bash
 ls src/routes 2>/dev/null && echo "Modern (src/)" || echo "Legacy (app/)"
 ```
@@ -94,6 +97,7 @@ export default {
 ```
 
 Alternative pattern with createStart:
+
 ```typescript
 import { createStart } from '@tanstack/react-start';
 import { authkitMiddleware } from '@workos/authkit-tanstack-react-start';
@@ -132,6 +136,7 @@ export const Route = createFileRoute('/api/auth/callback')({
 ```
 
 **Key points:**
+
 - Use `handleCallbackRoute()` - do not write custom OAuth logic
 - Route path string must match the URI path exactly
 - This is a server-only route (no component needed)
@@ -221,6 +226,7 @@ function Profile() {
 
 **Cause:** Route file path doesn't match WORKOS_REDIRECT_URI
 **Fix:**
+
 - URI `/api/auth/callback` → file `src/routes/api.auth.callback.tsx` (flat) or `app/routes/api/auth/callback.tsx` (nested)
 - Route path string in `createFileRoute()` must match exactly
 
@@ -242,6 +248,7 @@ function Profile() {
 ## SDK Exports Reference
 
 **Server (main export):**
+
 - `authkitMiddleware()` - Request middleware
 - `handleCallbackRoute()` - OAuth callback handler
 - `getAuth()` - Get current session
@@ -250,6 +257,7 @@ function Profile() {
 - `switchToOrganization()` - Change org context
 
 **Client (`/client` subpath):**
+
 - `AuthKitProvider` - Context provider
 - `useAuth()` - Auth state hook
 - `useAccessToken()` - Token management
diff --git a/tests/evals/README.md b/tests/evals/README.md
index 71009ee8..292e29d5 100644
--- a/tests/evals/README.md
+++ b/tests/evals/README.md
@@ -1,6 +1,6 @@
 # Installer Evaluations
 
-Automated evaluation framework for testing WorkOS AuthKit installer skills against realistic project scenarios.
+Automated evaluation framework for testing WorkOS AuthKit installer skills.
 
 ## Quick Start
 
@@ -11,72 +11,137 @@ pnpm eval
 # Run specific framework
 pnpm eval --framework=nextjs
 
-# Run specific scenario
-pnpm eval --framework=react --state=example-auth0
+# Run with quality grading
+pnpm eval --quality
 ```
 
+## Success Criteria
+
+The eval framework validates against these thresholds:
+
+| Metric                  | Threshold |
+| ----------------------- | --------- |
+| First-attempt pass rate | ≥90%      |
+| With-retry pass rate    | ≥95%      |
+
+Use `--no-fail` to exit 0 even when these thresholds are not met.
+
 ## Test Matrix
 
-The framework tests 10 scenarios (5 frameworks × 2 project states):
+**Scenarios: 24 total (5 frameworks × 4-5 states)**
 
-| State           | Description                                          |
-| --------------- | ---------------------------------------------------- |
-| `example`       | Project with routes, components, custom config       |
-| `example-auth0` | Project with Auth0 authentication already integrated |
+| State                    | Description                       |
+| ------------------------ | --------------------------------- |
+| `example`                | Clean project, no existing auth   |
+| `example-auth0`          | Project with Auth0 to migrate     |
+| `partial-install`        | Half-completed AuthKit attempt    |
+| `typescript-strict`      | Strict TypeScript configuration   |
+| `conflicting-middleware` | Existing middleware to merge      |
 
-| Framework        | Skill                         | Key Checks                                     |
-| ---------------- | ----------------------------- | ---------------------------------------------- |
-| `nextjs`         | workos-authkit-nextjs         | middleware.ts, callback route, AuthKitProvider |
-| `react`          | workos-authkit-react          | AuthKitProvider, callback component, useAuth   |
-| `react-router`   | workos-authkit-react-router   | Auth loader, protected routes                  |
-| `tanstack-start` | workos-authkit-tanstack-start | Server functions, callback route               |
-| `vanilla-js`     | workos-authkit-vanilla-js     | Auth script, callback page                     |
+| Framework        | Skill                         | Key Checks                                      |
+| ---------------- | ----------------------------- | ----------------------------------------------- |
+| `nextjs`         | workos-authkit-nextjs         | middleware.ts, callback route, AuthKitProvider  |
+| `react`          | workos-authkit-react          | AuthKitProvider, callback component, useAuth    |
+| `react-router`   | workos-authkit-react-router   | Auth loader, protected routes                   |
+| `tanstack-start` | workos-authkit-tanstack-start | Server functions, callback route                |
+| `vanilla-js`     | workos-authkit-vanilla-js     | Auth script, callback page                      |
 
 ## CLI Options
 
 ```
---framework=<name>  Filter by framework
+--framework=<name>  Filter by framework (nextjs, react, react-router, tanstack-start, vanilla-js)
 --state=<state>     Filter by project state
---verbose, -v       Show agent tool calls and detailed output
+--quality, -q       Enable LLM-based quality grading
+--verbose, -v       Show agent output and tool calls
 --debug             Extra verbose, preserve temp dirs on failure
 --keep-on-fail      Don't cleanup temp directory when scenario fails
---retry=<n>         Number of retry attempts (default: 2)
+--retry=<n>         Retry attempts (default: 2)
 --no-retry          Disable retries
---json              Output results as JSON
+--no-fail           Don't exit 1 on threshold failure
+--sequential        Run scenarios sequentially (disable parallelism)
+--no-dashboard      Disable live dashboard, use sequential logging
+--json              Output as JSON
 --help, -h          Show help
 ```
 
-## Debugging Failures
+## Quality Grading
 
-### 1. Inspect the failure details
+When enabled with `--quality`, passing scenarios are graded on:
 
-```bash
-pnpm eval --framework=react --state=example-auth0 --verbose
-```
+| Dimension      | Description                         |
+| -------------- | ----------------------------------- |
+| Code Style     | Adherence to project conventions    |
+| Minimalism     | Changes are focused, no extras      |
+| Error Handling | Proper error handling and messages  |
+| Idiomatic      | Follows framework best practices    |
 
-### 2. Preserve the temp directory
+Each dimension is scored 1-5. See `quality-rubrics.ts` for detailed rubrics.
 
-```bash
-pnpm eval --framework=react --state=example-auth0 --keep-on-fail
-# Output will show: "Temp directory preserved: /tmp/eval-react-xxxxx"
-```
+## Latency Metrics
 
-### 3. Manually inspect the project state
+Every run tracks:
 
-```bash
-cd /tmp/eval-react-xxxxx
-ls -la
-cat middleware.ts
-```
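+- **TTFT**: Time from run start to the first text content from the agent
+- **Agent Thinking**: Total duration minus tool execution time
+- **Tool Execution**: Time spent in tool calls, broken down per tool
+- **Tokens/sec**: Output throughput (output tokens ÷ total run duration)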
 
-### 4. Compare with previous runs
+## Comparing Runs
 
 ```bash
 # List recent runs
 pnpm eval:history
 
+# Show more runs
+pnpm eval:history --limit=20
+
 # Compare two runs
-pnpm eval:compare 2024-01-15T10-30-00 2024-01-16T14-45-00
+pnpm eval:diff 2024-01-15T10-30-00 2024-01-16T14-45-00
+
+# Use 'latest' as alias for most recent run
+pnpm eval:diff latest 2024-01-15T10-30-00
+```
+
+The diff command shows:
+
+- Pass rate changes (first-attempt and with-retry)
+- Skill version changes (with correlation analysis)
+- Scenario regressions/improvements
+- Latency changes (p50, p95)
+- Quality score changes
+
+### Correlation Analysis
+
+When skill files change AND scenarios regress, the diff command highlights likely causes:
+
+```
+Likely Causes:
+  ⚠ nextjs skill changed (03133745 → a1b2c3d4) and 2 scenario(s) regressed
+```
+
+## Results Storage
+
+Results saved to `tests/eval-results/`:
+
+- `{timestamp}.json` - Full results with metadata
+- `latest.json` - Symlink to most recent
+
+Each result file includes:
+
+- Summary (pass rates, scenario counts)
+- Per-scenario results with checks
+- Latency metrics (TTFT, tool breakdown)
+- Quality grades (if enabled)
+- Metadata (skill versions, CLI version, model version)
+
+Prune old results:
+
+```bash
+# Keep only 10 most recent (default)
+pnpm eval:prune
+
+# Keep specific number
+pnpm eval:prune --keep=5
 ```
 
 ## Adding a New Fixture
@@ -135,16 +200,29 @@ checks.push(await this.buildGrader.checkBuild());
 return { passed: checks.every((c) => c.passed), checks };
 ```
 
-## Results Storage
+## Troubleshooting
 
-Results are saved to `tests/eval-results/`:
+### "Build failed" but files look correct
 
-- Each run creates `{timestamp}.json`
-- `latest.json` symlinks to most recent
-- Use `pnpm eval:history` to list runs
-- Use `pnpm eval:compare` to diff runs
+Use `--keep-on-fail` to preserve temp directory and inspect:
 
-## Troubleshooting
+```bash
+pnpm eval --framework=nextjs --keep-on-fail
+cd /tmp/eval-nextjs-xxxxx && pnpm build
+```
+
+### Flaky passes/failures
+
+Increase retries: `pnpm eval --retry=3`
+
+If consistently flaky, check if skill instructions are ambiguous.
+
+### Pass rate regression
+
+1. Run `pnpm eval:diff latest <previous-run>`
+2. Check "Likely Causes" section
+3. Review skill file changes listed
+4. If no skill changes, check for external factors (API changes, dependency updates)
 
 ### "pnpm install failed"
 
@@ -155,21 +233,13 @@ cd tests/fixtures/{framework}/{state}
 pnpm install
 ```
 
-### "Build failed" but files look correct
+### High latency
 
-The agent may have created correct files but with syntax errors. Use `--keep-on-fail` to inspect:
+Check the tool breakdown in the summary output to identify bottlenecks:
 
-```bash
-pnpm eval --framework=nextjs --keep-on-fail
-# Then run build manually in temp dir to see full error
 ```
-
-### Flaky passes/failures
-
-LLM responses vary. Use `--retry=3` for more attempts:
-
-```bash
-pnpm eval --retry=3
+Tool Time Breakdown (total across all scenarios):
+  Bash: 206.5s (27 calls)
+  Read: 54.3s (14 calls)
+  ...
 ```
-
-If a scenario is consistently flaky, check if the skill instructions are ambiguous.
diff --git a/tests/evals/__tests__/latency-tracker.spec.ts b/tests/evals/__tests__/latency-tracker.spec.ts
new file mode 100644
index 00000000..3e0a7412
--- /dev/null
+++ b/tests/evals/__tests__/latency-tracker.spec.ts
@@ -0,0 +1,192 @@
+import { describe, it, expect, beforeEach, vi, afterEach } from 'vitest';
+import { LatencyTracker } from '../latency-tracker.js';
+
+describe('LatencyTracker', () => {
+  let tracker: LatencyTracker;
+  let mockTime: number; // value the mocked performance.now() returns; tests advance it by hand
+
+  beforeEach(() => {
+    tracker = new LatencyTracker();
+    mockTime = 0;
+    vi.spyOn(performance, 'now').mockImplementation(() => mockTime);
+  });
+
+  afterEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  describe('start()', () => {
+    it('resets all counters', () => {
+      // First session: record TTFT, one Bash tool call and token counts
+      tracker.start();
+      mockTime = 100;
+      tracker.recordFirstContent();
+      tracker.startToolCall('Bash');
+      mockTime = 200;
+      tracker.endToolCall();
+      tracker.recordTokens(1000, 500);
+
+      // Second session: call start() again and record nothing before finish()
+      mockTime = 0;
+      tracker.start();
+      mockTime = 50;
+      const metrics = tracker.finish();
+
+      // Nothing from the first session may leak through: no TTFT, no tools, zero tokens
+      expect(metrics.ttftMs).toBeNull();
+      expect(metrics.toolBreakdown).toHaveLength(0);
+      expect(metrics.tokenMetrics?.inputTokens).toBe(0);
+      expect(metrics.tokenMetrics?.outputTokens).toBe(0);
+    });
+  });
+
+  describe('recordFirstContent()', () => {
+    it('only records first call', () => {
+      tracker.start();
+
+      mockTime = 100;
+      tracker.recordFirstContent();
+
+      mockTime = 200;
+      tracker.recordFirstContent(); // Second call must not overwrite the 100ms reading
+
+      mockTime = 300;
+      const metrics = tracker.finish();
+
+      expect(metrics.ttftMs).toBe(100);
+    });
+
+    it('returns null if never called', () => {
+      tracker.start();
+      mockTime = 100;
+      const metrics = tracker.finish();
+
+      expect(metrics.ttftMs).toBeNull();
+    });
+  });
+
+  describe('tool timing', () => {
+    it('aggregates by tool name', () => {
+      tracker.start();
+
+      // First Bash call: 100ms (0 -> 100)
+      mockTime = 0;
+      tracker.startToolCall('Bash');
+      mockTime = 100;
+      tracker.endToolCall();
+
+      // Second Bash call: 50ms (150 -> 200)
+      mockTime = 150;
+      tracker.startToolCall('Bash');
+      mockTime = 200;
+      tracker.endToolCall();
+
+      // Write call: 30ms (200 -> 230)
+      mockTime = 200;
+      tracker.startToolCall('Write');
+      mockTime = 230;
+      tracker.endToolCall();
+
+      mockTime = 300;
+      const metrics = tracker.finish();
+
+      const bashBreakdown = metrics.toolBreakdown?.find((t) => t.tool === 'Bash');
+      const writeBreakdown = metrics.toolBreakdown?.find((t) => t.tool === 'Write');
+
+      expect(bashBreakdown?.count).toBe(2);
+      expect(bashBreakdown?.durationMs).toBe(150); // 100 + 50
+      expect(writeBreakdown?.count).toBe(1);
+      expect(writeBreakdown?.durationMs).toBe(30);
+    });
+
+    it('uses end time for unclosed tool calls', () => {
+      tracker.start();
+
+      mockTime = 0;
+      tracker.startToolCall('Bash');
+      // Deliberately no endToolCall(): finish() at 100ms is expected to close the call
+
+      mockTime = 100;
+      const metrics = tracker.finish();
+
+      expect(metrics.toolBreakdown?.[0]?.durationMs).toBe(100);
+    });
+  });
+
+  describe('finish()', () => {
+    it('calculates correct derived metrics', () => {
+      tracker.start();
+
+      // TTFT at 50ms
+      mockTime = 50;
+      tracker.recordFirstContent();
+
+      // Tool takes 200ms (100-300)
+      mockTime = 100;
+      tracker.startToolCall('Bash');
+      mockTime = 300;
+      tracker.endToolCall();
+
+      // Record tokens (1000 input, 400 output)
+      tracker.recordTokens(1000, 400);
+
+      // End at 500ms
+      mockTime = 500;
+      const metrics = tracker.finish();
+
+      expect(metrics.ttftMs).toBe(50);
+      expect(metrics.totalDurationMs).toBe(500);
+      expect(metrics.toolExecutionMs).toBe(200);
+      expect(metrics.agentThinkingMs).toBe(300); // total 500 - tool execution 200
+      expect(metrics.tokenMetrics?.inputTokens).toBe(1000);
+      expect(metrics.tokenMetrics?.outputTokens).toBe(400);
+      // 400 output tokens / 0.5 seconds total duration = 800 tokens/sec
+      expect(metrics.tokenMetrics?.tokensPerSecond).toBe(800);
+    });
+
+    it('returns 0 tool execution time for empty tool list', () => {
+      tracker.start();
+      mockTime = 100;
+      const metrics = tracker.finish();
+
+      expect(metrics.toolExecutionMs).toBe(0);
+      expect(metrics.toolBreakdown).toHaveLength(0);
+    });
+
+    it('handles edge case of zero duration', () => {
+      tracker.start();
+      const metrics = tracker.finish();
+
+      expect(metrics.totalDurationMs).toBe(0);
+      expect(metrics.tokenMetrics?.tokensPerSecond).toBe(0);
+    });
+
+    it('clamps negative durations to 0', () => {
+      tracker.start();
+      mockTime = 100;
+      tracker.startToolCall('Bash');
+      // Simulate clock going backwards (edge case): the call "ends" before it started
+      mockTime = 50;
+      tracker.endToolCall();
+
+      const metrics = tracker.finish();
+
+      // Duration must never be negative (this asserts >= 0, not exactly 0)
+      expect(metrics.toolExecutionMs).toBeGreaterThanOrEqual(0);
+    });
+  });
+
+  describe('recordTokens()', () => {
+    it('records input and output tokens', () => {
+      tracker.start();
+      tracker.recordTokens(5000, 2000);
+
+      mockTime = 1000; // 1 second
+      const metrics = tracker.finish();
+
+      expect(metrics.tokenMetrics?.inputTokens).toBe(5000);
+      expect(metrics.tokenMetrics?.outputTokens).toBe(2000);
+      expect(metrics.tokenMetrics?.tokensPerSecond).toBe(2000); // 2000 output tokens / 1 second
+    });
+  });
+});
diff --git a/tests/evals/agent-executor.ts b/tests/evals/agent-executor.ts
index 897d2e68..cfc64ed5 100644
--- a/tests/evals/agent-executor.ts
+++ b/tests/evals/agent-executor.ts
@@ -4,13 +4,15 @@ import { Integration } from '../../src/lib/constants.js';
 import { loadCredentials } from './env-loader.js';
 import { writeEnvLocal } from '../../src/lib/env-writer.js';
 import { getConfig } from '../../src/lib/settings.js';
-import type { ToolCall } from './types.js';
+import { LatencyTracker } from './latency-tracker.js';
+import type { ToolCall, LatencyMetrics } from './types.js';
 
 export interface AgentResult {
   success: boolean;
   output: string;
   toolCalls: ToolCall[];
   error?: string;
+  latencyMetrics?: LatencyMetrics; // run() fills this from LatencyTracker.finish() in both its success and catch paths
 }
 
 export interface AgentExecutorOptions {
@@ -30,6 +32,7 @@ const SKILL_NAMES: Record<Integration, string> = {
 export class AgentExecutor {
   private options: AgentExecutorOptions;
   private credentials: ReturnType<typeof loadCredentials>;
+  private latencyTracker: LatencyTracker;
 
   constructor(
     private workDir: string,
@@ -38,6 +41,7 @@ export class AgentExecutor {
   ) {
     this.options = options;
     this.credentials = loadCredentials();
+    this.latencyTracker = new LatencyTracker();
   }
 
   async run(): Promise<AgentResult> {
@@ -50,6 +54,9 @@ export class AgentExecutor {
       console.log(`${label} Initializing agent for ${integration}...`);
     }
 
+    // Start latency tracking
+    this.latencyTracker.start();
+
     // Write .env.local with credentials (agent configures redirect URI per framework)
     writeEnvLocal(this.workDir, {
       WORKOS_API_KEY: this.credentials.workosApiKey,
@@ -104,16 +111,20 @@ export class AgentExecutor {
         this.handleMessage(message, toolCalls, collectedOutput, label);
       }
 
+      const latencyMetrics = this.latencyTracker.finish();
       return {
         success: true,
         output: collectedOutput.join('\n'),
         toolCalls,
+        latencyMetrics,
       };
     } catch (error) {
+      const latencyMetrics = this.latencyTracker.finish();
       return {
         success: false,
         output: collectedOutput.join('\n'),
         toolCalls,
+        latencyMetrics,
         error: error instanceof Error ? error.message : String(error),
       };
     }
@@ -139,18 +150,23 @@ Begin by invoking the ${skillName} skill.`;
 
   private handleMessage(message: any, toolCalls: ToolCall[], collectedOutput: string[], label: string): void {
     if (message.type === 'assistant') {
+      // End any in-progress tool call when we get a new assistant message
+      this.latencyTracker.endToolCall();
+
       const content = message.message?.content;
       if (Array.isArray(content)) {
         for (const block of content) {
-          // Capture text output
+          // Capture text output and track TTFT
           if (block.type === 'text' && typeof block.text === 'string') {
+            this.latencyTracker.recordFirstContent();
             collectedOutput.push(block.text);
             if (this.options.verbose) {
               console.log(`${label} Agent: ${block.text.slice(0, 100)}...`);
             }
           }
-          // Capture tool calls
+          // Capture tool calls and start timing
           if (block.type === 'tool_use') {
+            this.latencyTracker.startToolCall(block.name);
             const call: ToolCall = {
               tool: block.name,
               input: block.input as Record<string, unknown>,
@@ -165,6 +181,13 @@ Begin by invoking the ${skillName} skill.`;
     }
 
     if (message.type === 'result') {
+      // Capture token usage from result
+      if (message.usage) {
+        this.latencyTracker.recordTokens(
+          message.usage.input_tokens ?? 0,
+          message.usage.output_tokens ?? 0,
+        );
+      }
       if (message.subtype !== 'success' && message.errors?.length > 0) {
         collectedOutput.push(`Error: ${message.errors.join(', ')}`);
       }
diff --git a/tests/evals/cli.ts b/tests/evals/cli.ts
index c2481449..b701c95c 100644
--- a/tests/evals/cli.ts
+++ b/tests/evals/cli.ts
@@ -11,9 +11,13 @@ export interface CliOptions {
   noRetry: boolean;
   sequential: boolean;
   noDashboard: boolean;
-  command?: 'run' | 'history' | 'compare' | 'logs' | 'show';
+  noFail: boolean;
+  quality: boolean;
+  command?: 'run' | 'history' | 'compare' | 'diff' | 'prune' | 'logs' | 'show';
   compareIds?: [string, string];
   logFile?: string;
+  limit?: number;
+  pruneKeep?: number;
 }
 
 const FRAMEWORKS = ['nextjs', 'react', 'react-router', 'tanstack-start', 'vanilla-js'];
@@ -31,20 +35,40 @@ export function parseArgs(args: string[]): CliOptions {
     noRetry: false,
     sequential: false,
     noDashboard: false,
+    noFail: false,
+    quality: false,
   };
 
   // Check for subcommands
   if (args[0] === 'history') {
     options.command = 'history';
+    // Parse --limit=N option
+    for (const arg of args.slice(1)) {
+      if (arg.startsWith('--limit=')) {
+        options.limit = parseInt(arg.split('=')[1], 10);
+      }
+    }
     return options;
   }
 
-  if (args[0] === 'compare' && args.length >= 3) {
-    options.command = 'compare';
+  // Support both 'compare' (legacy) and 'diff' (new)
+  if ((args[0] === 'compare' || args[0] === 'diff') && args.length >= 3) {
+    options.command = 'diff';
     options.compareIds = [args[1], args[2]];
     return options;
   }
 
+  if (args[0] === 'prune') {
+    options.command = 'prune';
+    // Parse --keep=N option
+    for (const arg of args.slice(1)) {
+      if (arg.startsWith('--keep=')) {
+        options.pruneKeep = parseInt(arg.split('=')[1], 10);
+      }
+    }
+    return options;
+  }
+
   if (args[0] === 'logs') {
     options.command = 'logs';
     return options;
@@ -93,6 +117,10 @@ export function parseArgs(args: string[]): CliOptions {
       options.sequential = true;
     } else if (arg === '--no-dashboard') {
       options.noDashboard = true;
+    } else if (arg === '--no-fail') {
+      options.noFail = true;
+    } else if (arg === '--quality' || arg === '-q') {
+      options.quality = true;
     }
   }
 
@@ -109,8 +137,9 @@ Usage: pnpm eval [command] [options]
 
 Commands:
   run (default)       Run evaluations
-  history             List recent eval runs
-  compare <id1> <id2> Compare two eval runs
+  history             List recent eval runs (--limit=N)
+  diff <id1> <id2>    Compare two eval runs with correlation analysis
+  prune               Delete old results (--keep=N, default 10)
   logs                List recent detailed log files
   show <file>         Display formatted log summary
 
@@ -137,6 +166,10 @@ Options:
 
   --no-dashboard      Disable live dashboard, use sequential logging
 
+  --no-fail           Exit 0 even if success criteria thresholds not met
+
+  --quality, -q       Enable LLM-based quality grading (adds cost/time)
+
   --json              Output results as JSON (for scripting)
 
   --help, -h          Show this help message
@@ -150,6 +183,8 @@ Examples:
   pnpm eval --debug                   # Verbose output, keep failed dirs
   pnpm eval --retry=3                 # More retry attempts
   pnpm eval:history                   # List recent runs
-  pnpm eval:compare <id1> <id2>       # Compare two runs
+  pnpm eval:history --limit=20        # Show more runs
+  pnpm eval:diff <id1> <id2>          # Compare two runs
+  pnpm eval:prune --keep=5            # Keep only 5 most recent runs
 `);
 }
diff --git a/tests/evals/commands/diff.ts b/tests/evals/commands/diff.ts
new file mode 100644
index 00000000..9be4143d
--- /dev/null
+++ b/tests/evals/commands/diff.ts
@@ -0,0 +1,274 @@
+import chalk from 'chalk';
+import type { EvalRun } from '../history.js';
+import type { EvalResultMetadata, LatencyMetrics, QualityGrade } from '../types.js';
+
+export interface DiffResult {
+  passRateDelta: { // pass-rate change, second run minus first run, as fractions
+    firstAttempt: number; // counts only results that passed with attempts === 1
+    withRetry: number; // difference of the runs' summary.passRate
+  };
+  skillChanges: Array<{ // frameworks whose skill hash differs between the runs
+    framework: string;
+    oldHash: string; // 'unknown' when the first run has no hash for the framework
+    newHash: string;
+  }>;
+  scenarioChanges: { // scenario names of the second run, bucketed by status change
+    regressions: string[]; // passed in the first run, failed in the second
+    improvements: string[]; // failed in the first run, passed in the second
+    unchanged: string[]; // everything else, including scenarios absent from the first run
+  };
+  latencyChanges?: { // percentile deltas in milliseconds, second run minus first run
+    ttftP50Delta: number;
+    ttftP95Delta: number;
+    durationP50Delta: number;
+    durationP95Delta: number;
+  };
+  qualityChanges?: { // deltas of average quality scores, second run minus first run
+    overallDelta: number;
+    dimensionDeltas: Record<string, number>; // keyed by quality dimension name
+  };
+  likelyCauses: string[]; // human-readable hypotheses; may be empty
+}
+
+/**
+ * Compares two eval runs and summarizes how `run2` differs from `run1`.
+ *
+ * Every numeric delta is computed as `run2 - run1`, so `run1` acts as the
+ * baseline and a positive pass-rate delta means `run2` passed more often.
+ *
+ * @param run1 - Baseline run.
+ * @param run2 - Run compared against the baseline.
+ * @returns Pass-rate deltas, skill hash changes and per-scenario status
+ *   changes, plus latency and quality deltas when both runs carry that data,
+ *   and a list of heuristic cause descriptions.
+ */
+export function diffRuns(run1: EvalRun, run2: EvalRun): DiffResult {
+  // Pass rates are fractions of a run's results, not percentages.
+  const firstAttempt = calculateFirstAttemptRate(run2) - calculateFirstAttemptRate(run1);
+  const withRetry = run2.summary.passRate - run1.summary.passRate;
+  const passRateDelta = { firstAttempt, withRetry };
+
+  const skillChanges = findSkillChanges(run1.metadata, run2.metadata);
+  const scenarioChanges = findScenarioChanges(run1, run2);
+
+  return {
+    passRateDelta,
+    skillChanges,
+    scenarioChanges,
+    // Both stay undefined when either run lacks the underlying measurements.
+    latencyChanges: calculateLatencyChanges(run1, run2),
+    qualityChanges: calculateQualityChanges(run1, run2),
+    likelyCauses: determineLikelyCauses(skillChanges, scenarioChanges, passRateDelta),
+  };
+}
+
+function calculateFirstAttemptRate(run: EvalRun): number { // fraction of results passing on attempt 1; 0 if empty
+  const wins = run.results.reduce((n, r) => (r.attempts === 1 && r.passed ? n + 1 : n), 0);
+  return run.results.length === 0 ? 0 : wins / run.results.length;
+}
+
+/** Lists frameworks whose skill hash in `meta2` differs from `meta1` ('unknown' when absent there). */
+function findSkillChanges(
+  meta1?: EvalResultMetadata,
+  meta2?: EvalResultMetadata,
+): Array<{ framework: string; oldHash: string; newHash: string }> {
+  const before = meta1?.skillVersions;
+  const after = meta2?.skillVersions;
+  // Without version info on both sides there is nothing to compare.
+  if (!before || !after) {
+    return [];
+  }
+
+  // Only frameworks present in the newer metadata are inspected.
+  return Object.entries(after)
+    .map(([framework, newHash]) => ({ framework, oldHash: before[framework] || 'unknown', newHash }))
+    .filter(({ oldHash, newHash }) => oldHash !== newHash);
+}
+
+/**
+ * Classifies each scenario of `run2` by how its pass/fail status moved since `run1`.
+ *
+ * Scenarios missing from `run1` have no baseline and land in `unchanged`;
+ * scenarios that exist only in `run1` are not reported.
+ */
+function findScenarioChanges(
+  run1: EvalRun,
+  run2: EvalRun,
+): { regressions: string[]; improvements: string[]; unchanged: string[] } {
+  const baseline = new Map(run1.results.map((r) => [r.scenario, r.passed]));
+  const current = new Map(run2.results.map((r) => [r.scenario, r.passed]));
+  const buckets = { regressions: [] as string[], improvements: [] as string[], unchanged: [] as string[] };
+
+  current.forEach((nowPassed, scenario) => {
+    const wasPassed = baseline.get(scenario);
+    const regressed = wasPassed === true && nowPassed === false;
+    const improved = wasPassed === false && nowPassed === true;
+    const bucket = regressed ? buckets.regressions : improved ? buckets.improvements : buckets.unchanged;
+    bucket.push(scenario);
+  });
+
+  return buckets;
+}
+
+/** p50/p95 latency deltas (run2 - run1, ms); undefined unless both runs have metrics with a non-null TTFT. */
+function calculateLatencyChanges(
+  run1: EvalRun,
+  run2: EvalRun,
+): DiffResult['latencyChanges'] | undefined {
+  const metricsOf = (run: EvalRun) => run.results.map((r) => r.latencyMetrics).filter(Boolean) as LatencyMetrics[];
+  const before = metricsOf(run1);
+  const after = metricsOf(run2);
+  if (before.length === 0 || after.length === 0) return undefined;
+  const ttftsOf = (ms: LatencyMetrics[]) => ms.map((m) => m.ttftMs).filter((t): t is number => t !== null);
+  const [ttftsBefore, ttftsAfter] = [ttftsOf(before), ttftsOf(after)];
+  if (ttftsBefore.length === 0 || ttftsAfter.length === 0) return undefined;
+
+  const durationsBefore = before.map((m) => m.totalDurationMs);
+  const durationsAfter = after.map((m) => m.totalDurationMs);
+  const delta = (next: number[], prev: number[], p: number) => percentile(next, p) - percentile(prev, p);
+  return {
+    ttftP50Delta: delta(ttftsAfter, ttftsBefore, 50),
+    ttftP95Delta: delta(ttftsAfter, ttftsBefore, 95),
+    durationP50Delta: delta(durationsAfter, durationsBefore, 50),
+    durationP95Delta: delta(durationsAfter, durationsBefore, 95),
+  };
+}
+
+/**
+ * Computes how the average quality scores moved from `run1` to `run2`.
+ * Only results carrying a `qualityGrade` are averaged; returns undefined when
+ * either run has none.
+ */
+function calculateQualityChanges(
+  run1: EvalRun,
+  run2: EvalRun,
+): DiffResult['qualityChanges'] | undefined {
+  const gradesOf = (run: EvalRun) => run.results.map((r) => r.qualityGrade).filter(Boolean) as QualityGrade[];
+  const before = gradesOf(run1);
+  const after = gradesOf(run2);
+  if (before.length === 0 || after.length === 0) return undefined;
+
+  // Arithmetic mean of one numeric field across a run's grades.
+  const mean = (grades: QualityGrade[], pick: (g: QualityGrade) => number): number =>
+    grades.reduce((sum, g) => sum + pick(g), 0) / grades.length;
+
+  const dims = ['codeStyle', 'minimalism', 'errorHandling', 'idiomatic'] as const;
+  const dimensionDeltas: Record<string, number> = {};
+  for (const dim of dims) {
+    dimensionDeltas[dim] = mean(after, (g) => g.dimensions[dim]) - mean(before, (g) => g.dimensions[dim]);
+  }
+
+  const overallDelta = mean(after, (g) => g.score) - mean(before, (g) => g.score);
+  return { overallDelta, dimensionDeltas };
+}
+
+/**
+ * Produces human-readable hypotheses for why results moved between two runs.
+ *
+ * NOTE(review): regressions are tied to a skill by scenario-name prefix, so a
+ * framework name that prefixes another one would match both — confirm naming.
+ */
+function determineLikelyCauses(
+  skillChanges: Array<{ framework: string; oldHash: string; newHash: string }>,
+  scenarioChanges: { regressions: string[] },
+  passRateDelta: { firstAttempt: number; withRetry: number },
+): string[] {
+  const { regressions } = scenarioChanges;
+  const causes: string[] = [];
+
+  // Correlate skill changes with regressions only when the with-retry pass rate fell by more than 5 points.
+  const significantDrop = passRateDelta.withRetry < -0.05;
+  for (const { framework, oldHash, newHash } of significantDrop ? skillChanges : []) {
+    const regressedCount = regressions.filter((name) => name.startsWith(framework)).length;
+    if (regressedCount === 0) continue;
+    causes.push(
+      `${framework} skill changed (${oldHash.slice(0, 8)} → ${newHash.slice(0, 8)}) ` +
+        `and ${regressedCount} scenario(s) regressed`,
+    );
+  }
+  // Regressions with no skill change point away from the skills themselves.
+  if (regressions.length > 0 && skillChanges.length === 0) {
+    causes.push('Regressions occurred without skill changes - possible flaky tests or external factors');
+  }
+  return causes;
+}
+
+function percentile(values: number[], p: number): number { // nearest-rank percentile; 0 for empty input
+  const ascending = values.slice().sort((x, y) => x - y);
+  if (ascending.length === 0) return 0;
+  const rank = Math.ceil((p / 100) * ascending.length); // 1-based rank
+  return ascending[Math.max(rank - 1, 0)]; // clamp so p = 0 selects the minimum
+}
+
+/**
+ * Prints a human-readable comparison report to stdout. Sections without data
+ * (no skill changes, no regressions, no latency/quality metrics, ...) are omitted.
+ *
+ * @param diff - Comparison to render.
+ * @param run1Id - Baseline run id, shown in the header.
+ * @param run2Id - Compared run id, shown in the header.
+ */
+export function printDiff(diff: DiffResult, run1Id: string, run2Id: string): void {
+  const { passRateDelta, skillChanges, latencyChanges, qualityChanges, likelyCauses } = diff;
+
+  console.log(chalk.bold(`\nComparing: ${run1Id} → ${run2Id}\n`));
+
+  // Pass rates are fractions; report them as percentage points.
+  console.log(chalk.bold('Pass Rate Changes:'));
+  printDelta('  First-attempt', passRateDelta.firstAttempt * 100, '%');
+  printDelta('  With-retry', passRateDelta.withRetry * 100, '%');
+
+  if (skillChanges.length > 0) {
+    console.log(chalk.bold('\nSkill Version Changes:'));
+    for (const { framework, oldHash, newHash } of skillChanges) {
+      console.log(`  ${framework}: ${oldHash.slice(0, 8)} → ${newHash.slice(0, 8)}`);
+    }
+  }
+
+  const { regressions, improvements } = diff.scenarioChanges;
+  if (regressions.length > 0) {
+    console.log(chalk.bold.red('\nRegressions (PASS → FAIL):'));
+    for (const s of regressions) console.log(chalk.red(`  ✗ ${s}`));
+  }
+
+  if (improvements.length > 0) {
+    console.log(chalk.bold.green('\nImprovements (FAIL → PASS):'));
+    for (const s of improvements) console.log(chalk.green(`  ✓ ${s}`));
+  }
+
+  // Latency is "lower is better": an increase must be flagged red, not green.
+  if (latencyChanges) {
+    console.log(chalk.bold('\nLatency Changes:'));
+    printDelta('  TTFT p50', latencyChanges.ttftP50Delta, 'ms', false);
+    printDelta('  TTFT p95', latencyChanges.ttftP95Delta, 'ms', false);
+    printDelta('  Duration p50', latencyChanges.durationP50Delta / 1000, 's', false);
+    printDelta('  Duration p95', latencyChanges.durationP95Delta / 1000, 's', false);
+  }
+
+  if (qualityChanges) {
+    console.log(chalk.bold('\nQuality Changes:'));
+    printDelta('  Overall', qualityChanges.overallDelta, '/5');
+    for (const [dim, delta] of Object.entries(qualityChanges.dimensionDeltas)) {
+      printDelta(`    ${dim}`, delta, '/5');
+    }
+  }
+
+  if (likelyCauses.length > 0) {
+    console.log(chalk.bold.yellow('\nLikely Causes:'));
+    for (const cause of likelyCauses) console.log(chalk.yellow(`  ⚠ ${cause}`));
+  }
+
+  // Nothing moved: no scenario flipped status and no skill hash changed.
+  if (regressions.length + improvements.length === 0 && skillChanges.length === 0) {
+    console.log(chalk.gray('\nNo significant changes between runs.'));
+  }
+}
+
+/** Prints `label: <signed delta><unit>`: green = better, red = worse, gray = no change; `higherIsBetter` picks the good direction. */
+function printDelta(label: string, delta: number, unit: string, higherIsBetter: boolean = true): void {
+  const sign = delta > 0 ? '+' : '';
+  const improved = higherIsBetter ? delta > 0 : delta < 0;
+  const worsened = higherIsBetter ? delta < 0 : delta > 0;
+  const color = improved ? chalk.green : worsened ? chalk.red : chalk.gray;
+  console.log(`${label}: ${color(`${sign}${delta.toFixed(1)}${unit}`)}`);
+}
diff --git a/tests/evals/commands/history.ts b/tests/evals/commands/history.ts
new file mode 100644
index 00000000..c42b6972
--- /dev/null
+++ b/tests/evals/commands/history.ts
@@ -0,0 +1,90 @@
+import { readdir, unlink, readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import chalk from 'chalk';
+import type { EvalRun } from '../history.js';
+
+const RESULTS_DIR = join(process.cwd(), 'tests/eval-results');
+
+/**
+ * Prints a table of the most recent eval runs found in `RESULTS_DIR`.
+ *
+ * Run files are the `.json` entries other than `latest.json` and `eval-run-*`,
+ * listed in descending file-name order (presumably newest first — confirm
+ * that run file names sort chronologically).
+ *
+ * @param limit - Maximum number of runs to display (default 10).
+ */
+export async function listHistory(limit: number = 10): Promise<void> {
+  const isRunFile = (name: string): boolean =>
+    name.endsWith('.json') && name !== 'latest.json' && !name.startsWith('eval-run-');
+
+  let allRuns: string[];
+  try {
+    allRuns = (await readdir(RESULTS_DIR)).filter(isRunFile);
+  } catch {
+    console.log(chalk.yellow('No eval results found. Run `pnpm eval` first.'));
+    return;
+  }
+
+  const shown = [...allRuns].sort().reverse().slice(0, limit);
+  if (shown.length === 0) {
+    console.log(chalk.yellow('No eval results found. Run `pnpm eval` first.'));
+    return;
+  }
+
+  console.log(chalk.bold('\nRecent Eval Runs:\n'));
+  console.log('  ID                              Pass Rate   Scenarios   Avg Duration');
+  console.log('  ' + '─'.repeat(68));
+
+  for (const file of shown) {
+    let row: string;
+    try {
+      const run: EvalRun = JSON.parse(await readFile(join(RESULTS_DIR, file), 'utf-8'));
+      const { summary, results } = run;
+      const passRate = `${(summary.passRate * 100).toFixed(0)}%`;
+      const scenarios = `${summary.passed}/${summary.total}`;
+      const totalMs = results.reduce((sum, r) => sum + r.duration, 0);
+      const avgDuration = results.length > 0 ? `${Math.round(totalMs / results.length / 1000)}s` : 'N/A';
+      // Runs below a 90% pass rate are highlighted in red.
+      const paint = summary.passRate >= 0.9 ? chalk.green : chalk.red;
+      row = `  ${run.id.padEnd(32)}  ${paint(passRate.padEnd(10))} ${scenarios.padEnd(11)} ${avgDuration}`;
+    } catch {
+      // Unreadable or malformed file: still list it, keyed by file name.
+      row = `  ${file.replace('.json', '').padEnd(32)}  ${chalk.gray('(unable to read)')}`;
+    }
+    console.log(row);
+  }
+
+  console.log(`\n  Showing ${shown.length} of ${allRuns.length} runs. Use --limit=N for more.`);
+}
+
+/** Deletes all but the `keep` most recent run files (never `latest.json` or `eval-run-*`); `keep` must be a non-negative integer. */
+export async function pruneHistory(keep: number = 10): Promise<void> {
+  // slice() treats NaN as 0 (delete everything) and a negative count as an offset from the end.
+  if (!Number.isInteger(keep) || keep < 0) {
+    console.log(chalk.red(`Invalid keep value: ${keep}. Nothing was deleted.`));
+    return;
+  }
+
+  let files: string[];
+  try {
+    files = await readdir(RESULTS_DIR);
+  } catch {
+    console.log('No results directory found.');
+    return;
+  }
+
+  const isRunFile = (f: string) => f.endsWith('.json') && f !== 'latest.json' && !f.startsWith('eval-run-');
+  const runFiles = files.filter(isRunFile).sort().reverse();
+  const toDelete = runFiles.slice(keep);
+  if (toDelete.length === 0) {
+    console.log(`No runs to prune. Keeping all ${runFiles.length} runs.`);
+    return;
+  }
+  console.log(`Pruning ${toDelete.length} old runs, keeping ${keep} most recent...`);
+  for (const file of toDelete) {
+    await unlink(join(RESULTS_DIR, file));
+    console.log(chalk.gray(`  Deleted: ${file}`));
+  }
+  console.log(chalk.green(`Done. ${keep} runs remaining.`));
+}
diff --git a/tests/evals/dashboard/EvalDashboard.tsx b/tests/evals/dashboard/EvalDashboard.tsx
index d5ad2e99..1847ae45 100644
--- a/tests/evals/dashboard/EvalDashboard.tsx
+++ b/tests/evals/dashboard/EvalDashboard.tsx
@@ -1,11 +1,6 @@
 import React, { useState, useEffect } from 'react';
 import { Box, Text, useApp } from 'ink';
-import {
-  evalEvents,
-  type ScenarioStartEvent,
-  type ScenarioCompleteEvent,
-  type RunProgressEvent,
-} from '../events.js';
+import { evalEvents, type ScenarioStartEvent, type ScenarioCompleteEvent, type RunProgressEvent } from '../events.js';
 import { Header } from './Header.js';
 import { ScenarioRow } from './ScenarioRow.js';
 
diff --git a/tests/evals/fixture-manager.ts b/tests/evals/fixture-manager.ts
index d98e3cc7..eae3fdc7 100644
--- a/tests/evals/fixture-manager.ts
+++ b/tests/evals/fixture-manager.ts
@@ -39,6 +39,11 @@ export class FixtureManager {
       throw new Error(`pnpm install failed: ${result.stderr}`);
     }
 
+    // Initialize git repo for diff capture (quality grading)
+    await execFileNoThrow('git', ['init'], { cwd: this.tempDir });
+    await execFileNoThrow('git', ['add', '-A'], { cwd: this.tempDir });
+    await execFileNoThrow('git', ['commit', '-m', 'initial', '--no-gpg-sign'], { cwd: this.tempDir });
+
     return this.tempDir;
   }
 
diff --git a/tests/evals/graders/collect-key-files.ts b/tests/evals/graders/collect-key-files.ts
new file mode 100644
index 00000000..0987767a
--- /dev/null
+++ b/tests/evals/graders/collect-key-files.ts
@@ -0,0 +1,75 @@
+import { readFile } from 'node:fs/promises';
+import { join, relative } from 'node:path';
+import fg from 'fast-glob';
+import { QUALITY_KEY_FILES } from '../quality-key-files.js';
+
+/**
+ * Collects the content of key integration files for quality grading.
+ *
+ * Each glob pattern registered for `framework` in `QUALITY_KEY_FILES` is
+ * evaluated in turn inside `workDir`. Every match is read as UTF-8 and stored
+ * under its path relative to `workDir`; a file matched by several patterns is
+ * stored once, and files that cannot be read are left out.
+ *
+ * @param workDir - The working directory to search in
+ * @param framework - The framework name (e.g., 'nextjs', 'react')
+ * @returns Map of relative file paths to their contents; empty when no
+ *   patterns are registered for the framework
+ */
+export async function collectKeyFiles(
+  workDir: string,
+  framework: string,
+): Promise<Map<string, string>> {
+  const collected = new Map<string, string>();
+  const patterns = QUALITY_KEY_FILES[framework];
+  if (!patterns) {
+    return collected;
+  }
+
+  // Dependency and build-output directories are excluded from the search.
+  const ignore = ['**/node_modules/**', '**/dist/**', '**/build/**', '**/.next/**'];
+
+  for (const pattern of patterns) {
+    const matches = await fg(pattern, {
+      cwd: workDir,
+      absolute: true,
+      onlyFiles: true,
+      ignore,
+    });
+
+    for (const absPath of matches) {
+      const relPath = relative(workDir, absPath);
+      // The result map doubles as the record of files already collected.
+      if (collected.has(relPath)) {
+        continue;
+      }
+
+      try {
+        collected.set(relPath, await readFile(absPath, 'utf-8'));
+      } catch {
+        // Unreadable file: leave it out rather than failing the whole collection.
+      }
+    }
+  }
+
+  return collected;
+}
+
+/**
+ * Renders collected key files as markdown for the quality grading prompt.
+ *
+ * @param keyFiles - Map of file paths to contents
+ * @returns One `### path` heading plus fenced code block per file, separated
+ *   by blank lines, or a fixed placeholder sentence when the map is empty
+ */
+export function formatKeyFilesForPrompt(keyFiles: Map<string, string>): string {
+  if (keyFiles.size === 0) return 'No key integration files found.';
+
+  const sections: string[] = [];
+  for (const [path, content] of keyFiles) {
+    // Fence language = text after the last '.', falling back to 'txt'.
+    const ext = path.split('.').pop() || 'txt';
+    sections.push(`### ${path}\n\`\`\`${ext}\n${content}\n\`\`\``);
+  }
+  return sections.join('\n\n');
+}
diff --git a/tests/evals/graders/file-grader.ts b/tests/evals/graders/file-grader.ts
index 0a680faf..1118ed95 100644
--- a/tests/evals/graders/file-grader.ts
+++ b/tests/evals/graders/file-grader.ts
@@ -54,9 +54,7 @@ export class FileGrader {
     for (const file of files) {
       try {
         const content = await readFile(file, 'utf-8');
-        const allMatch = contentPatterns.every((p) =>
-          typeof p === 'string' ? content.includes(p) : p.test(content),
-        );
+        const allMatch = contentPatterns.every((p) => (typeof p === 'string' ? content.includes(p) : p.test(content)));
         if (allMatch) {
           const relativePath = file.replace(this.workDir + '/', '');
           return {
diff --git a/tests/evals/graders/nextjs.grader.ts b/tests/evals/graders/nextjs.grader.ts
index c214479d..a9ec2109 100644
--- a/tests/evals/graders/nextjs.grader.ts
+++ b/tests/evals/graders/nextjs.grader.ts
@@ -25,16 +25,35 @@ export class NextjsGrader implements Grader {
     // Check middleware exists
     checks.push(await this.fileGrader.checkFileExists('middleware.ts'));
 
-    // Check middleware imports authkit
-    checks.push(
-      ...(await this.fileGrader.checkFileContains('middleware.ts', [
-        '@workos-inc/authkit-nextjs',
-        'authkitMiddleware',
-      ])),
-    );
+    // Check middleware imports authkit SDK
+    const sdkImportChecks = await this.fileGrader.checkFileContains('middleware.ts', ['@workos-inc/authkit-nextjs']);
+    checks.push(...sdkImportChecks);
+
+    // Check for authkit integration: authkitMiddleware OR (authkit + handleAuthkitHeaders)
+    const middlewareChecks = await this.fileGrader.checkFileContains('middleware.ts', ['authkitMiddleware']);
+    const composableChecks = await this.fileGrader.checkFileContains('middleware.ts', ['authkit(', 'handleAuthkitHeaders']);
+
+    const usesAuthkitMiddleware = middlewareChecks.every((c) => c.passed);
+    const usesComposable = composableChecks.every((c) => c.passed);
 
-    // Check AuthKitProvider in layout
-    checks.push(...(await this.fileGrader.checkFileContains('app/layout.tsx', ['AuthKitProvider'])));
+    const authkitCheck: GradeCheck = {
+      name: 'AuthKit middleware integration',
+      passed: usesAuthkitMiddleware || usesComposable,
+      message: usesAuthkitMiddleware
+        ? 'Uses authkitMiddleware'
+        : usesComposable
+          ? 'Uses authkit() composable with handleAuthkitHeaders'
+          : 'Missing authkitMiddleware or authkit() composable integration',
+    };
+    checks.push(authkitCheck);
+
+    // Check AuthKitProvider in layout or extracted providers file
+    const authKitProviderCheck = await this.fileGrader.checkFileWithPattern(
+      'app/**/*.tsx',
+      ['AuthKitProvider'],
+      'AuthKitProvider in app',
+    );
+    checks.push(authKitProviderCheck);
 
     // Check build succeeds
     checks.push(await this.buildGrader.checkBuild());
diff --git a/tests/evals/graders/quality-grader.ts b/tests/evals/graders/quality-grader.ts
new file mode 100644
index 00000000..91165a42
--- /dev/null
+++ b/tests/evals/graders/quality-grader.ts
@@ -0,0 +1,130 @@
+import Anthropic from '@anthropic-ai/sdk';
+import { QUALITY_RUBRICS, QUALITY_DIMENSIONS } from '../quality-rubrics.js';
+import type { QualityGrade, QualityInput } from '../types.js';
+import { formatKeyFilesForPrompt } from './collect-key-files.js';
+
+const QUALITY_MODEL = 'claude-3-5-haiku-20241022';
+
+/** Scores agent-written integration code against the quality rubrics using an LLM as judge. */
+export class QualityGrader {
+  private client: Anthropic;
+
+  constructor(apiKey: string) {
+    this.client = new Anthropic({ apiKey });
+  }
+
+  /**
+   * Asks the model to grade the key files of one scenario.
+   * @returns The parsed grade, or null when there are no key files, the request
+   *   fails, or the reply cannot be parsed.
+   */
+  async grade(input: QualityInput): Promise<QualityGrade | null> {
+    if (input.keyFiles.size === 0) return null;
+
+    const prompt = this.buildPrompt(input);
+    try {
+      const response = await this.client.messages.create({
+        model: QUALITY_MODEL,
+        max_tokens: 1024,
+        messages: [{ role: 'user', content: prompt }],
+      });
+
+      // An empty reply or a non-text first block carries no scores.
+      const content = response.content[0];
+      if (content?.type !== 'text') return null;
+
+      return this.parseResponse(content.text);
+    } catch (error) {
+      console.warn('Quality grading failed:', error);
+      return null;
+    }
+  }
+
+  /** Assembles the grading prompt: key files, run metadata, rubrics and output instructions. */
+  private buildPrompt(input: QualityInput): string {
+    const rubricText = QUALITY_DIMENSIONS.map((dim) => {
+      const rubric = QUALITY_RUBRICS[dim];
+      const scaleText = Object.entries(rubric.scale)
+        .map(([score, desc]) => `  ${score}: ${desc}`)
+        .join('\n');
+      return `### ${rubric.name}\n${rubric.description}\n${scaleText}`;
+    }).join('\n\n');
+
+    const keyFilesText = formatKeyFilesForPrompt(input.keyFiles);
+
+    // Chain-of-thought before scoring improves grading accuracy (Anthropic best practice)
+    return `You are evaluating code written by an AI agent installing WorkOS AuthKit into a ${input.framework} project.
+
+## Key Integration Files
+
+${keyFilesText}
+
+## Installation Metadata
+- Files created: ${input.metadata.filesCreated.join(', ') || 'None'}
+- Files modified: ${input.metadata.filesModified.join(', ') || 'None'}
+- Tool activity: ${input.metadata.toolCallSummary}
+- Checks passed: ${input.metadata.checksPassed.join(', ') || 'None'}
+
+## Grading Rubrics
+${rubricText}
+
+## Instructions
+First, analyze the code thoroughly in <thinking> tags. For each dimension, examine the code and determine the appropriate score based on the rubric. Consider specific examples from the code.
+
+Then, output your final scores as JSON.
+
+<thinking>
+[Analyze each dimension here - what patterns do you see? What's done well? What could be better?]
+</thinking>
+
+{
+  "codeStyle": <1-5>,
+  "minimalism": <1-5>,
+  "errorHandling": <1-5>,
+  "idiomatic": <1-5>
+}`;
+  }
+
+  /**
+   * Extracts the reasoning and the four dimension scores from the model's reply.
+   * Missing or non-numeric scores fall back to 3; every score is clamped to 1-5.
+   */
+  private parseResponse(text: string): QualityGrade | null {
+    try {
+      // Extract chain-of-thought reasoning from <thinking> tags
+      const thinkingMatch = text.match(/<thinking>([\s\S]*?)<\/thinking>/);
+      const reasoning = thinkingMatch?.[1]?.trim() || 'No reasoning provided';
+
+      // Look for the JSON only outside the thinking block: the reasoning may quote
+      // code containing braces, which the greedy match below would otherwise swallow.
+      const answer = text.replace(/<thinking>[\s\S]*?<\/thinking>/g, '');
+      const jsonMatch = answer.match(/\{[\s\S]*\}/);
+      if (!jsonMatch) return null;
+
+      const parsed = JSON.parse(jsonMatch[0]) as Record<string, unknown>;
+
+      // Handle both formats: direct scores or nested { score: n }
+      const getScore = (val: unknown): unknown =>
+        typeof val === 'object' && val !== null && 'score' in val ? (val as { score: unknown }).score : val;
+
+      const dimensions = {
+        codeStyle: this.clampScore(getScore(parsed.codeStyle)),
+        minimalism: this.clampScore(getScore(parsed.minimalism)),
+        errorHandling: this.clampScore(getScore(parsed.errorHandling)),
+        idiomatic: this.clampScore(getScore(parsed.idiomatic)),
+      };
+
+      const score = Object.values(dimensions).reduce((a, b) => a + b, 0) / 4;
+      return { score: Math.round(score * 10) / 10, dimensions, reasoning };
+    } catch (error) {
+      console.warn('Failed to parse quality response:', error);
+      return null;
+    }
+  }
+
+  /** Rounds to the nearest integer within 1-5; any non-number becomes 3. */
+  private clampScore(score: unknown): number {
+    const num = typeof score === 'number' ? score : 3;
+    return Math.max(1, Math.min(5, Math.round(num)));
+  }
+}
diff --git a/tests/evals/graders/vanilla.grader.ts b/tests/evals/graders/vanilla.grader.ts
index 9031f88c..36008d46 100644
--- a/tests/evals/graders/vanilla.grader.ts
+++ b/tests/evals/graders/vanilla.grader.ts
@@ -44,11 +44,7 @@ export class VanillaGrader implements Grader {
 
     // Check createClient usage (the core initialization pattern)
     checks.push(
-      await this.fileGrader.checkFileWithPattern(
-        '**/*.{js,ts}',
-        ['createClient'],
-        'createClient initialization',
-      ),
+      await this.fileGrader.checkFileWithPattern('**/*.{js,ts}', ['createClient'], 'createClient initialization'),
     );
 
     // Check for auth methods usage (signIn, signOut, or getUser)
@@ -61,13 +57,7 @@ export class VanillaGrader implements Grader {
     );
 
     // Check index.html exists and references auth script or module
-    checks.push(
-      await this.fileGrader.checkFileWithPattern(
-        '*.html',
-        [/<script/i],
-        'HTML with script reference',
-      ),
-    );
+    checks.push(await this.fileGrader.checkFileWithPattern('*.html', [/<script/i], 'HTML with script reference'));
 
     // Vanilla JS may not have build step - check if build script exists
     const hasBuildScript = await this.checkHasBuildScript();
diff --git a/tests/evals/history.ts b/tests/evals/history.ts
index 34f9e2ab..24bf9a78 100644
--- a/tests/evals/history.ts
+++ b/tests/evals/history.ts
@@ -1,6 +1,6 @@
 import { writeFile, readFile, readdir, symlink, unlink, mkdir } from 'node:fs/promises';
 import { join } from 'node:path';
-import type { EvalResult } from './types.js';
+import type { EvalResult, EvalResultMetadata } from './types.js';
 
 const RESULTS_DIR = join(process.cwd(), 'tests/eval-results');
 
@@ -18,11 +18,13 @@ export interface EvalRun {
     state?: string;
   };
   results: EvalResult[];
+  metadata?: EvalResultMetadata;
 }
 
 export async function saveResults(
   results: EvalResult[],
   options: { framework?: string; state?: string },
+  metadata?: EvalResultMetadata,
 ): Promise<string> {
   // Ensure results directory exists
   await mkdir(RESULTS_DIR, { recursive: true });
@@ -42,6 +44,7 @@ export async function saveResults(
     },
     options,
     results,
+    metadata,
   };
 
   await writeFile(filepath, JSON.stringify(run, null, 2));
diff --git a/tests/evals/index.ts b/tests/evals/index.ts
index 444be0f2..7e922740 100644
--- a/tests/evals/index.ts
+++ b/tests/evals/index.ts
@@ -2,8 +2,10 @@
 import { parseArgs, printHelp } from './cli.js';
 import { runEvals } from './runner.js';
 import { printMatrix, printJson } from './reporter.js';
-import { listRuns, loadRun, compareRuns } from './history.js';
+import { loadRun } from './history.js';
 import { listLogs, showLog } from './log-commands.js';
+import { diffRuns, printDiff } from './commands/diff.js';
+import { listHistory, pruneHistory } from './commands/history.js';
 
 async function main() {
   const options = parseArgs(process.argv.slice(2));
@@ -16,24 +18,22 @@ async function main() {
   try {
     switch (options.command) {
       case 'history': {
-        const runs = await listRuns();
-        if (runs.length === 0) {
-          console.log('No eval runs found. Run `pnpm eval` first.');
-          break;
-        }
-        console.log('Recent eval runs:');
-        for (const run of runs.slice(0, 10)) {
-          const data = await loadRun(run.replace('.json', ''));
-          console.log(`  ${run.replace('.json', '')} - ${data.summary.passed}/${data.summary.total} passed`);
-        }
+        await listHistory(options.limit || 10);
         break;
       }
 
+      case 'diff':
       case 'compare': {
         const [id1, id2] = options.compareIds!;
         const run1 = await loadRun(id1);
         const run2 = await loadRun(id2);
-        compareRuns(run1, run2);
+        const diff = diffRuns(run1, run2);
+        printDiff(diff, id1, id2);
+        break;
+      }
+
+      case 'prune': {
+        await pruneHistory(options.pruneKeep || 10);
         break;
       }
 
@@ -56,6 +56,11 @@ async function main() {
           keep: options.keep,
           keepOnFail: options.keepOnFail,
           retry: options.retry,
+          sequential: options.sequential,
+          noDashboard: options.noDashboard,
+          debug: options.debug,
+          noFail: options.noFail,
+          quality: options.quality,
         });
 
         if (options.json) {
@@ -63,8 +68,6 @@ async function main() {
         } else {
           printMatrix(results);
         }
-
-        process.exit(results.every((r) => r.passed) ? 0 : 1);
       }
     }
   } catch (error) {
diff --git a/tests/evals/latency-tracker.ts b/tests/evals/latency-tracker.ts
new file mode 100644
index 00000000..dd4dda50
--- /dev/null
+++ b/tests/evals/latency-tracker.ts
@@ -0,0 +1,106 @@
+import type { LatencyMetrics } from './types.js';
+
+interface ToolTiming {
+  tool: string; // tool name as passed to startToolCall
+  startMs: number; // performance.now() reading taken when the call started
+  endMs?: number; // performance.now() reading at completion; unset while in progress
+}
+
+/**
+ * Collects timing and token data for one agent run and condenses it into `LatencyMetrics`.
+ * Call `start()` first, feed events as they occur, then call `finish()`.
+ */
+export class LatencyTracker {
+  private startTime: number = 0;
+  private firstContentTime: number | null = null;
+  private toolTimings: ToolTiming[] = [];
+  private currentTool: ToolTiming | null = null;
+  private inputTokens: number = 0;
+  private outputTokens: number = 0;
+
+  /** Resets all state and marks the beginning of the run. */
+  start(): void {
+    this.startTime = performance.now();
+    this.firstContentTime = null;
+    this.toolTimings = [];
+    this.currentTool = null;
+    this.inputTokens = 0;
+    this.outputTokens = 0;
+  }
+
+  /** Marks the time of the first content; later calls are ignored. */
+  recordFirstContent(): void {
+    if (this.firstContentTime === null) {
+      this.firstContentTime = performance.now();
+    }
+  }
+
+  /** Begins timing a tool call. Only one call is tracked at a time: an unfinished previous call is replaced. */
+  startToolCall(toolName: string): void {
+    this.currentTool = { tool: toolName, startMs: performance.now() };
+  }
+
+  /** Completes the tool call in progress, if any. */
+  endToolCall(): void {
+    if (!this.currentTool) return;
+    this.currentTool.endMs = performance.now();
+    this.toolTimings.push(this.currentTool);
+    this.currentTool = null;
+  }
+
+  /** Stores the token counts for the run, replacing any earlier values. */
+  recordTokens(input: number, output: number): void {
+    this.inputTokens = input;
+    this.outputTokens = output;
+  }
+
+  /**
+   * Computes the metrics for the run so far without resetting the tracker.
+   * All durations are rounded to whole milliseconds.
+   */
+  finish(): LatencyMetrics {
+    const endTime = performance.now();
+    const totalDurationMs = Math.max(0, endTime - this.startTime);
+
+    // Compare against null explicitly: a timestamp of 0 is a valid reading.
+    const ttftMs = this.firstContentTime !== null ? Math.max(0, this.firstContentTime - this.startTime) : null;
+
+    // A tool call still in progress is counted as ending now.
+    const allToolTimings = this.currentTool
+      ? [...this.toolTimings, { ...this.currentTool, endMs: endTime }]
+      : this.toolTimings;
+
+    // Single pass: accumulate total tool time and the per-tool breakdown together.
+    let toolExecutionMs = 0;
+    const perTool = new Map<string, { durationMs: number; count: number }>();
+    for (const timing of allToolTimings) {
+      const duration = Math.max(0, (timing.endMs ?? endTime) - timing.startMs);
+      toolExecutionMs += duration;
+      const entry = perTool.get(timing.tool) ?? { durationMs: 0, count: 0 };
+      perTool.set(timing.tool, { durationMs: entry.durationMs + duration, count: entry.count + 1 });
+    }
+
+    const toolBreakdown = Array.from(perTool.entries()).map(([tool, data]) => ({
+      tool,
+      durationMs: Math.round(data.durationMs),
+      count: data.count,
+    }));
+
+    // Whatever time was not spent in tools is attributed to the agent itself.
+    const agentThinkingMs = Math.max(0, totalDurationMs - toolExecutionMs);
+    const tokensPerSecond = totalDurationMs > 0 ? this.outputTokens / (totalDurationMs / 1000) : 0;
+
+    return {
+      ttftMs: ttftMs !== null ? Math.round(ttftMs) : null,
+      agentThinkingMs: Math.round(agentThinkingMs),
+      toolExecutionMs: Math.round(toolExecutionMs),
+      totalDurationMs: Math.round(totalDurationMs),
+      tokenMetrics: {
+        inputTokens: this.inputTokens,
+        outputTokens: this.outputTokens,
+        tokensPerSecond: Math.round(tokensPerSecond),
+      },
+      toolBreakdown,
+    };
+  }
+}
diff --git a/tests/evals/parallel-runner.ts b/tests/evals/parallel-runner.ts
index 34e1845e..d2f2a56c 100644
--- a/tests/evals/parallel-runner.ts
+++ b/tests/evals/parallel-runner.ts
@@ -4,6 +4,7 @@ import { FixtureManager } from './fixture-manager.js';
 import { AgentExecutor } from './agent-executor.js';
 import { detectConcurrency } from './concurrency.js';
 import { evalEvents } from './events.js';
+import { collectKeyFiles } from './graders/collect-key-files.js';
 
 interface Scenario {
   framework: string;
@@ -130,6 +131,11 @@ export class ParallelRunner {
         const grader = new scenario.grader(workDir);
         const gradeResult = await grader.grade();
 
+        // Collect key files for quality grading (only on pass to avoid wasted effort)
+        const keyFiles = gradeResult.passed
+          ? await collectKeyFiles(workDir, scenario.framework)
+          : undefined;
+
         lastResult = {
           scenario: scenarioName,
           passed: gradeResult.passed,
@@ -137,6 +143,8 @@ export class ParallelRunner {
           checks: gradeResult.checks,
           agentOutput: agentResult.output,
           attempts: attempt,
+          latencyMetrics: agentResult.latencyMetrics,
+          keyFiles,
         };
 
         if (gradeResult.passed) {
diff --git a/tests/evals/quality-key-files.ts b/tests/evals/quality-key-files.ts
new file mode 100644
index 00000000..3bb8b0f0
--- /dev/null
+++ b/tests/evals/quality-key-files.ts
@@ -0,0 +1,67 @@
+/**
+ * Key files per framework for quality grading, keyed by framework name.
+ * These are the integration-critical files the LLM should evaluate.
+ * Patterns use fast-glob syntax. Order matters - first match wins for each pattern.
+ * NOTE(review): the ordering rule is enforced by whatever consumes this map, not here - confirm.
+ */
+export const QUALITY_KEY_FILES: Record<string, string[]> = {
+  nextjs: [
+    // Middleware - auth protection layer (NOTE(review): only the root-level file is listed, no src/ variant)
+    'middleware.ts',
+    // Callback route - OAuth handling (NOTE(review): the glob presumably already covers the explicit path)
+    'app/**/callback/**/route.ts',
+    'app/auth/callback/route.ts',
+    // Provider - client-side auth context
+    'app/**/providers.tsx',
+    'app/providers.tsx',
+    'app/layout.tsx',
+  ],
+
+  react: [
+    // Entry point - AuthKitProvider setup
+    'src/main.tsx',
+    'src/index.tsx',
+    // App component - useAuth usage
+    'src/App.tsx',
+    // Auth-specific components if they exist
+    'src/auth/**/*.tsx',
+    'src/components/auth/**/*.tsx',
+  ],
+
+  'react-router': [
+    // Callback route - authLoader
+    'app/routes/**/callback*.tsx',
+    'src/routes/**/callback*.tsx',
+    // Root - authkitLoader setup
+    'app/root.tsx',
+    'src/root.tsx',
+    // Auth utilities
+    'app/lib/auth*.ts',
+    'src/lib/auth*.ts',
+  ],
+
+  'tanstack-start': [
+    // Middleware config
+    'src/start.ts',
+    'app/start.ts',
+    // Callback route - handleCallbackRoute
+    'src/routes/**/callback*.tsx',
+    'app/routes/**/callback*.tsx',
+    // Router - middleware registration
+    'src/router.tsx',
+    'app/router.tsx',
+  ],
+
+  'vanilla-js': [
+    // Main entry script
+    'src/main.js',
+    'src/index.js',
+    'main.js',
+    'index.js',
+    // Auth module if separated
+    'src/auth.js',
+    'auth.js',
+    // HTML entry
+    'index.html',
+  ],
+};
diff --git a/tests/evals/quality-rubrics.ts b/tests/evals/quality-rubrics.ts
new file mode 100644
index 00000000..6527096f
--- /dev/null
+++ b/tests/evals/quality-rubrics.ts
@@ -0,0 +1,49 @@
+export const QUALITY_RUBRICS = {
+  codeStyle: {
+    name: 'Code Style',
+    description: 'Adherence to project conventions and formatting',
+    scale: {
+      1: 'Major violations: inconsistent indentation, wrong naming conventions, poor organization',
+      2: 'Several issues: some style inconsistencies, minor formatting problems',
+      3: 'Acceptable: mostly follows conventions with a few deviations',
+      4: 'Good: follows conventions well, minor improvements possible',
+      5: 'Excellent: exemplary adherence to project style, clean and consistent',
+    },
+  },
+  minimalism: {
+    name: 'Minimalism',
+    description: 'Changes are focused and minimal, no unnecessary modifications',
+    scale: {
+      1: 'Excessive: many unnecessary changes, modified unrelated files, over-engineered',
+      2: 'Bloated: some unnecessary additions or modifications',
+      3: 'Acceptable: mostly focused, few extra changes',
+      4: 'Good: changes are well-scoped with minimal extras',
+      5: 'Excellent: surgically precise, only necessary changes made',
+    },
+  },
+  errorHandling: {
+    name: 'Error Handling',
+    description: 'Proper error handling and user-friendly error messages',
+    scale: {
+      1: 'Missing: no error handling, crashes on edge cases',
+      2: 'Basic: catches some errors but poor messages or recovery',
+      3: 'Acceptable: handles main errors, generic messages',
+      4: 'Good: comprehensive error handling, helpful messages',
+      5: 'Excellent: robust handling with actionable user-friendly messages',
+    },
+  },
+  idiomatic: {
+    name: 'Idiomatic',
+    description: 'Follows framework best practices and patterns',
+    scale: {
+      1: 'Anti-patterns: uses deprecated APIs, ignores framework conventions',
+      2: "Suboptimal: works but doesn't follow recommended patterns",
+      3: 'Acceptable: functional, follows basic patterns',
+      4: 'Good: uses recommended patterns and APIs',
+      5: 'Excellent: exemplary use of framework patterns and best practices',
+    },
+  },
+} as const;
+/** Rubric dimension names (each scored 1-5); derived from QUALITY_RUBRICS' keys, in declaration order, so the list cannot drift. */
+export type QualityDimension = keyof typeof QUALITY_RUBRICS;
+export const QUALITY_DIMENSIONS = Object.keys(QUALITY_RUBRICS) as QualityDimension[];
diff --git a/tests/evals/reporter.ts b/tests/evals/reporter.ts
index 562f3cc3..125c370e 100644
--- a/tests/evals/reporter.ts
+++ b/tests/evals/reporter.ts
@@ -1,29 +1,50 @@
 import type { EvalResult } from './types.js';
 
-const FRAMEWORKS = ['nextjs', 'react', 'react-router', 'tanstack-start', 'vanilla-js'];
-const STATES = ['example', 'example-auth0'];
+// Short, unique column labels for display; each must fit printMatrix's 8-character column.
+const STATE_LABELS: Record<string, string> = {
+  example: 'Base',
+  'example-auth0': 'Auth0',
+  'partial-install': 'Partial',
+  'typescript-strict': 'Strict',
+  'conflicting-middleware': 'ConfMW',
+  'conflicting-auth': 'ConfAuth',
+};
 
 export function printMatrix(results: EvalResult[]): void {
   const resultMap = new Map(results.map((r) => [r.scenario, r]));
 
+  // Extract unique frameworks and states from results
+  const frameworks = [...new Set(results.map((r) => r.scenario.split('/')[0]))];
+  const states = [...new Set(results.map((r) => r.scenario.split('/')[1]))];
+
+  // Sort states in logical order
+  const stateOrder = ['example', 'example-auth0', 'partial-install', 'typescript-strict', 'conflicting-middleware', 'conflicting-auth'];
+  states.sort((a, b) => stateOrder.indexOf(a) - stateOrder.indexOf(b));
+
+  // Build dynamic table
+  const colWidth = 8;
+  const fwWidth = 15;
+
   // Header
-  console.log('\n┌─────────────────┬─────────┬───────────────┐');
-  console.log('│ Framework       │ Example │ Example+Auth0 │');
-  console.log('├─────────────────┼─────────┼───────────────┤');
+  const headerCells = states.map((s) => (STATE_LABELS[s] || s).padStart(colWidth)).join('│');
+  const divider = states.map(() => '─'.repeat(colWidth)).join('┼');
+
+  console.log(`\n┌${'─'.repeat(fwWidth)}┬${states.map(() => '─'.repeat(colWidth)).join('┬')}┐`);
+  console.log(`│${'Framework'.padEnd(fwWidth)}│${headerCells}│`);
+  console.log(`├${'─'.repeat(fwWidth)}┼${divider}┤`);
 
-  for (const framework of FRAMEWORKS) {
-    const cells = STATES.map((state) => {
+  for (const framework of frameworks) {
+    const cells = states.map((state) => {
       const key = `${framework}/${state}`;
       const result = resultMap.get(key);
-      if (!result) return '   -   ';
-      return result.passed ? '   ✓   ' : '   ✗   ';
+      if (!result) return '-'.padStart(colWidth);
+      return (result.passed ? '✓' : '✗').padStart(colWidth);
     });
 
-    const name = framework.padEnd(15);
-    console.log(`│ ${name} │ ${cells[0]} │ ${cells[1]}       │`);
+    console.log(`│${framework.padEnd(fwWidth)}│${cells.join('│')}│`);
   }
 
-  console.log('└─────────────────┴─────────┴───────────────┘');
+  console.log(`└${'─'.repeat(fwWidth)}┴${states.map(() => '─'.repeat(colWidth)).join('┴')}┘`);
 
   // Summary
   const passed = results.filter((r) => r.passed).length;
diff --git a/tests/evals/runner.ts b/tests/evals/runner.ts
index 3abce796..c8361b4f 100644
--- a/tests/evals/runner.ts
+++ b/tests/evals/runner.ts
@@ -7,7 +7,11 @@ import { saveResults } from './history.js';
 import { ParallelRunner } from './parallel-runner.js';
 import { renderDashboard } from './dashboard/index.js';
 import { LogWriter } from './log-writer.js';
-import type { EvalResult, EvalOptions, Grader } from './types.js';
+import { validateResults, type ValidationResult } from './success-criteria.js';
+import { captureVersionMetadata } from './versioning.js';
+import { QualityGrader } from './graders/quality-grader.js';
+import { loadCredentials } from './env-loader.js';
+import type { EvalResult, EvalOptions, EvalResultMetadata, Grader, QualityInput } from './types.js';
 
 interface Scenario {
   framework: string;
@@ -16,25 +20,39 @@ interface Scenario {
 }
 
 const SCENARIOS: Scenario[] = [
-  // Next.js
+  // Next.js (5 states)
   { framework: 'nextjs', state: 'example', grader: NextjsGrader },
   { framework: 'nextjs', state: 'example-auth0', grader: NextjsGrader },
+  { framework: 'nextjs', state: 'partial-install', grader: NextjsGrader },
+  { framework: 'nextjs', state: 'typescript-strict', grader: NextjsGrader },
+  { framework: 'nextjs', state: 'conflicting-middleware', grader: NextjsGrader },
 
-  // React SPA
+  // React SPA (5 states; its conflict state is conflicting-auth, not conflicting-middleware)
   { framework: 'react', state: 'example', grader: ReactGrader },
   { framework: 'react', state: 'example-auth0', grader: ReactGrader },
+  { framework: 'react', state: 'partial-install', grader: ReactGrader },
+  { framework: 'react', state: 'typescript-strict', grader: ReactGrader },
+  { framework: 'react', state: 'conflicting-auth', grader: ReactGrader },
 
-  // React Router
+  // React Router (5 states)
   { framework: 'react-router', state: 'example', grader: ReactRouterGrader },
   { framework: 'react-router', state: 'example-auth0', grader: ReactRouterGrader },
+  { framework: 'react-router', state: 'partial-install', grader: ReactRouterGrader },
+  { framework: 'react-router', state: 'typescript-strict', grader: ReactRouterGrader },
+  { framework: 'react-router', state: 'conflicting-middleware', grader: ReactRouterGrader },
 
-  // TanStack Start
+  // TanStack Start (5 states)
   { framework: 'tanstack-start', state: 'example', grader: TanstackGrader },
   { framework: 'tanstack-start', state: 'example-auth0', grader: TanstackGrader },
+  { framework: 'tanstack-start', state: 'partial-install', grader: TanstackGrader },
+  { framework: 'tanstack-start', state: 'typescript-strict', grader: TanstackGrader },
+  { framework: 'tanstack-start', state: 'conflicting-middleware', grader: TanstackGrader },
 
-  // Vanilla JS
+  // Vanilla JS (4 states - no typescript-strict entry; conflict state is conflicting-auth)
   { framework: 'vanilla-js', state: 'example', grader: VanillaGrader },
   { framework: 'vanilla-js', state: 'example-auth0', grader: VanillaGrader },
+  { framework: 'vanilla-js', state: 'partial-install', grader: VanillaGrader },
+  { framework: 'vanilla-js', state: 'conflicting-auth', grader: VanillaGrader },
 ];
 
 export interface ExtendedEvalOptions extends EvalOptions {
@@ -43,9 +61,18 @@ export interface ExtendedEvalOptions extends EvalOptions {
   retry?: number;
   noDashboard?: boolean;
   debug?: boolean;
+  noFail?: boolean;
+  quality?: boolean;
 }
 
 export async function runEvals(options: ExtendedEvalOptions): Promise<EvalResult[]> {
+  // Capture version metadata at start
+  const versionMeta = await captureVersionMetadata();
+  const metadata: EvalResultMetadata = {
+    ...versionMeta,
+    timestamp: new Date().toISOString(),
+  };
+
   const scenarios = SCENARIOS.filter(
     (s) => (!options.framework || s.framework === options.framework) && (!options.state || s.state === options.state),
   );
@@ -90,21 +117,74 @@ export async function runEvals(options: ExtendedEvalOptions): Promise<EvalResult
     dashboard.unmount();
   }
 
+  // Quality grading (optional, only for passing scenarios with key files)
+  if (options.quality) {
+    const credentials = loadCredentials();
+    const qualityGrader = new QualityGrader(credentials.anthropicApiKey);
+
+    console.log('\nRunning quality grading on passing scenarios...');
+
+    for (const result of results) {
+      if (result.passed && result.keyFiles && result.keyFiles.size > 0) {
+        const framework = result.scenario.split('/')[0];
+
+        // Build metadata from result
+        const qualityInput: QualityInput = {
+          framework,
+          keyFiles: result.keyFiles,
+          metadata: {
+            filesCreated: [], // Could be extracted from tool calls if tracked
+            filesModified: [], // Could be extracted from tool calls if tracked
+            toolCallSummary: buildToolCallSummary(result),
+            checksPassed: result.checks?.filter((c) => c.passed).map((c) => c.name) || [],
+          },
+        };
+
+        result.qualityGrade = await qualityGrader.grade(qualityInput);
+
+        if (result.qualityGrade) {
+          console.log(`  ${result.scenario}: ${result.qualityGrade.score}/5`);
+          if (options.verbose) {
+            for (const line of result.qualityGrade.reasoning.split('\n')) {
+              console.log(`    ${line}`);
+            }
+          }
+        }
+      }
+    }
+
+    printQualitySummary(results);
+  }
+
   // Print summary
   printSummary(results);
+  printLatencySummary(results);
+
+  // Validate against success criteria
+  const validation = validateResults(results);
+  printValidationSummary(validation);
 
   // Print log file location
   console.log(`\nDetailed log: ${logWriter.getFilePath()}`);
 
   logWriter.cleanup();
 
-  // Save results
-  const filepath = await saveResults(results, {
-    framework: options.framework,
-    state: options.state,
-  });
+  // Save results with metadata
+  const filepath = await saveResults(
+    results,
+    {
+      framework: options.framework,
+      state: options.state,
+    },
+    metadata,
+  );
   console.log(`Results saved to: ${filepath}`);
 
+  // Exit with error if thresholds not met (unless --no-fail)
+  if (!validation.passed && !options.noFail) {
+    process.exitCode = 1;
+  }
+
   return results;
 }
 
@@ -123,3 +203,110 @@ function printSummary(results: EvalResult[]): void {
     }
   }
 }
+
+/** Logs TTFT and duration percentiles, then the five tools with the largest summed duration. */
+function printLatencySummary(results: EvalResult[]): void {
+  const metricsList = results.flatMap((r) => (r.latencyMetrics ? [r.latencyMetrics] : []));
+  if (metricsList.length === 0) return;
+
+  const ascending = (a: number, b: number): number => a - b;
+  const ttftValues: number[] = [];
+  for (const m of metricsList) {
+    if (m.ttftMs !== null) ttftValues.push(m.ttftMs);
+  }
+  ttftValues.sort(ascending);
+  const durationValues = metricsList.map((m) => m.totalDurationMs).sort(ascending);
+
+  console.log('\nLatency Summary:');
+  console.log('─'.repeat(40));
+
+  // TTFT can be null per run, so its line is omitted when no run reported one.
+  if (ttftValues.length > 0) {
+    const maxTtft = ttftValues[ttftValues.length - 1];
+    console.log(`  TTFT:     p50=${percentile(ttftValues, 50)}ms, p95=${percentile(ttftValues, 95)}ms, max=${maxTtft}ms`);
+  }
+  const maxDuration = durationValues[durationValues.length - 1];
+  console.log(
+    `  Duration: p50=${percentile(durationValues, 50)}ms, p95=${percentile(durationValues, 95)}ms, max=${maxDuration}ms`,
+  );
+
+  // Sum duration and call count per tool name over every run.
+  const totalsByTool = new Map<string, { durationMs: number; count: number }>();
+  for (const m of metricsList) {
+    for (const entry of m.toolBreakdown || []) {
+      const prior = totalsByTool.get(entry.tool);
+      totalsByTool.set(entry.tool, {
+        durationMs: (prior ? prior.durationMs : 0) + entry.durationMs,
+        count: (prior ? prior.count : 0) + entry.count,
+      });
+    }
+  }
+  if (totalsByTool.size === 0) return;
+
+  console.log('\nTool Time Breakdown (total across all scenarios):');
+  const ranked = [...totalsByTool.entries()].sort(([, a], [, b]) => b.durationMs - a.durationMs);
+  for (const [toolName, totals] of ranked.slice(0, 5)) {
+    console.log(`  ${toolName}: ${(totals.durationMs / 1000).toFixed(1)}s (${totals.count} calls)`);
+  }
+}
+
+function percentile(sorted: number[], p: number): number {
+  const rank = Math.ceil((Math.min(100, Math.max(0, p)) / 100) * sorted.length); // nearest-rank on an ascending array; p clamped to [0, 100]
+  return sorted[Math.max(0, rank - 1)] ?? 0; // an empty array has no percentile: report 0 instead of undefined
+}
+
+/** Prints the pass/fail banner, each failure message, and the actual vs required pass rates. */
+function printValidationSummary(validation: ValidationResult): void {
+  const { actual, criteria } = validation;
+  // Round thresholds so float noise (0.57 * 100 is 56.99999999999999) never reaches the console.
+  const required = (rate: number): number => Number((rate * 100).toFixed(1));
+  console.log('\n' + '═'.repeat(50));
+  if (validation.passed) {
+    console.log('✓ PASS: All success criteria met');
+  } else {
+    console.log('✗ FAIL: Success criteria not met');
+    for (const failure of validation.failures) console.log(`  - ${failure}`);
+  }
+  console.log(
+    `\nFirst-attempt: ${(actual.firstAttemptPassRate * 100).toFixed(1)}% (required: ${required(criteria.firstAttemptPassRate)}%)`,
+  );
+  console.log(`With-retry:    ${(actual.withRetryPassRate * 100).toFixed(1)}% (required: ${required(criteria.withRetryPassRate)}%)`);
+  console.log('═'.repeat(50));
+}
+
+/** Prints the mean score per quality dimension and the mean overall score across graded results. */
+function printQualitySummary(results: EvalResult[]): void {
+  const grades = results.flatMap((r) => (r.qualityGrade ? [r.qualityGrade] : []));
+  if (grades.length === 0) return;
+
+  console.log('\nQuality Summary:');
+  console.log('─'.repeat(40));
+
+  // Accumulate each dimension's scores; the key order here fixes the print order below.
+  const totals = { codeStyle: 0, minimalism: 0, errorHandling: 0, idiomatic: 0 };
+  grades.forEach((grade) => {
+    Object.entries(grade.dimensions).forEach(([name, value]) => {
+      totals[name as keyof typeof totals] += value;
+    });
+  });
+
+  Object.entries(totals).forEach(([name, total]) => {
+    console.log(`  ${name}: ${(total / grades.length).toFixed(1)}/5`);
+  });
+
+  const overallTotal = grades.reduce((acc, grade) => acc + grade.score, 0);
+  console.log(`\n  Overall: ${(overallTotal / grades.length).toFixed(1)}/5`);
+}
+
+/** Formats per-tool call counts as "<count> <tool>, ..." or a fixed notice when none exist. */
+function buildToolCallSummary(result: EvalResult): string {
+  // EvalResult holds no raw tool calls; latencyMetrics.toolBreakdown has the aggregate counts.
+  const perTool = result.latencyMetrics?.toolBreakdown ?? [];
+  if (perTool.length === 0) return 'No tool call data';
+
+  const parts: string[] = [];
+  for (const { count, tool } of perTool) {
+    parts.push(`${count} ${tool}`);
+  }
+  return parts.join(', ');
+}
diff --git a/tests/evals/success-criteria.spec.ts b/tests/evals/success-criteria.spec.ts
new file mode 100644
index 00000000..ba626049
--- /dev/null
+++ b/tests/evals/success-criteria.spec.ts
@@ -0,0 +1,137 @@
+import { describe, it, expect } from 'vitest';
+import { validateResults, DEFAULT_CRITERIA, type SuccessCriteria } from './success-criteria.js';
+import type { EvalResult } from './types.js';
+
+/** Builds a minimal EvalResult with a random scenario name and a fixed duration of 1000. */
+function makeResult(passed: boolean, attempts: number = 1): EvalResult {
+  const uniqueSuffix = Math.random().toString(36).slice(2);
+  const scenario = `test-${uniqueSuffix}`;
+  const duration = 1000;
+
+  return { scenario, passed, duration, attempts };
+}
+
+describe('success-criteria', () => {
+  describe('DEFAULT_CRITERIA', () => {
+    it('has expected default thresholds', () => {
+      expect(DEFAULT_CRITERIA.firstAttemptPassRate).toBe(0.9);
+      expect(DEFAULT_CRITERIA.withRetryPassRate).toBe(0.95);
+    });
+  });
+
+  describe('validateResults', () => {
+    it('returns passed=true when all criteria met', () => {
+      // 10 results: 9 pass on the first attempt, 1 passes on a retry (90% first-attempt, 100% with-retry)
+      const results: EvalResult[] = [
+        ...Array(9)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(true, 2),
+      ];
+
+      const validation = validateResults(results);
+
+      expect(validation.passed).toBe(true);
+      expect(validation.failures).toHaveLength(0);
+      expect(validation.actual.firstAttemptPassRate).toBe(0.9);
+      expect(validation.actual.withRetryPassRate).toBe(1);
+    });
+
+    it('returns passed=false when first-attempt rate below threshold', () => {
+      // 10 results: only 8 pass on the first attempt (80% < 90%); the other 2 pass on a retry
+      const results: EvalResult[] = [
+        ...Array(8)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(true, 2),
+        makeResult(true, 2),
+      ];
+
+      const validation = validateResults(results);
+
+      expect(validation.passed).toBe(false);
+      expect(validation.failures).toHaveLength(1);
+      expect(validation.failures[0]).toContain('First-attempt');
+      expect(validation.failures[0]).toContain('80.0%');
+    });
+
+    it('returns passed=false when with-retry rate below threshold', () => {
+      // 10 results: 9 pass on the first attempt, 1 fails every attempt (with-retry 90% < 95%)
+      const results: EvalResult[] = [
+        ...Array(9)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(false, 3),
+      ];
+
+      const validation = validateResults(results);
+
+      expect(validation.passed).toBe(false);
+      expect(validation.failures).toHaveLength(1);
+      expect(validation.failures[0]).toContain('With-retry');
+    });
+
+    it('returns both failures when both criteria not met', () => {
+      // 10 results: 7 first-attempt passes (70%), 2 retry passes, 1 failure (with-retry 90%)
+      const results: EvalResult[] = [
+        ...Array(7)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(true, 2),
+        makeResult(true, 2),
+        makeResult(false, 3),
+      ];
+
+      const validation = validateResults(results);
+
+      expect(validation.passed).toBe(false);
+      expect(validation.failures).toHaveLength(2);
+    });
+
+    it('handles empty results array', () => {
+      const validation = validateResults([]);
+
+      expect(validation.passed).toBe(false);
+      expect(validation.actual.firstAttemptPassRate).toBe(0);
+      expect(validation.actual.withRetryPassRate).toBe(0);
+    });
+
+    it('respects custom criteria', () => {
+      const customCriteria: SuccessCriteria = {
+        firstAttemptPassRate: 0.5,
+        withRetryPassRate: 0.6,
+      };
+
+      // 10 results: 5 pass on the first attempt (50%), 1 more on a retry (60%), 4 fail
+      const results: EvalResult[] = [
+        ...Array(5)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(true, 2),
+        ...Array(4)
+          .fill(null)
+          .map(() => makeResult(false, 3)),
+      ];
+
+      const validation = validateResults(results, customCriteria);
+
+      expect(validation.passed).toBe(true);
+      expect(validation.criteria).toBe(customCriteria);
+    });
+
+    it('passes when exactly at threshold', () => {
+      // 20 results: 18 first-attempt passes (exactly 90%) and 19 passes overall (exactly 95%)
+      const results: EvalResult[] = [
+        ...Array(18)
+          .fill(null)
+          .map(() => makeResult(true, 1)),
+        makeResult(true, 2),
+        makeResult(false, 3),
+      ];
+
+      const validation = validateResults(results);
+
+      expect(validation.passed).toBe(true);
+    });
+  });
+});
diff --git a/tests/evals/success-criteria.ts b/tests/evals/success-criteria.ts
new file mode 100644
index 00000000..0f67ed74
--- /dev/null
+++ b/tests/evals/success-criteria.ts
@@ -0,0 +1,64 @@
+import type { EvalResult } from './types.js';
+
+/**
+ * Success criteria thresholds for eval runs.
+ * Used to determine if an eval run meets quality bar for CI/CD.
+ */
+export interface SuccessCriteria {
+  /** Minimum fraction (0-1) of results that must pass on their first attempt */
+  firstAttemptPassRate: number;
+  /** Minimum fraction (0-1) of results that must pass, counting retried passes */
+  withRetryPassRate: number;
+  /** Maximum duration per scenario in ms (optional; declared for future use, not read by validateResults) */
+  maxDurationMs?: number;
+}
+
+/** Default thresholds for CI enforcement: 90% first-attempt and 95% with-retry pass rate */
+export const DEFAULT_CRITERIA: SuccessCriteria = {
+  firstAttemptPassRate: 0.9,
+  withRetryPassRate: 0.95,
+};
+
+export interface ValidationResult {
+  passed: boolean; // true only when `failures` is empty
+  criteria: SuccessCriteria; // the thresholds the results were checked against
+  actual: {
+    firstAttemptPassRate: number; // measured fraction (0-1)
+    withRetryPassRate: number; // measured fraction (0-1)
+  };
+  failures: string[]; // one human-readable message per unmet threshold
+}
+
+/**
+ * Validate eval results against success criteria thresholds.
+ * A passed result with `attempts` of 1 or absent (the field is optional) is a first-attempt pass;
+ * an empty `results` array yields 0 for both rates.
+ * @returns the criteria used, the actual rates, and one message per unmet threshold.
+ */
+export function validateResults(
+  results: EvalResult[],
+  criteria: SuccessCriteria = DEFAULT_CRITERIA,
+): ValidationResult {
+  const total = results.length;
+  const rateOf = (count: number): number => (total > 0 ? count / total : 0);
+  // Render a 0-1 threshold as a percentage without float noise such as 56.99999999999999.
+  const asPercent = (rate: number): number => Number((rate * 100).toFixed(1));
+
+  const firstAttemptRate = rateOf(results.filter((r) => r.passed && (r.attempts ?? 1) === 1).length);
+  const withRetryRate = rateOf(results.filter((r) => r.passed).length);
+
+  const failures: string[] = [];
+  if (firstAttemptRate < criteria.firstAttemptPassRate) {
+    failures.push(`First-attempt pass rate ${(firstAttemptRate * 100).toFixed(1)}% < ${asPercent(criteria.firstAttemptPassRate)}% required`);
+  }
+  if (withRetryRate < criteria.withRetryPassRate) {
+    failures.push(`With-retry pass rate ${(withRetryRate * 100).toFixed(1)}% < ${asPercent(criteria.withRetryPassRate)}% required`);
+  }
+
+  return {
+    passed: failures.length === 0,
+    criteria,
+    actual: { firstAttemptPassRate: firstAttemptRate, withRetryPassRate: withRetryRate },
+    failures,
+  };
+}
diff --git a/tests/evals/types.ts b/tests/evals/types.ts
index 106b42e2..3f626d29 100644
--- a/tests/evals/types.ts
+++ b/tests/evals/types.ts
@@ -23,6 +23,25 @@ export interface EvalResult {
   agentOutput?: string;
   error?: string;
   attempts?: number;
+  latencyMetrics?: LatencyMetrics;
+  qualityGrade?: QualityGrade;
+  /** Key integration files for quality grading (replaces raw diff) */
+  keyFiles?: Map<string, string>;
+}
+
+/** Input for quality grading - structured data instead of raw diff */
+export interface QualityInput {
+  framework: string;
+  /** Map of relative file paths to their contents */
+  keyFiles: Map<string, string>;
+  metadata: {
+    filesCreated: string[];
+    filesModified: string[];
+    /** Comma-separated per-tool call counts, e.g. "12 writes, 3 reads, 2 bash" */
+    toolCallSummary: string;
+    /** Check names that passed, e.g. ["middleware exists", "build succeeds"] */
+    checksPassed: string[];
+  };
 }
 
 export interface EvalOptions {
@@ -38,3 +57,41 @@ export interface ToolCall {
   output?: string;
   durationMs?: number;
 }
+
+/** Metadata captured at eval run start for version tracking */
+export interface EvalResultMetadata {
+  skillVersions: Record<string, string>; // framework name -> version identifier of its skill file
+  cliVersion: string;
+  modelVersion: string;
+  timestamp: string; // ISO-8601 string (Date#toISOString) taken when the run starts
+}
+
+/** Latency metrics for performance tracking; every field ending in Ms is a duration in milliseconds */
+export interface LatencyMetrics {
+  ttftMs: number | null; // time to first content; null when no first-content time was recorded
+  agentThinkingMs: number; // total duration minus tool execution time, never negative
+  toolExecutionMs: number; // summed duration of all tool calls
+  totalDurationMs: number;
+  tokenMetrics?: {
+    inputTokens: number;
+    outputTokens: number;
+    tokensPerSecond: number; // output tokens divided by total duration in seconds
+  };
+  toolBreakdown?: Array<{
+    tool: string; // tool name
+    durationMs: number; // summed duration of this tool's calls
+    count: number; // number of calls to this tool
+  }>;
+}
+
+/** Quality grade for one scenario: an overall score plus one score per rubric dimension */
+export interface QualityGrade {
+  score: number; // overall score, reported as "<score>/5"
+  dimensions: {
+    codeStyle: number;
+    minimalism: number;
+    errorHandling: number;
+    idiomatic: number;
+  };
+  reasoning: string; // free-text explanation; may span several lines
+}
diff --git a/tests/evals/versioning.spec.ts b/tests/evals/versioning.spec.ts
new file mode 100644
index 00000000..615f50d8
--- /dev/null
+++ b/tests/evals/versioning.spec.ts
@@ -0,0 +1,114 @@
+import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
+import { captureVersionMetadata, getFileHash } from './versioning.js';
+
+// Mock exec-file to avoid actual git calls in tests
+vi.mock('../../src/utils/exec-file.js', () => ({
+  execFileNoThrow: vi.fn(),
+}));
+
+// Mock settings
+vi.mock('../../src/lib/settings.js', () => ({
+  getVersion: vi.fn(() => '1.2.3'),
+  getConfig: vi.fn(() => ({ model: 'claude-opus-4-5-20251101' })),
+}));
+
+import { execFileNoThrow } from '../../src/utils/exec-file.js';
+
+describe('versioning', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+  });
+
+  describe('getFileHash', () => {
+    it('returns short hash when git succeeds', async () => {
+      vi.mocked(execFileNoThrow).mockResolvedValue({
+        status: 0,
+        stdout: 'abc123def456789\n',
+        stderr: '',
+      });
+
+      const hash = await getFileHash('test/file.ts');
+
+      expect(hash).toBe('abc123de'); // First 8 chars of the trimmed stdout
+      expect(execFileNoThrow).toHaveBeenCalledWith('git', ['hash-object', 'test/file.ts'], expect.any(Object));
+    });
+
+    it('returns "unknown" when git fails', async () => {
+      vi.mocked(execFileNoThrow).mockResolvedValue({
+        status: 128,
+        stdout: '',
+        stderr: 'fatal: not a git repository',
+      });
+
+      const hash = await getFileHash('test/file.ts');
+
+      expect(hash).toBe('unknown');
+    });
+
+    it('returns "unknown" when file not found', async () => {
+      vi.mocked(execFileNoThrow).mockResolvedValue({
+        status: 128,
+        stdout: '',
+        stderr: 'fatal: Cannot open',
+      });
+
+      const hash = await getFileHash('nonexistent.ts');
+
+      expect(hash).toBe('unknown');
+    });
+  });
+
+  describe('captureVersionMetadata', () => {
+    it('returns all required fields', async () => {
+      vi.mocked(execFileNoThrow).mockResolvedValue({
+        status: 0,
+        stdout: '12345678abcdef\n',
+        stderr: '',
+      });
+
+      const metadata = await captureVersionMetadata();
+
+      expect(metadata.cliVersion).toBe('1.2.3');
+      expect(metadata.modelVersion).toBe('claude-opus-4-5-20251101');
+      expect(metadata.skillVersions).toBeDefined();
+      expect(typeof metadata.skillVersions.nextjs).toBe('string');
+      expect(typeof metadata.skillVersions.react).toBe('string');
+      expect(typeof metadata.skillVersions['react-router']).toBe('string');
+      expect(typeof metadata.skillVersions['tanstack-start']).toBe('string');
+      expect(typeof metadata.skillVersions['vanilla-js']).toBe('string');
+    });
+
+    it('captures hashes for all frameworks', async () => {
+      let callCount = 0;
+      vi.mocked(execFileNoThrow).mockImplementation(async () => {
+        callCount++;
+        return {
+          status: 0,
+          stdout: `hash${callCount}000000\n`,
+          stderr: '',
+        };
+      });
+
+      const metadata = await captureVersionMetadata();
+
+      // Should have called git hash-object exactly once per framework (five frameworks)
+      expect(execFileNoThrow).toHaveBeenCalledTimes(5);
+      expect(Object.keys(metadata.skillVersions)).toHaveLength(5);
+    });
+
+    it('handles mixed success/failure gracefully', async () => {
+      vi.mocked(execFileNoThrow).mockImplementation(async (_cmd, args) => {
+        const path = args[1] as string; // args is ['hash-object', <file path>]
+        if (path.includes('nextjs')) {
+          return { status: 0, stdout: 'abc12345\n', stderr: '' };
+        }
+        return { status: 128, stdout: '', stderr: 'not found' };
+      });
+
+      const metadata = await captureVersionMetadata();
+
+      expect(metadata.skillVersions.nextjs).toBe('abc12345');
+      expect(metadata.skillVersions.react).toBe('unknown');
+    });
+  });
+});
diff --git a/tests/evals/versioning.ts b/tests/evals/versioning.ts
new file mode 100644
index 00000000..6418a4fe
--- /dev/null
+++ b/tests/evals/versioning.ts
@@ -0,0 +1,59 @@
+import { execFileNoThrow } from '../../src/utils/exec-file.js';
+import { getConfig, getVersion } from '../../src/lib/settings.js';
+import { readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+
+/**
+ * Maps each framework name to the installer-agent source file hashed for version tracking.
+ * Paths are relative; getFileHash passes them to git with cwd set to process.cwd().
+ */
+const SKILL_FILES: Record<string, string> = {
+  nextjs: 'src/nextjs/nextjs-installer-agent.ts',
+  react: 'src/react/react-installer-agent.ts',
+  'react-router': 'src/react-router/react-router-installer-agent.ts',
+  'tanstack-start': 'src/tanstack-start/tanstack-start-installer-agent.ts',
+  'vanilla-js': 'src/vanilla-js/vanilla-js-installer-agent.ts',
+};
+
+export interface VersionMetadata {
+  skillVersions: Record<string, string>; // framework name -> short file hash, or 'unknown'
+  cliVersion: string; // value returned by getVersion()
+  modelVersion: string; // value of getConfig().model
+}
+
+/**
+ * Capture version metadata at eval start.
+ * Hashes every SKILL_FILES entry concurrently; skillVersions keeps SKILL_FILES key order.
+ */
+export async function captureVersionMetadata(): Promise<VersionMetadata> {
+  const entries = await Promise.all(
+    Object.entries(SKILL_FILES).map(
+      async ([framework, filePath]) => [framework, await getFileHash(filePath)] as const,
+    ),
+  );
+  const skillVersions: Record<string, string> = Object.fromEntries(entries);
+
+  return {
+    skillVersions,
+    cliVersion: getVersion(),
+    modelVersion: getConfig().model,
+  };
+}
+
+/**
+ * Runs `git hash-object <filePath>` and returns the first 8 characters of its trimmed output.
+ * Returns 'unknown' on a non-zero exit (e.g. missing file; presumably also when git is unavailable).
+ */
+async function getFileHash(filePath: string): Promise<string> {
+  const result = await execFileNoThrow('git', ['hash-object', filePath], {
+    cwd: process.cwd(), // a relative filePath is resolved against the current working directory
+    timeout: 5000,
+  });
+
+  if (result.status === 0) {
+    return result.stdout.trim().slice(0, 8); // Short hash
+  }
+  return 'unknown';
+}
+
+export { getFileHash };
diff --git a/tests/fixtures/nextjs/conflicting-middleware/README.md b/tests/fixtures/nextjs/conflicting-middleware/README.md
new file mode 100644
index 00000000..529ffdab
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/README.md
@@ -0,0 +1,35 @@
+# Next.js - Conflicting Middleware Fixture
+
+## Edge Case Description
+
+This fixture has existing middleware with rate limiting and custom header injection. The agent must MERGE AuthKit middleware with the existing logic, not replace it.
+
+## Expected Agent Behavior
+
+- Detect existing middleware.ts
+- Integrate AuthKit via the composable `authkit()` function (not a drop-in `authkitMiddleware` replacement) while PRESERVING:
+  - Rate limiting logic
+  - Custom header injection (X-App-Version, X-Request-Id, X-RateLimit-*)
+  - Request logging
+- Combine the matcher configurations appropriately
+
+## Files of Interest
+
+- `middleware.ts` - Has rate limiting, logging, and custom headers that MUST be preserved
+
+## Success Criteria
+
+- [ ] AuthKit middleware is integrated
+- [ ] Rate limiting logic is preserved
+- [ ] Custom headers (X-App-Version, X-Request-Id) still added
+- [ ] Request logging still works
+- [ ] Build succeeds
+
+## Notes
+
+This is a critical edge case. Many production apps have existing middleware for security, monitoring, or custom logic. The agent should compose middleware rather than overwrite.
+
+Ideal solution patterns:
+1. Chain middlewares: Call `authkit()` first, then apply custom logic
+2. Wrap middlewares: Create a composed middleware function
+3. Conditional routing: Apply different middleware based on path
diff --git a/tests/fixtures/nextjs/conflicting-middleware/app/about/page.tsx b/tests/fixtures/nextjs/conflicting-middleware/app/about/page.tsx
new file mode 100644
index 00000000..e26b5ea3
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/app/about/page.tsx
@@ -0,0 +1,8 @@
+export default function About() {
+  return (
+    <main>
+      <h1>About</h1>
+      <p>This is an existing Next.js application with custom middleware.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/conflicting-middleware/app/dashboard/page.tsx b/tests/fixtures/nextjs/conflicting-middleware/app/dashboard/page.tsx
new file mode 100644
index 00000000..34819842
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/app/dashboard/page.tsx
@@ -0,0 +1,8 @@
+export default function Dashboard() {
+  return (
+    <main>
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/conflicting-middleware/app/layout.tsx b/tests/fixtures/nextjs/conflicting-middleware/app/layout.tsx
new file mode 100644
index 00000000..9d0c20c8
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/app/layout.tsx
@@ -0,0 +1,14 @@
+import Link from 'next/link';
+
+export default function RootLayout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <body>
+        <nav>
+          <Link href="/">Home</Link> | <Link href="/about">About</Link> | <Link href="/dashboard">Dashboard</Link>
+        </nav>
+        {children}
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/nextjs/conflicting-middleware/app/page.tsx b/tests/fixtures/nextjs/conflicting-middleware/app/page.tsx
new file mode 100644
index 00000000..12c593e3
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/app/page.tsx
@@ -0,0 +1,8 @@
+export default function Home() {
+  return (
+    <main>
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/conflicting-middleware/middleware.ts b/tests/fixtures/nextjs/conflicting-middleware/middleware.ts
new file mode 100644
index 00000000..5c0d8026
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/middleware.ts
@@ -0,0 +1,58 @@
+import { NextResponse } from 'next/server';
+import type { NextRequest } from 'next/server';
+
+// Simple in-memory rate limiting (for demo purposes)
+const requestCounts = new Map<string, { count: number; resetTime: number }>();
+const RATE_LIMIT = 100;
+const WINDOW_MS = 60 * 1000; // 1 minute
+
+function getRateLimitInfo(ip: string): { allowed: boolean; remaining: number } { // counts one request for ip against RATE_LIMIT
+  const now = Date.now();
+  const record = requestCounts.get(ip);
+
+  if (!record || now > record.resetTime) { // no record yet, or the previous window has ended: start a new window
+    requestCounts.set(ip, { count: 1, resetTime: now + WINDOW_MS });
+    return { allowed: true, remaining: RATE_LIMIT - 1 };
+  }
+
+  if (record.count >= RATE_LIMIT) { // limit already reached inside the current window
+    return { allowed: false, remaining: 0 };
+  }
+
+  record.count++; // mutates the entry stored in requestCounts
+  return { allowed: true, remaining: RATE_LIMIT - record.count };
+}
+
+export function middleware(request: NextRequest) {
+  const ip = request.headers.get('x-forwarded-for')?.split(',')[0] || 'unknown'; // first x-forwarded-for entry; 'unknown' if absent or empty
+  const { allowed, remaining } = getRateLimitInfo(ip);
+
+  // Log request for monitoring
+  console.log(`[${new Date().toISOString()}] ${request.method} ${request.nextUrl.pathname} from ${ip}`);
+
+  // Rate limit check: reject with 429 and rate-limit headers once the limit is reached
+  if (!allowed) {
+    return new NextResponse('Too Many Requests', {
+      status: 429,
+      headers: {
+        'Retry-After': '60',
+        'X-RateLimit-Limit': String(RATE_LIMIT),
+        'X-RateLimit-Remaining': '0',
+      },
+    });
+  }
+
+  const response = NextResponse.next();
+
+  // Add custom response headers (app version, request id, rate-limit info)
+  response.headers.set('X-App-Version', '1.0.0');
+  response.headers.set('X-Request-Id', crypto.randomUUID());
+  response.headers.set('X-RateLimit-Limit', String(RATE_LIMIT));
+  response.headers.set('X-RateLimit-Remaining', String(remaining));
+
+  return response;
+}
+
+export const config = {
+  matcher: ['/api/:path*', '/dashboard/:path*'],
+};
diff --git a/tests/fixtures/nextjs/conflicting-middleware/next.config.mjs b/tests/fixtures/nextjs/conflicting-middleware/next.config.mjs
new file mode 100644
index 00000000..4678774e
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/next.config.mjs
@@ -0,0 +1,4 @@
+/** @type {import('next').NextConfig} */
+const nextConfig = {};
+
+export default nextConfig;
diff --git a/tests/fixtures/nextjs/conflicting-middleware/package.json b/tests/fixtures/nextjs/conflicting-middleware/package.json
new file mode 100644
index 00000000..40cd3b2e
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "nextjs-conflicting-middleware-fixture",
+  "version": "0.0.1",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start"
+  },
+  "dependencies": {
+    "next": "^14.2.0",
+    "react": "^18.3.0",
+    "react-dom": "^18.3.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "@types/react": "^18.3.0",
+    "@types/react-dom": "^18.3.0",
+    "typescript": "^5.4.0"
+  }
+}
diff --git a/tests/fixtures/nextjs/conflicting-middleware/tsconfig.json b/tests/fixtures/nextjs/conflicting-middleware/tsconfig.json
new file mode 100644
index 00000000..e7ff90fd
--- /dev/null
+++ b/tests/fixtures/nextjs/conflicting-middleware/tsconfig.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "preserve",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./*"]
+    }
+  },
+  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+  "exclude": ["node_modules"]
+}
diff --git a/tests/fixtures/nextjs/example-auth0/app/layout.tsx b/tests/fixtures/nextjs/example-auth0/app/layout.tsx
index 69bcb6b5..a90894d9 100644
--- a/tests/fixtures/nextjs/example-auth0/app/layout.tsx
+++ b/tests/fixtures/nextjs/example-auth0/app/layout.tsx
@@ -1,15 +1,25 @@
+'use client';
+
 import Link from 'next/link';
-import { UserProvider } from '@auth0/nextjs-auth0/client';
+import { UserProvider, useUser } from '@auth0/nextjs-auth0/client';
+
+function Nav() {
+  const { user, isLoading } = useUser();
+
+  return (
+    <nav>
+      <Link href="/">Home</Link> | <Link href="/about">About</Link> | <Link href="/dashboard">Dashboard</Link> |{' '}
+      {!isLoading && (user ? <a href="/api/auth/logout">Logout</a> : <a href="/api/auth/login">Login</a>)}
+    </nav>
+  );
+}
 
 export default function RootLayout({ children }: { children: React.ReactNode }) {
   return (
     <html lang="en">
       <body>
         <UserProvider>
-          <nav>
-            <Link href="/">Home</Link> | <Link href="/about">About</Link> | <Link href="/dashboard">Dashboard</Link> |{' '}
-            <a href="/api/auth/login">Login</a> | <a href="/api/auth/logout">Logout</a>
-          </nav>
+          <Nav />
           {children}
         </UserProvider>
       </body>
diff --git a/tests/fixtures/nextjs/example-auth0/next-env.d.ts b/tests/fixtures/nextjs/example-auth0/next-env.d.ts
new file mode 100644
index 00000000..40c3d680
--- /dev/null
+++ b/tests/fixtures/nextjs/example-auth0/next-env.d.ts
@@ -0,0 +1,5 @@
+/// <reference types="next" />
+/// <reference types="next/image-types/global" />
+
+// NOTE: This file should not be edited
+// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information.
diff --git a/tests/fixtures/nextjs/example/next-env.d.ts b/tests/fixtures/nextjs/example/next-env.d.ts
new file mode 100644
index 00000000..40c3d680
--- /dev/null
+++ b/tests/fixtures/nextjs/example/next-env.d.ts
@@ -0,0 +1,5 @@
+/// <reference types="next" />
+/// <reference types="next/image-types/global" />
+
+// NOTE: This file should not be edited
+// see https://nextjs.org/docs/app/building-your-application/configuring/typescript for more information.
diff --git a/tests/fixtures/nextjs/partial-install/README.md b/tests/fixtures/nextjs/partial-install/README.md
new file mode 100644
index 00000000..d29519c6
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/README.md
@@ -0,0 +1,31 @@
+# Next.js - Partial Install Fixture
+
+## Edge Case Description
+
+This fixture represents a project where AuthKit was partially installed - the package is in dependencies but integration was never completed. This tests the agent's ability to detect and complete abandoned installations.
+
+## Expected Agent Behavior
+
+- Detect that @workos-inc/authkit-nextjs is already installed
+- Complete the integration by:
+  - Adding AuthKitProvider to layout.tsx
+  - Creating middleware.ts
+  - Creating callback route
+- Should NOT reinstall the package
+
+## Files of Interest
+
+- `package.json` - Already has @workos-inc/authkit-nextjs dependency
+- `app/layout.tsx` - Has commented-out import as signal of abandoned attempt
+
+## Success Criteria
+
+- [ ] AuthKitProvider wraps the app in layout.tsx
+- [ ] middleware.ts is created with authkitMiddleware
+- [ ] Callback route is created at app/api/auth/callback/route.ts
+- [ ] Build succeeds with no type errors
+- [ ] Package is not reinstalled (already present)
+
+## Notes
+
+This is a common scenario when developers start integration but get interrupted or confused.
diff --git a/tests/fixtures/nextjs/partial-install/app/about/page.tsx b/tests/fixtures/nextjs/partial-install/app/about/page.tsx
new file mode 100644
index 00000000..b680e06d
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/app/about/page.tsx
@@ -0,0 +1,8 @@
+export default function About() {
+  return (
+    <main>
+      <h1>About</h1>
+      <p>This is an existing Next.js application.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/partial-install/app/dashboard/page.tsx b/tests/fixtures/nextjs/partial-install/app/dashboard/page.tsx
new file mode 100644
index 00000000..34819842
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/app/dashboard/page.tsx
@@ -0,0 +1,8 @@
+export default function Dashboard() {
+  return (
+    <main>
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/partial-install/app/layout.tsx b/tests/fixtures/nextjs/partial-install/app/layout.tsx
new file mode 100644
index 00000000..aeeadb99
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/app/layout.tsx
@@ -0,0 +1,16 @@
+import Link from 'next/link';
+// TODO: Set up AuthKit
+// import { AuthKitProvider } from '@workos-inc/authkit-nextjs';
+
+export default function RootLayout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <body>
+        <nav>
+          <Link href="/">Home</Link> | <Link href="/about">About</Link> | <Link href="/dashboard">Dashboard</Link>
+        </nav>
+        {children}
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/nextjs/partial-install/app/page.tsx b/tests/fixtures/nextjs/partial-install/app/page.tsx
new file mode 100644
index 00000000..12c593e3
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/app/page.tsx
@@ -0,0 +1,8 @@
+export default function Home() {
+  return (
+    <main>
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/partial-install/next.config.mjs b/tests/fixtures/nextjs/partial-install/next.config.mjs
new file mode 100644
index 00000000..4678774e
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/next.config.mjs
@@ -0,0 +1,4 @@
+/** @type {import('next').NextConfig} */
+const nextConfig = {};
+
+export default nextConfig;
diff --git a/tests/fixtures/nextjs/partial-install/package.json b/tests/fixtures/nextjs/partial-install/package.json
new file mode 100644
index 00000000..e0332a1c
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/package.json
@@ -0,0 +1,22 @@
+{
+  "name": "nextjs-partial-install-fixture",
+  "version": "0.0.1",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start"
+  },
+  "dependencies": {
+    "@workos-inc/authkit-nextjs": "^0.15.0",
+    "next": "^14.2.0",
+    "react": "^18.3.0",
+    "react-dom": "^18.3.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "@types/react": "^18.3.0",
+    "@types/react-dom": "^18.3.0",
+    "typescript": "^5.4.0"
+  }
+}
diff --git a/tests/fixtures/nextjs/partial-install/tsconfig.json b/tests/fixtures/nextjs/partial-install/tsconfig.json
new file mode 100644
index 00000000..e7ff90fd
--- /dev/null
+++ b/tests/fixtures/nextjs/partial-install/tsconfig.json
@@ -0,0 +1,26 @@
+{
+  "compilerOptions": {
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": true,
+    "skipLibCheck": true,
+    "strict": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "preserve",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./*"]
+    }
+  },
+  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+  "exclude": ["node_modules"]
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/README.md b/tests/fixtures/nextjs/typescript-strict/README.md
new file mode 100644
index 00000000..abffb2bc
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/README.md
@@ -0,0 +1,29 @@
+# Next.js - TypeScript Strict Fixture
+
+## Edge Case Description
+
+This fixture uses a very strict TypeScript configuration (`strict` plus additional checks such as `exactOptionalPropertyTypes` and `noUncheckedIndexedAccess`). It tests whether the agent generates fully type-safe code that passes strict checks.
+
+## Expected Agent Behavior
+
+- Generate code with explicit return types
+- Use proper type annotations (no implicit any)
+- Handle null/undefined properly with strictNullChecks
+- Not introduce unused variables or parameters
+
+## Files of Interest
+
+- `tsconfig.json` - Has all strict flags enabled including exactOptionalPropertyTypes and noUncheckedIndexedAccess
+- All `.tsx` files - Have explicit return types; the agent must maintain this pattern
+
+## Success Criteria
+
+- [ ] `pnpm build` passes with zero type errors
+- [ ] Generated middleware.ts has proper types
+- [ ] Generated callback route has proper types
+- [ ] No implicit any errors
+- [ ] No unused variable/parameter errors
+
+## Notes
+
+This is critical for enterprise codebases that enforce strict TypeScript. Agent-generated code must not break the build.
diff --git a/tests/fixtures/nextjs/typescript-strict/app/about/page.tsx b/tests/fixtures/nextjs/typescript-strict/app/about/page.tsx
new file mode 100644
index 00000000..8c5a9cf9
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/app/about/page.tsx
@@ -0,0 +1,8 @@
+export default function About(): JSX.Element {
+  return (
+    <main>
+      <h1>About</h1>
+      <p>This is an existing Next.js application with strict TypeScript.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/app/dashboard/page.tsx b/tests/fixtures/nextjs/typescript-strict/app/dashboard/page.tsx
new file mode 100644
index 00000000..a0765233
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/app/dashboard/page.tsx
@@ -0,0 +1,8 @@
+export default function Dashboard(): JSX.Element {
+  return (
+    <main>
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/app/layout.tsx b/tests/fixtures/nextjs/typescript-strict/app/layout.tsx
new file mode 100644
index 00000000..8ddc7296
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/app/layout.tsx
@@ -0,0 +1,19 @@
+import Link from 'next/link';
+import type { ReactNode } from 'react';
+
+interface RootLayoutProps {
+  children: ReactNode;
+}
+
+export default function RootLayout({ children }: RootLayoutProps): JSX.Element {
+  return (
+    <html lang="en">
+      <body>
+        <nav>
+          <Link href="/">Home</Link> | <Link href="/about">About</Link> | <Link href="/dashboard">Dashboard</Link>
+        </nav>
+        {children}
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/app/page.tsx b/tests/fixtures/nextjs/typescript-strict/app/page.tsx
new file mode 100644
index 00000000..4d9dda50
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/app/page.tsx
@@ -0,0 +1,8 @@
+export default function Home(): JSX.Element {
+  return (
+    <main>
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </main>
+  );
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/next.config.mjs b/tests/fixtures/nextjs/typescript-strict/next.config.mjs
new file mode 100644
index 00000000..4678774e
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/next.config.mjs
@@ -0,0 +1,4 @@
+/** @type {import('next').NextConfig} */
+const nextConfig = {};
+
+export default nextConfig;
diff --git a/tests/fixtures/nextjs/typescript-strict/package.json b/tests/fixtures/nextjs/typescript-strict/package.json
new file mode 100644
index 00000000..2899353c
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/package.json
@@ -0,0 +1,21 @@
+{
+  "name": "nextjs-typescript-strict-fixture",
+  "version": "0.0.1",
+  "private": true,
+  "scripts": {
+    "dev": "next dev",
+    "build": "next build",
+    "start": "next start"
+  },
+  "dependencies": {
+    "next": "^14.2.0",
+    "react": "^18.3.0",
+    "react-dom": "^18.3.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.0.0",
+    "@types/react": "^18.3.0",
+    "@types/react-dom": "^18.3.0",
+    "typescript": "^5.4.0"
+  }
+}
diff --git a/tests/fixtures/nextjs/typescript-strict/tsconfig.json b/tests/fixtures/nextjs/typescript-strict/tsconfig.json
new file mode 100644
index 00000000..2da1d6ae
--- /dev/null
+++ b/tests/fixtures/nextjs/typescript-strict/tsconfig.json
@@ -0,0 +1,38 @@
+{
+  "compilerOptions": {
+    "lib": ["dom", "dom.iterable", "esnext"],
+    "allowJs": false,
+    "skipLibCheck": true,
+    "strict": true,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "strictFunctionTypes": true,
+    "strictBindCallApply": true,
+    "strictPropertyInitialization": true,
+    "noImplicitThis": true,
+    "noImplicitReturns": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "exactOptionalPropertyTypes": true,
+    "noUncheckedIndexedAccess": true,
+    "noEmit": true,
+    "esModuleInterop": true,
+    "module": "esnext",
+    "moduleResolution": "bundler",
+    "resolveJsonModule": true,
+    "isolatedModules": true,
+    "jsx": "preserve",
+    "incremental": true,
+    "plugins": [
+      {
+        "name": "next"
+      }
+    ],
+    "paths": {
+      "@/*": ["./*"]
+    },
+    "target": "ES2022"
+  },
+  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
+  "exclude": ["node_modules"]
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/README.md b/tests/fixtures/react-router/conflicting-middleware/README.md
new file mode 100644
index 00000000..f72618e5
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/README.md
@@ -0,0 +1,31 @@
+# React Router - Conflicting Middleware Fixture
+
+## Edge Case Description
+
+This fixture has existing server middleware logic for rate limiting, request logging, and security headers. The agent must integrate AuthKit while preserving this custom middleware.
+
+## Expected Agent Behavior
+
+- Detect existing middleware in `app/middleware.server.ts`
+- Integrate AuthKit while PRESERVING:
+  - Rate limiting logic
+  - Request logging
+  - Security headers (X-App-Version, X-Content-Type-Options, etc.)
+- Compose AuthKit with existing middleware, don't replace it
+
+## Files of Interest
+
+- `app/middleware.server.ts` - Custom middleware functions
+- `app/routes/dashboard.tsx` - Uses middleware in loader
+
+## Success Criteria
+
+- [ ] AuthKit is integrated
+- [ ] Rate limiting still works
+- [ ] Security headers still added
+- [ ] Request logging still works
+- [ ] Build succeeds
+
+## Notes
+
+React Router v7 uses loaders for server-side logic. The agent should compose AuthKit session checking with existing middleware patterns.
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/middleware.server.ts b/tests/fixtures/react-router/conflicting-middleware/app/middleware.server.ts
new file mode 100644
index 00000000..d110cc31
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/middleware.server.ts
@@ -0,0 +1,67 @@
+// Custom server middleware for React Router
+// This file contains middleware logic that should be preserved when adding AuthKit
+
+interface RateLimitRecord {
+  count: number;
+  resetTime: number;
+}
+
+const rateLimitStore = new Map<string, RateLimitRecord>();
+const RATE_LIMIT = 100;
+const WINDOW_MS = 60 * 1000;
+
+export function checkRateLimit(ip: string): { allowed: boolean; remaining: number } { // counts one request for ip against RATE_LIMIT
+  const now = Date.now();
+  const record = rateLimitStore.get(ip);
+
+  if (!record || now > record.resetTime) { // no record yet, or the previous window has ended: start a new window
+    rateLimitStore.set(ip, { count: 1, resetTime: now + WINDOW_MS });
+    return { allowed: true, remaining: RATE_LIMIT - 1 };
+  }
+
+  if (record.count >= RATE_LIMIT) { // limit already reached inside the current window
+    return { allowed: false, remaining: 0 };
+  }
+
+  record.count++; // mutates the entry stored in rateLimitStore
+  return { allowed: true, remaining: RATE_LIMIT - record.count };
+}
+
+export function logRequest(method: string, path: string, ip: string): void { // writes one timestamped line per request to the console
+  console.log(`[${new Date().toISOString()}] ${method} ${path} from ${ip}`);
+}
+
+export function addSecurityHeaders(headers: Headers): Headers { // mutates the given Headers and returns the same instance
+  headers.set('X-App-Version', '1.0.0');
+  headers.set('X-Content-Type-Options', 'nosniff');
+  headers.set('X-Frame-Options', 'DENY');
+  headers.set('X-XSS-Protection', '1; mode=block');
+  return headers;
+}
+
+export interface MiddlewareContext {
+  ip: string;
+  method: string;
+  path: string;
+}
+
+export async function runMiddleware(ctx: MiddlewareContext): Promise<Response | null> { // 429 Response to short-circuit, or null
+  const { ip, method, path } = ctx;
+
+  // Log all requests
+  logRequest(method, path, ip);
+
+  // Check rate limit only for paths starting with '/api'; other paths are never rate limited here
+  if (path.startsWith('/api')) {
+    const { allowed } = checkRateLimit(ip);
+    if (!allowed) {
+      return new Response('Too Many Requests', {
+        status: 429,
+        headers: { 'Retry-After': '60' },
+      });
+    }
+  }
+
+  // Return null to continue to the route handler
+  return null;
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/root.tsx b/tests/fixtures/react-router/conflicting-middleware/app/root.tsx
new file mode 100644
index 00000000..5471beef
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/root.tsx
@@ -0,0 +1,27 @@
+import { Links, Meta, NavLink, Outlet, Scripts, ScrollRestoration } from 'react-router';
+
+export function Layout({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <head>
+        <meta charSet="utf-8" />
+        <meta name="viewport" content="width=device-width, initial-scale=1" />
+        <Meta />
+        <Links />
+      </head>
+      <body>
+        <nav>
+          <NavLink to="/">Home</NavLink> | <NavLink to="/about">About</NavLink> |{' '}
+          <NavLink to="/dashboard">Dashboard</NavLink>
+        </nav>
+        {children}
+        <ScrollRestoration />
+        <Scripts />
+      </body>
+    </html>
+  );
+}
+
+export default function Root() {
+  return <Outlet />;
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/routes.ts b/tests/fixtures/react-router/conflicting-middleware/app/routes.ts
new file mode 100644
index 00000000..3e22bc70
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/routes.ts
@@ -0,0 +1,7 @@
+import { type RouteConfig, index, route } from '@react-router/dev/routes';
+
+export default [
+  index('routes/home.tsx'),
+  route('about', 'routes/about.tsx'),
+  route('dashboard', 'routes/dashboard.tsx'),
+] satisfies RouteConfig;
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/routes/about.tsx b/tests/fixtures/react-router/conflicting-middleware/app/routes/about.tsx
new file mode 100644
index 00000000..54aad687
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/routes/about.tsx
@@ -0,0 +1,8 @@
+export default function About() {
+  return (
+    <div>
+      <h1>About</h1>
+      <p>This is an existing React Router application with custom middleware.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/routes/dashboard.tsx b/tests/fixtures/react-router/conflicting-middleware/app/routes/dashboard.tsx
new file mode 100644
index 00000000..d0cc2ffb
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/routes/dashboard.tsx
@@ -0,0 +1,34 @@
+import type { LoaderFunctionArgs } from 'react-router';
+import { runMiddleware, addSecurityHeaders } from '../middleware.server';
+
+export async function loader({ request }: LoaderFunctionArgs) {
+  const url = new URL(request.url);
+  const ip = request.headers.get('x-forwarded-for')?.split(',')[0] || 'unknown'; // first x-forwarded-for entry; 'unknown' if absent or empty
+
+  // Run custom middleware (request logging; its rate limiting applies only to '/api' paths)
+  const middlewareResponse = await runMiddleware({
+    ip,
+    method: request.method,
+    path: url.pathname,
+  });
+
+  if (middlewareResponse) { // middleware short-circuited: return its response as-is
+    return middlewareResponse;
+  }
+
+  // Return dashboard data (JSON string body) with the custom security headers; no Content-Type is set here
+  const headers = addSecurityHeaders(new Headers());
+
+  return new Response(JSON.stringify({ message: 'Dashboard data' }), {
+    headers,
+  });
+}
+
+export default function Dashboard() {
+  return (
+    <div>
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/app/routes/home.tsx b/tests/fixtures/react-router/conflicting-middleware/app/routes/home.tsx
new file mode 100644
index 00000000..219c3ec9
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/app/routes/home.tsx
@@ -0,0 +1,8 @@
+export default function Home() {
+  return (
+    <div>
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/package.json b/tests/fixtures/react-router/conflicting-middleware/package.json
new file mode 100644
index 00000000..3a152d4e
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "react-router-conflicting-middleware-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "react-router dev",
+    "build": "react-router build",
+    "start": "react-router-serve ./build/server/index.js"
+  },
+  "dependencies": {
+    "@react-router/node": "^7.1.1",
+    "@react-router/serve": "^7.1.1",
+    "isbot": "^5.1.17",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router": "^7.1.1"
+  },
+  "devDependencies": {
+    "@react-router/dev": "^7.1.1",
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "typescript": "^5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/react-router.config.ts b/tests/fixtures/react-router/conflicting-middleware/react-router.config.ts
new file mode 100644
index 00000000..51e89677
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/react-router.config.ts
@@ -0,0 +1,5 @@
+import type { Config } from '@react-router/dev/config';
+
+export default {
+  ssr: true,
+} satisfies Config;
diff --git a/tests/fixtures/react-router/conflicting-middleware/tsconfig.json b/tests/fixtures/react-router/conflicting-middleware/tsconfig.json
new file mode 100644
index 00000000..6c11bd90
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "compilerOptions": {
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "jsx": "react-jsx",
+    "strict": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "isolatedModules": true,
+    "paths": {
+      "~/*": ["./app/*"]
+    }
+  },
+  "include": ["app/**/*", "*.ts"]
+}
diff --git a/tests/fixtures/react-router/conflicting-middleware/vite.config.ts b/tests/fixtures/react-router/conflicting-middleware/vite.config.ts
new file mode 100644
index 00000000..aa78decb
--- /dev/null
+++ b/tests/fixtures/react-router/conflicting-middleware/vite.config.ts
@@ -0,0 +1,12 @@
+import { reactRouter } from '@react-router/dev/vite';
+import { defineConfig } from 'vite';
+import { resolve } from 'path';
+
+// Directory that the `~` import alias resolves to.
+const appDir = resolve(__dirname, './app');
+
+// Vite config: the React Router plugin plus the `~` alias.
+export default defineConfig({
+  plugins: [reactRouter()],
+  resolve: { alias: { '~': appDir } },
+});
diff --git a/tests/fixtures/react-router/partial-install/README.md b/tests/fixtures/react-router/partial-install/README.md
new file mode 100644
index 00000000..6b0ee126
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/README.md
@@ -0,0 +1,30 @@
+# React Router - Partial Install Fixture
+
+## Edge Case Description
+
+This fixture represents a React Router project where AuthKit was partially installed - the package is in dependencies but integration was never completed.
+
+## Expected Agent Behavior
+
+- Detect that @workos-inc/authkit-react-router is already installed
+- Complete the integration by:
+  - Adding AuthKitProvider to root.tsx
+  - Creating callback route
+  - Adding middleware if needed
+- Should NOT reinstall the package
+
+## Files of Interest
+
+- `package.json` - Already has @workos-inc/authkit-react-router dependency
+- `app/root.tsx` - Has commented-out import as signal of abandoned attempt
+
+## Success Criteria
+
+- [ ] AuthKitProvider wraps the app
+- [ ] Callback route is created
+- [ ] Build succeeds
+- [ ] Package is not reinstalled
+
+## Notes
+
+Common scenario when developers start integration but don't finish.
diff --git a/tests/fixtures/react-router/partial-install/app/root.tsx b/tests/fixtures/react-router/partial-install/app/root.tsx
new file mode 100644
index 00000000..410c4ac3
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/app/root.tsx
@@ -0,0 +1,29 @@
+import { Links, Meta, NavLink, Outlet, Scripts, ScrollRestoration } from 'react-router';
+// TODO: Complete AuthKit setup
+// import { AuthKitProvider } from '@workos-inc/authkit-react-router';
+
+// Document shell: head metadata, a three-link nav bar, the passed-in children,
+// then ScrollRestoration and Scripts.
+export const Layout = ({ children }: { children: React.ReactNode }) => (
+  <html lang="en">
+    <head>
+      <meta charSet="utf-8" />
+      <meta name="viewport" content="width=device-width, initial-scale=1" />
+      <Meta />
+      <Links />
+    </head>
+    <body>
+      <nav>
+        <NavLink to="/">Home</NavLink> | <NavLink to="/about">About</NavLink> |{' '}
+        <NavLink to="/dashboard">Dashboard</NavLink>
+      </nav>
+      {children}
+      <ScrollRestoration />
+      <Scripts />
+    </body>
+  </html>
+);
+
+// Root route component: returns react-router's <Outlet />.
+const Root = () => <Outlet />;
+export default Root;
diff --git a/tests/fixtures/react-router/partial-install/app/routes.ts b/tests/fixtures/react-router/partial-install/app/routes.ts
new file mode 100644
index 00000000..3e22bc70
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/app/routes.ts
@@ -0,0 +1,7 @@
+import { type RouteConfig, index, route } from '@react-router/dev/routes';
+
+// Route table: the index page plus the `about` and `dashboard` paths.
+const home = index('routes/home.tsx');
+const about = route('about', 'routes/about.tsx');
+const dashboard = route('dashboard', 'routes/dashboard.tsx');
+export default [home, about, dashboard] satisfies RouteConfig;
diff --git a/tests/fixtures/react-router/partial-install/app/routes/about.tsx b/tests/fixtures/react-router/partial-install/app/routes/about.tsx
new file mode 100644
index 00000000..31a98f44
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/app/routes/about.tsx
@@ -0,0 +1,8 @@
+// About page: a heading and one descriptive paragraph.
+const About = () => (
+  <div>
+    <h1>About</h1>
+    <p>This is an existing React Router application.</p>
+  </div>
+);
+export default About;
diff --git a/tests/fixtures/react-router/partial-install/app/routes/dashboard.tsx b/tests/fixtures/react-router/partial-install/app/routes/dashboard.tsx
new file mode 100644
index 00000000..70f9ecef
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/app/routes/dashboard.tsx
@@ -0,0 +1,8 @@
+// Dashboard page: static placeholder markup; no auth check is performed here.
+const Dashboard = () => (
+  <div>
+    <h1>Dashboard</h1>
+    <p>Protected content would go here.</p>
+  </div>
+);
+export default Dashboard;
diff --git a/tests/fixtures/react-router/partial-install/app/routes/home.tsx b/tests/fixtures/react-router/partial-install/app/routes/home.tsx
new file mode 100644
index 00000000..219c3ec9
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/app/routes/home.tsx
@@ -0,0 +1,8 @@
+// Home page: a heading and a welcome paragraph.
+const Home = () => (
+  <div>
+    <h1>Home</h1>
+    <p>Welcome to the home page.</p>
+  </div>
+);
+export default Home;
diff --git a/tests/fixtures/react-router/partial-install/package.json b/tests/fixtures/react-router/partial-install/package.json
new file mode 100644
index 00000000..6b776229
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/package.json
@@ -0,0 +1,26 @@
+{
+  "name": "react-router-partial-install-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "react-router dev",
+    "build": "react-router build",
+    "start": "react-router-serve ./build/server/index.js"
+  },
+  "dependencies": {
+    "@react-router/node": "^7.1.1",
+    "@react-router/serve": "^7.1.1",
+    "@workos-inc/authkit-react-router": "^0.3.0",
+    "isbot": "^5.1.17",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router": "^7.1.1"
+  },
+  "devDependencies": {
+    "@react-router/dev": "^7.1.1",
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "typescript": "^5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react-router/partial-install/react-router.config.ts b/tests/fixtures/react-router/partial-install/react-router.config.ts
new file mode 100644
index 00000000..51e89677
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/react-router.config.ts
@@ -0,0 +1,5 @@
+import type { Config } from '@react-router/dev/config';
+
+// React Router config for this fixture: only `ssr` is set, and it is enabled.
+const config = { ssr: true } satisfies Config;
+export default config;
diff --git a/tests/fixtures/react-router/partial-install/tsconfig.json b/tests/fixtures/react-router/partial-install/tsconfig.json
new file mode 100644
index 00000000..6c11bd90
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/tsconfig.json
@@ -0,0 +1,17 @@
+{
+  "compilerOptions": {
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "jsx": "react-jsx",
+    "strict": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "isolatedModules": true,
+    "paths": {
+      "~/*": ["./app/*"]
+    }
+  },
+  "include": ["app/**/*", "*.ts"]
+}
diff --git a/tests/fixtures/react-router/partial-install/vite.config.ts b/tests/fixtures/react-router/partial-install/vite.config.ts
new file mode 100644
index 00000000..aa78decb
--- /dev/null
+++ b/tests/fixtures/react-router/partial-install/vite.config.ts
@@ -0,0 +1,12 @@
+import { reactRouter } from '@react-router/dev/vite';
+import { defineConfig } from 'vite';
+import { resolve } from 'path';
+
+// Directory that the `~` import alias resolves to.
+const appDir = resolve(__dirname, './app');
+
+// Vite config: the React Router plugin plus the `~` alias.
+export default defineConfig({
+  plugins: [reactRouter()],
+  resolve: { alias: { '~': appDir } },
+});
diff --git a/tests/fixtures/react-router/typescript-strict/README.md b/tests/fixtures/react-router/typescript-strict/README.md
new file mode 100644
index 00000000..76223aa7
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/README.md
@@ -0,0 +1,27 @@
+# React Router - TypeScript Strict Fixture
+
+## Edge Case Description
+
+This fixture has the strictest TypeScript configuration. It tests whether the agent generates fully type-safe code.
+
+## Expected Agent Behavior
+
+- Generate code with explicit return types
+- Use proper type annotations
+- Handle null/undefined properly
+- Not introduce unused variables
+
+## Files of Interest
+
+- `tsconfig.json` - Has all strict flags
+- All `.tsx` files - Have explicit return types
+
+## Success Criteria
+
+- [ ] `pnpm build` passes with zero type errors
+- [ ] Generated code has proper types
+- [ ] No implicit any errors
+
+## Notes
+
+Critical for enterprise React Router apps with strict TypeScript.
diff --git a/tests/fixtures/react-router/typescript-strict/app/root.tsx b/tests/fixtures/react-router/typescript-strict/app/root.tsx
new file mode 100644
index 00000000..5bbd0aee
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/app/root.tsx
@@ -0,0 +1,32 @@
+import { Links, Meta, NavLink, Outlet, Scripts, ScrollRestoration } from 'react-router';
+import type { ReactNode, JSX } from 'react';
+
+// Props for Layout: the content rendered inside <body>, below the nav bar.
+interface LayoutProps {
+  children: ReactNode;
+}
+
+// Document shell: head metadata, nav links, children, ScrollRestoration, Scripts.
+export const Layout = ({ children }: LayoutProps): JSX.Element => (
+  <html lang="en">
+    <head>
+      <meta charSet="utf-8" />
+      <meta name="viewport" content="width=device-width, initial-scale=1" />
+      <Meta />
+      <Links />
+    </head>
+    <body>
+      <nav>
+        <NavLink to="/">Home</NavLink> | <NavLink to="/about">About</NavLink> |{' '}
+        <NavLink to="/dashboard">Dashboard</NavLink>
+      </nav>
+      {children}
+      <ScrollRestoration />
+      <Scripts />
+    </body>
+  </html>
+);
+
+// Root route component: returns react-router's <Outlet />.
+const Root = (): JSX.Element => <Outlet />;
+export default Root;
diff --git a/tests/fixtures/react-router/typescript-strict/app/routes.ts b/tests/fixtures/react-router/typescript-strict/app/routes.ts
new file mode 100644
index 00000000..3e22bc70
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/app/routes.ts
@@ -0,0 +1,7 @@
+import { type RouteConfig, index, route } from '@react-router/dev/routes';
+
+// Route table: the index page plus the `about` and `dashboard` paths.
+const home = index('routes/home.tsx');
+const about = route('about', 'routes/about.tsx');
+const dashboard = route('dashboard', 'routes/dashboard.tsx');
+export default [home, about, dashboard] satisfies RouteConfig;
diff --git a/tests/fixtures/react-router/typescript-strict/app/routes/about.tsx b/tests/fixtures/react-router/typescript-strict/app/routes/about.tsx
new file mode 100644
index 00000000..84f2cf2a
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/app/routes/about.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+// About page: a heading and one descriptive paragraph.
+const About = (): JSX.Element => (
+  <div>
+    <h1>About</h1>
+    <p>This is an existing React Router application with strict TypeScript.</p>
+  </div>
+);
+export default About;
diff --git a/tests/fixtures/react-router/typescript-strict/app/routes/dashboard.tsx b/tests/fixtures/react-router/typescript-strict/app/routes/dashboard.tsx
new file mode 100644
index 00000000..846e5a8e
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/app/routes/dashboard.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+// Dashboard page: static placeholder markup; no auth check is performed here.
+const Dashboard = (): JSX.Element => (
+  <div>
+    <h1>Dashboard</h1>
+    <p>Protected content would go here.</p>
+  </div>
+);
+export default Dashboard;
diff --git a/tests/fixtures/react-router/typescript-strict/app/routes/home.tsx b/tests/fixtures/react-router/typescript-strict/app/routes/home.tsx
new file mode 100644
index 00000000..c93e9e4b
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/app/routes/home.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+// Home page: a heading and a welcome paragraph.
+const Home = (): JSX.Element => (
+  <div>
+    <h1>Home</h1>
+    <p>Welcome to the home page.</p>
+  </div>
+);
+export default Home;
diff --git a/tests/fixtures/react-router/typescript-strict/package.json b/tests/fixtures/react-router/typescript-strict/package.json
new file mode 100644
index 00000000..8fe499bf
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "react-router-typescript-strict-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "react-router dev",
+    "build": "react-router build",
+    "start": "react-router-serve ./build/server/index.js"
+  },
+  "dependencies": {
+    "@react-router/node": "^7.1.1",
+    "@react-router/serve": "^7.1.1",
+    "isbot": "^5.1.17",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router": "^7.1.1"
+  },
+  "devDependencies": {
+    "@react-router/dev": "^7.1.1",
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "typescript": "^5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react-router/typescript-strict/react-router.config.ts b/tests/fixtures/react-router/typescript-strict/react-router.config.ts
new file mode 100644
index 00000000..51e89677
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/react-router.config.ts
@@ -0,0 +1,5 @@
+import type { Config } from '@react-router/dev/config';
+
+// React Router config for this fixture: only `ssr` is set, and it is enabled.
+const config = { ssr: true } satisfies Config;
+export default config;
diff --git a/tests/fixtures/react-router/typescript-strict/tsconfig.json b/tests/fixtures/react-router/typescript-strict/tsconfig.json
new file mode 100644
index 00000000..943b5581
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/tsconfig.json
@@ -0,0 +1,28 @@
+{
+  "compilerOptions": {
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "bundler",
+    "jsx": "react-jsx",
+    "strict": true,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "strictFunctionTypes": true,
+    "strictBindCallApply": true,
+    "strictPropertyInitialization": true,
+    "noImplicitThis": true,
+    "noImplicitReturns": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "exactOptionalPropertyTypes": true,
+    "noUncheckedIndexedAccess": true,
+    "skipLibCheck": true,
+    "noEmit": true,
+    "isolatedModules": true,
+    "paths": {
+      "~/*": ["./app/*"]
+    }
+  },
+  "include": ["app/**/*", "*.ts"]
+}
diff --git a/tests/fixtures/react-router/typescript-strict/vite.config.ts b/tests/fixtures/react-router/typescript-strict/vite.config.ts
new file mode 100644
index 00000000..aa78decb
--- /dev/null
+++ b/tests/fixtures/react-router/typescript-strict/vite.config.ts
@@ -0,0 +1,12 @@
+import { reactRouter } from '@react-router/dev/vite';
+import { defineConfig } from 'vite';
+import { resolve } from 'path';
+
+// Directory that the `~` import alias resolves to.
+const appDir = resolve(__dirname, './app');
+
+// Vite config: the React Router plugin plus the `~` alias.
+export default defineConfig({
+  plugins: [reactRouter()],
+  resolve: { alias: { '~': appDir } },
+});
diff --git a/tests/fixtures/react/conflicting-auth/README.md b/tests/fixtures/react/conflicting-auth/README.md
new file mode 100644
index 00000000..24789e29
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/README.md
@@ -0,0 +1,37 @@
+# React SPA - Conflicting Auth Fixture
+
+## Edge Case Description
+
+This fixture has an existing custom AuthProvider with user preferences, roles, and localStorage-based session management. The agent must integrate AuthKit while preserving or migrating existing functionality.
+
+## Expected Agent Behavior
+
+- Detect existing auth implementation in `src/auth/AuthProvider.tsx`
+- Integrate AuthKitProvider while handling:
+  - Existing `useAuth` hook that components depend on
+  - User preferences (theme, notifications)
+  - Role-based access patterns
+- Should NOT simply delete existing auth code without migration plan
+
+## Files of Interest
+
+- `src/auth/AuthProvider.tsx` - Full auth implementation with useAuth hook
+- `src/main.tsx` - Uses AuthProvider to wrap app
+- `src/App.tsx` - Uses useAuth for logout button
+- `src/pages/Dashboard.tsx` - Uses useAuth for protected content
+
+## Success Criteria
+
+- [ ] AuthKit is integrated
+- [ ] Existing useAuth consumers don't break
+- [ ] User preferences pattern is preserved or migrated
+- [ ] Build succeeds
+
+## Notes
+
+This is a realistic scenario - many apps have custom auth before adopting a third-party solution. The agent should recognize this and propose a migration strategy rather than simply replacing.
+
+Ideal approaches:
+1. Wrap existing AuthProvider with AuthKitProvider
+2. Migrate user data from custom auth to AuthKit user profile
+3. Create adapter that maps AuthKit user to existing User type
diff --git a/tests/fixtures/react/conflicting-auth/index.html b/tests/fixtures/react/conflicting-auth/index.html
new file mode 100644
index 00000000..5da656cb
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>React App</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/react/conflicting-auth/package.json b/tests/fixtures/react/conflicting-auth/package.json
new file mode 100644
index 00000000..70785f7c
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/package.json
@@ -0,0 +1,23 @@
+{
+  "name": "react-conflicting-auth-fixture",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.28.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "@vitejs/plugin-react": "^4.3.4",
+    "typescript": "~5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react/conflicting-auth/src/App.tsx b/tests/fixtures/react/conflicting-auth/src/App.tsx
new file mode 100644
index 00000000..57e4bde7
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/App.tsx
@@ -0,0 +1,31 @@
+import { Routes, Route, Link } from 'react-router-dom';
+import { useAuth } from './auth/AuthProvider';
+import { Home } from './pages/Home';
+import { About } from './pages/About';
+import { Dashboard } from './pages/Dashboard';
+
+function App() {
+  const { isAuthenticated, user, logout } = useAuth(); // custom auth context value
+  const sessionControls = isAuthenticated && ( // false (renders nothing) when signed out
+    <>
+      {' '}
+      | <span>Welcome, {user?.name}</span> |{' '}
+      <button onClick={() => logout()}>Logout</button>
+    </>
+  );
+  return (
+    <div>
+      <nav>
+        <Link to="/">Home</Link> | <Link to="/about">About</Link> | <Link to="/dashboard">Dashboard</Link>
+        {sessionControls}
+      </nav>
+      <Routes>
+        <Route path="/" element={<Home />} />
+        <Route path="/about" element={<About />} />
+        <Route path="/dashboard" element={<Dashboard />} />
+      </Routes>
+    </div>
+  );
+}
+
+export default App;
diff --git a/tests/fixtures/react/conflicting-auth/src/auth/AuthProvider.tsx b/tests/fixtures/react/conflicting-auth/src/auth/AuthProvider.tsx
new file mode 100644
index 00000000..2de8e79f
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/auth/AuthProvider.tsx
@@ -0,0 +1,123 @@
+import { createContext, useContext, useState, useCallback, useEffect, type ReactNode } from 'react';
+
+interface User { // Shape of the user object kept in state and in localStorage
+  id: string;
+  email: string;
+  name: string; // the mock login derives this from the part of the e-mail before '@'
+  role: 'admin' | 'user' | 'guest'; // the mock login always assigns 'user'
+  preferences: { // changed through updatePreferences
+    theme: 'light' | 'dark';
+    notifications: boolean;
+  };
+}
+
+interface Credentials { // Input to login(); the mock login only reads `email`
+  email: string;
+  password: string;
+}
+
+interface AuthContextValue { // Value exposed to consumers through useAuth()
+  user: User | null; // null when nobody is signed in
+  isLoading: boolean;
+  isAuthenticated: boolean; // computed as !!user by AuthProvider
+  login: (credentials: Credentials) => Promise<void>;
+  logout: () => Promise<void>;
+  updatePreferences: (prefs: Partial<User['preferences']>) => Promise<void>;
+}
+
+const AuthContext = createContext<AuthContextValue | null>(null); // null default lets useAuth detect a missing provider
+
+interface AuthProviderProps { // Props accepted by AuthProvider
+  children: ReactNode;
+}
+
+// Supplies the localStorage-backed auth state (user, loading flag, login, logout,
+// preference updates) to descendants through AuthContext.
+export function AuthProvider({ children }: AuthProviderProps) {
+  const [user, setUser] = useState<User | null>(null);
+  const [isLoading, setIsLoading] = useState(true); // true until storage has been read
+
+  // Restore a stored session on mount. JSON.parse throws on malformed data, so the
+  // failure is handled here instead of escaping as an unhandled promise rejection.
+  useEffect(() => {
+    try {
+      const stored = localStorage.getItem('auth_user');
+      if (stored) setUser(JSON.parse(stored));
+    } catch (error) {
+      console.warn('Ignoring unreadable auth_user session data', error);
+    } finally {
+      setIsLoading(false);
+    }
+  }, []);
+
+  const login = useCallback(async (credentials: Credentials) => {
+    setIsLoading(true);
+    try {
+      // Simulated API call - in real app this would hit your backend
+      await new Promise((resolve) => setTimeout(resolve, 500));
+
+      const mockUser: User = {
+        id: '123',
+        email: credentials.email,
+        name: credentials.email.split('@')[0] ?? 'User',
+        role: 'user',
+        preferences: {
+          theme: 'light',
+          notifications: true,
+        },
+      };
+
+      localStorage.setItem('auth_user', JSON.stringify(mockUser));
+      setUser(mockUser);
+    } finally {
+      setIsLoading(false);
+    }
+  }, []);
+
+  const logout = useCallback(async () => {
+    setIsLoading(true);
+    try {
+      localStorage.removeItem('auth_user'); // drop the persisted session first
+      setUser(null);
+    } finally {
+      setIsLoading(false);
+    }
+  }, []);
+
+  const updatePreferences = useCallback(
+    async (prefs: Partial<User['preferences']>) => {
+      if (!user) return; // nothing to update while signed out
+
+      const updatedUser = {
+        ...user,
+        preferences: { ...user.preferences, ...prefs }, // shallow merge over current prefs
+      };
+      localStorage.setItem('auth_user', JSON.stringify(updatedUser));
+      setUser(updatedUser);
+    },
+    [user],
+  );
+
+  return (
+    <AuthContext.Provider
+      value={{
+        user,
+        isLoading,
+        isAuthenticated: !!user,
+        login,
+        logout,
+        updatePreferences,
+      }}
+    >
+      {children}
+    </AuthContext.Provider>
+  );
+}
+
+/** Returns the auth context value; throws when no AuthProvider supplied one. */
+export function useAuth(): AuthContextValue {
+  const value = useContext(AuthContext);
+  if (value) return value;
+
+  throw new Error('useAuth must be used within an AuthProvider');
+}
diff --git a/tests/fixtures/react/conflicting-auth/src/main.tsx b/tests/fixtures/react/conflicting-auth/src/main.tsx
new file mode 100644
index 00000000..a7541f0d
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/main.tsx
@@ -0,0 +1,15 @@
+import { StrictMode } from 'react';
+import { createRoot } from 'react-dom/client';
+import { BrowserRouter } from 'react-router-dom';
+import { AuthProvider } from './auth/AuthProvider';
+import App from './App.tsx';
+
+// Mount the app: StrictMode > BrowserRouter > AuthProvider > App.
+const container = document.getElementById('root')!;
+createRoot(container).render(
+  <StrictMode>
+    <BrowserRouter>
+      <AuthProvider><App /></AuthProvider>
+    </BrowserRouter>
+  </StrictMode>,
+);
diff --git a/tests/fixtures/react/conflicting-auth/src/pages/About.tsx b/tests/fixtures/react/conflicting-auth/src/pages/About.tsx
new file mode 100644
index 00000000..20ddb997
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/pages/About.tsx
@@ -0,0 +1,8 @@
+// About page: a heading and one descriptive paragraph.
+// Exported by name; App.tsx imports it for the /about route.
+export const About = () => (
+  <div>
+    <h1>About</h1>
+    <p>This is an existing React application with custom auth.</p>
+  </div>
+);
diff --git a/tests/fixtures/react/conflicting-auth/src/pages/Dashboard.tsx b/tests/fixtures/react/conflicting-auth/src/pages/Dashboard.tsx
new file mode 100644
index 00000000..ad52c245
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/pages/Dashboard.tsx
@@ -0,0 +1,27 @@
+import { useAuth } from '../auth/AuthProvider';
+
+// Dashboard page with three states: loading, signed out, and signed in.
+export function Dashboard() {
+  const auth = useAuth();
+  if (auth.isLoading) return <div>Loading...</div>;
+
+  const heading = <h1>Dashboard</h1>;
+  if (!auth.isAuthenticated) {
+    return (
+      <div>
+        {heading}
+        <p>Please log in to view the dashboard.</p>
+      </div>
+    );
+  }
+
+  const profile = auth.user;
+  return (
+    <div>
+      {heading}
+      <p>Welcome back, {profile?.name}!</p>
+      <p>Role: {profile?.role}</p>
+      <p>Theme: {profile?.preferences.theme}</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react/conflicting-auth/src/pages/Home.tsx b/tests/fixtures/react/conflicting-auth/src/pages/Home.tsx
new file mode 100644
index 00000000..ed07ba17
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/pages/Home.tsx
@@ -0,0 +1,8 @@
+// Home page: a heading and a welcome paragraph.
+// Exported by name; App.tsx imports it for the / route.
+export const Home = () => (
+  <div>
+    <h1>Home</h1>
+    <p>Welcome to the home page.</p>
+  </div>
+);
diff --git a/tests/fixtures/react/conflicting-auth/src/vite-env.d.ts b/tests/fixtures/react/conflicting-auth/src/vite-env.d.ts
new file mode 100644
index 00000000..11f02fe2
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/src/vite-env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/tests/fixtures/react/conflicting-auth/tsconfig.json b/tests/fixtures/react/conflicting-auth/tsconfig.json
new file mode 100644
index 00000000..d5419222
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/tsconfig.json
@@ -0,0 +1,23 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "@/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/react/conflicting-auth/vite.config.ts b/tests/fixtures/react/conflicting-auth/vite.config.ts
new file mode 100644
index 00000000..d192dba1
--- /dev/null
+++ b/tests/fixtures/react/conflicting-auth/vite.config.ts
@@ -0,0 +1,12 @@
+import { defineConfig } from 'vite';
+import react from '@vitejs/plugin-react';
+import { resolve } from 'path';
+
+// Directory that the `@` import alias resolves to.
+const srcDir = resolve(__dirname, './src');
+
+// Vite config: the React plugin plus the `@` alias.
+export default defineConfig({
+  plugins: [react()],
+  resolve: { alias: { '@': srcDir } },
+});
diff --git a/tests/fixtures/react/partial-install/README.md b/tests/fixtures/react/partial-install/README.md
new file mode 100644
index 00000000..94f8c125
--- /dev/null
+++ b/tests/fixtures/react/partial-install/README.md
@@ -0,0 +1,29 @@
+# React SPA - Partial Install Fixture
+
+## Edge Case Description
+
+This fixture represents a React SPA where AuthKit was partially installed - the package is in dependencies but integration was never completed.
+
+## Expected Agent Behavior
+
+- Detect that @workos-inc/authkit-react is already installed
+- Complete the integration by:
+  - Adding AuthKitProvider to main.tsx
+  - Creating callback route
+- Should NOT reinstall the package
+
+## Files of Interest
+
+- `package.json` - Already has @workos-inc/authkit-react dependency
+- `src/main.tsx` - Has commented-out import as signal of abandoned attempt
+
+## Success Criteria
+
+- [ ] AuthKitProvider wraps the app in main.tsx
+- [ ] Callback route is configured
+- [ ] Build succeeds with no type errors
+- [ ] Package is not reinstalled
+
+## Notes
+
+Common scenario when developers start integration but don't finish.
diff --git a/tests/fixtures/react/partial-install/index.html b/tests/fixtures/react/partial-install/index.html
new file mode 100644
index 00000000..5da656cb
--- /dev/null
+++ b/tests/fixtures/react/partial-install/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>React App</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/react/partial-install/package.json b/tests/fixtures/react/partial-install/package.json
new file mode 100644
index 00000000..ed132ca0
--- /dev/null
+++ b/tests/fixtures/react/partial-install/package.json
@@ -0,0 +1,24 @@
+{
+  "name": "react-partial-install-fixture",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@workos-inc/authkit-react": "^0.5.0",
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.28.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "@vitejs/plugin-react": "^4.3.4",
+    "typescript": "~5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react/partial-install/src/App.tsx b/tests/fixtures/react/partial-install/src/App.tsx
new file mode 100644
index 00000000..97221e77
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/App.tsx
@@ -0,0 +1,21 @@
+import { Routes, Route, Link } from 'react-router-dom';
+import { Home } from './pages/Home';
+import { About } from './pages/About';
+import { Dashboard } from './pages/Dashboard';
+
+// App shell: nav links rendered above the three client-side routes.
+const App = () => (
+  <div>
+    <nav>
+      <Link to="/">Home</Link> | <Link to="/about">About</Link> | <Link to="/dashboard">Dashboard</Link>
+    </nav>
+    {/* One route per page component. */}
+    <Routes>
+      <Route path="/" element={<Home />} />
+      <Route path="/about" element={<About />} />
+      <Route path="/dashboard" element={<Dashboard />} />
+    </Routes>
+  </div>
+);
+
+export default App;
diff --git a/tests/fixtures/react/partial-install/src/main.tsx b/tests/fixtures/react/partial-install/src/main.tsx
new file mode 100644
index 00000000..ff2401bf
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/main.tsx
@@ -0,0 +1,14 @@
+import { StrictMode } from 'react';
+import { createRoot } from 'react-dom/client';
+import { BrowserRouter } from 'react-router-dom';
+// TODO: Add AuthKitProvider
+// import { AuthKitProvider } from '@workos-inc/authkit-react';
+import App from './App.tsx';
+
+// Mount the app: StrictMode > BrowserRouter > App.
+const container = document.getElementById('root')!;
+createRoot(container).render(
+  <StrictMode>
+    <BrowserRouter><App /></BrowserRouter>
+  </StrictMode>,
+);
diff --git a/tests/fixtures/react/partial-install/src/pages/About.tsx b/tests/fixtures/react/partial-install/src/pages/About.tsx
new file mode 100644
index 00000000..9c57a60f
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/pages/About.tsx
@@ -0,0 +1,8 @@
+// About page: a heading and one descriptive paragraph.
+// Exported by name; App.tsx imports it for the /about route.
+export const About = () => (
+  <div>
+    <h1>About</h1>
+    <p>This is an existing React application.</p>
+  </div>
+);
diff --git a/tests/fixtures/react/partial-install/src/pages/Dashboard.tsx b/tests/fixtures/react/partial-install/src/pages/Dashboard.tsx
new file mode 100644
index 00000000..42409fdc
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/pages/Dashboard.tsx
@@ -0,0 +1,8 @@
+// Dashboard page: static placeholder markup; no auth check is performed here.
+// Exported by name; App.tsx imports it for the /dashboard route.
+export const Dashboard = () => (
+  <div>
+    <h1>Dashboard</h1>
+    <p>Protected content would go here.</p>
+  </div>
+);
diff --git a/tests/fixtures/react/partial-install/src/pages/Home.tsx b/tests/fixtures/react/partial-install/src/pages/Home.tsx
new file mode 100644
index 00000000..ed07ba17
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/pages/Home.tsx
@@ -0,0 +1,8 @@
+// Home page: a heading and a welcome paragraph.
+// Exported by name; App.tsx imports it for the / route.
+export const Home = () => (
+  <div>
+    <h1>Home</h1>
+    <p>Welcome to the home page.</p>
+  </div>
+);
diff --git a/tests/fixtures/react/partial-install/src/vite-env.d.ts b/tests/fixtures/react/partial-install/src/vite-env.d.ts
new file mode 100644
index 00000000..11f02fe2
--- /dev/null
+++ b/tests/fixtures/react/partial-install/src/vite-env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/tests/fixtures/react/partial-install/tsconfig.json b/tests/fixtures/react/partial-install/tsconfig.json
new file mode 100644
index 00000000..d5419222
--- /dev/null
+++ b/tests/fixtures/react/partial-install/tsconfig.json
@@ -0,0 +1,23 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "@/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/react/partial-install/vite.config.ts b/tests/fixtures/react/partial-install/vite.config.ts
new file mode 100644
index 00000000..d192dba1
--- /dev/null
+++ b/tests/fixtures/react/partial-install/vite.config.ts
@@ -0,0 +1,12 @@
+import { defineConfig } from 'vite';
+import react from '@vitejs/plugin-react';
+import { resolve } from 'path';
+
+export default defineConfig({
+  plugins: [react()],
+  resolve: {
+    alias: {
+      '@': resolve(__dirname, './src'),
+    },
+  },
+});
diff --git a/tests/fixtures/react/typescript-strict/README.md b/tests/fixtures/react/typescript-strict/README.md
new file mode 100644
index 00000000..ca16ce54
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/README.md
@@ -0,0 +1,28 @@
+# React SPA - TypeScript Strict Fixture
+
+## Edge Case Description
+
+This fixture has the strictest TypeScript configuration. It tests whether the agent generates fully type-safe code.
+
+## Expected Agent Behavior
+
+- Generate code with explicit return types
+- Use proper type annotations
+- Handle null/undefined properly
+- Not introduce unused variables
+
+## Files of Interest
+
+- `tsconfig.json` - Enables `strict` plus extra checks beyond it, including `exactOptionalPropertyTypes` and `noUncheckedIndexedAccess`
+- All `.tsx` component files - Every component declares an explicit `JSX.Element` return type
+
+## Success Criteria
+
+- [ ] `pnpm build` passes with zero type errors
+- [ ] Generated code has proper types
+- [ ] No implicit any errors
+- [ ] No unused variable errors
+
+## Notes
+
+Critical for enterprise React apps with strict TypeScript.
diff --git a/tests/fixtures/react/typescript-strict/index.html b/tests/fixtures/react/typescript-strict/index.html
new file mode 100644
index 00000000..5da656cb
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/index.html
@@ -0,0 +1,12 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>React App</title>
+  </head>
+  <body>
+    <div id="root"></div>
+    <script type="module" src="/src/main.tsx"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/react/typescript-strict/package.json b/tests/fixtures/react/typescript-strict/package.json
new file mode 100644
index 00000000..864f987e
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/package.json
@@ -0,0 +1,23 @@
+{
+  "name": "react-typescript-strict-fixture",
+  "private": true,
+  "version": "0.0.0",
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "tsc -b && vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "react": "^18.3.1",
+    "react-dom": "^18.3.1",
+    "react-router-dom": "^6.28.0"
+  },
+  "devDependencies": {
+    "@types/react": "^18.3.12",
+    "@types/react-dom": "^18.3.1",
+    "@vitejs/plugin-react": "^4.3.4",
+    "typescript": "~5.6.2",
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/react/typescript-strict/src/App.tsx b/tests/fixtures/react/typescript-strict/src/App.tsx
new file mode 100644
index 00000000..a6fc1fc5
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/App.tsx
@@ -0,0 +1,22 @@
+import { Routes, Route, Link } from 'react-router-dom';
+import type { JSX } from 'react';
+import { Home } from './pages/Home';
+import { About } from './pages/About';
+import { Dashboard } from './pages/Dashboard';
+
+function App(): JSX.Element {
+  return (
+    <div>
+      <nav>
+        <Link to="/">Home</Link> | <Link to="/about">About</Link> | <Link to="/dashboard">Dashboard</Link>
+      </nav>
+      <Routes>
+        <Route path="/" element={<Home />} />
+        <Route path="/about" element={<About />} />
+        <Route path="/dashboard" element={<Dashboard />} />
+      </Routes>
+    </div>
+  );
+}
+
+export default App;
diff --git a/tests/fixtures/react/typescript-strict/src/main.tsx b/tests/fixtures/react/typescript-strict/src/main.tsx
new file mode 100644
index 00000000..954c6f0c
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/main.tsx
@@ -0,0 +1,17 @@
+import { StrictMode } from 'react';
+import { createRoot } from 'react-dom/client';
+import { BrowserRouter } from 'react-router-dom';
+import App from './App.tsx';
+
+const rootElement = document.getElementById('root');
+if (!rootElement) {
+  throw new Error('Root element not found');
+}
+
+createRoot(rootElement).render(
+  <StrictMode>
+    <BrowserRouter>
+      <App />
+    </BrowserRouter>
+  </StrictMode>,
+);
diff --git a/tests/fixtures/react/typescript-strict/src/pages/About.tsx b/tests/fixtures/react/typescript-strict/src/pages/About.tsx
new file mode 100644
index 00000000..bf07559f
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/pages/About.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+export function About(): JSX.Element {
+  return (
+    <div>
+      <h1>About</h1>
+      <p>This is an existing React application with strict TypeScript.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react/typescript-strict/src/pages/Dashboard.tsx b/tests/fixtures/react/typescript-strict/src/pages/Dashboard.tsx
new file mode 100644
index 00000000..27ffcdb2
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/pages/Dashboard.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+export function Dashboard(): JSX.Element {
+  return (
+    <div>
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react/typescript-strict/src/pages/Home.tsx b/tests/fixtures/react/typescript-strict/src/pages/Home.tsx
new file mode 100644
index 00000000..a1129ee5
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/pages/Home.tsx
@@ -0,0 +1,10 @@
+import type { JSX } from 'react';
+
+export function Home(): JSX.Element {
+  return (
+    <div>
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </div>
+  );
+}
diff --git a/tests/fixtures/react/typescript-strict/src/vite-env.d.ts b/tests/fixtures/react/typescript-strict/src/vite-env.d.ts
new file mode 100644
index 00000000..11f02fe2
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/src/vite-env.d.ts
@@ -0,0 +1 @@
+/// <reference types="vite/client" />
diff --git a/tests/fixtures/react/typescript-strict/tsconfig.json b/tests/fixtures/react/typescript-strict/tsconfig.json
new file mode 100644
index 00000000..9058ee71
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/tsconfig.json
@@ -0,0 +1,32 @@
+{
+  "compilerOptions": {
+    "target": "ES2020",
+    "useDefineForClassFields": true,
+    "lib": ["ES2020", "DOM", "DOM.Iterable"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "strictFunctionTypes": true,
+    "strictBindCallApply": true,
+    "strictPropertyInitialization": true,
+    "noImplicitThis": true,
+    "noImplicitReturns": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "exactOptionalPropertyTypes": true,
+    "noUncheckedIndexedAccess": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "@/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/react/typescript-strict/vite.config.ts b/tests/fixtures/react/typescript-strict/vite.config.ts
new file mode 100644
index 00000000..d192dba1
--- /dev/null
+++ b/tests/fixtures/react/typescript-strict/vite.config.ts
@@ -0,0 +1,12 @@
+import { defineConfig } from 'vite';
+import react from '@vitejs/plugin-react';
+import { resolve } from 'path';
+
+export default defineConfig({
+  plugins: [react()],
+  resolve: {
+    alias: {
+      '@': resolve(__dirname, './src'),
+    },
+  },
+});
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/README.md b/tests/fixtures/tanstack-start/conflicting-middleware/README.md
new file mode 100644
index 00000000..aa5e5b54
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/README.md
@@ -0,0 +1,32 @@
+# TanStack Start - Conflicting Middleware Fixture
+
+## Edge Case Description
+
+This fixture has existing server middleware using TanStack Start's server functions for rate limiting, request logging, and security headers. The agent must integrate AuthKit while preserving this custom middleware.
+
+## Expected Agent Behavior
+
+- Detect existing middleware in `src/middleware.server.ts`
+- Integrate AuthKit while PRESERVING:
+  - Rate limiting logic via server functions
+  - Request logging
+  - Security headers
+  - Existing route loaders
+- Compose AuthKit with existing server functions
+
+## Files of Interest
+
+- `src/middleware.server.ts` - Custom server functions for middleware
+- `src/routes/dashboard.tsx` - Route loader calls the `getSecurityContext` and `validateRequest` server functions
+
+## Success Criteria
+
+- [ ] AuthKit is integrated
+- [ ] Rate limiting still works
+- [ ] Security headers are still added
+- [ ] Request logging still works
+- [ ] Build succeeds
+
+## Notes
+
+TanStack Start uses server functions for server-side logic. The agent should compose AuthKit session management with existing patterns.
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/package.json b/tests/fixtures/tanstack-start/conflicting-middleware/package.json
new file mode 100644
index 00000000..fdd08c28
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "tanstack-start-conflicting-middleware-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite dev --port 3000",
+    "build": "vite build",
+    "start": "vite preview"
+  },
+  "dependencies": {
+    "@tanstack/react-router": "latest",
+    "@tanstack/react-start": "latest",
+    "@tanstack/router-plugin": "latest",
+    "react": "^19.0.0",
+    "react-dom": "^19.0.0",
+    "vite-tsconfig-paths": "^6.0.0"
+  },
+  "devDependencies": {
+    "@types/react": "^19.0.0",
+    "@types/react-dom": "^19.0.0",
+    "@vitejs/plugin-react": "^5.0.0",
+    "typescript": "^5.7.0",
+    "vite": "^7.0.0"
+  }
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/middleware.server.ts b/tests/fixtures/tanstack-start/conflicting-middleware/src/middleware.server.ts
new file mode 100644
index 00000000..7ef0893c
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/middleware.server.ts
@@ -0,0 +1,75 @@
+// Custom server middleware for TanStack Start
+// This file contains middleware logic that should be preserved when adding AuthKit
+
+import { createServerFn } from '@tanstack/react-start';
+
+interface RateLimitRecord { // per-IP bookkeeping for the current rate-limit window
+  count: number; // requests counted so far in this window
+  resetTime: number; // epoch milliseconds (Date.now() based) after which the window has expired
+}
+
+const rateLimitStore = new Map<string, RateLimitRecord>(); // module-level store keyed by IP; entries are overwritten on expiry, never deleted here
+const RATE_LIMIT = 100; // maximum requests allowed per window
+const WINDOW_MS = 60 * 1000; // window length in milliseconds (60 seconds)
+
+export function checkRateLimit(ip: string): { allowed: boolean; remaining: number } { // fixed-window limiter keyed by `ip`; counts this call if allowed
+  const now = Date.now();
+  const record = rateLimitStore.get(ip);
+
+  if (!record || now > record.resetTime) { // first call for this ip, or its previous window has expired
+    rateLimitStore.set(ip, { count: 1, resetTime: now + WINDOW_MS }); // start a new window; this call is request #1
+    return { allowed: true, remaining: RATE_LIMIT - 1 };
+  }
+
+  if (record.count >= RATE_LIMIT) { // quota for the current window is used up; the rejected call is not counted
+    return { allowed: false, remaining: 0 };
+  }
+
+  record.count++; // mutates the record held in rateLimitStore in place
+  return { allowed: true, remaining: RATE_LIMIT - record.count };
+}
+
+export function logRequest(method: string, path: string, ip: string): void { // emits one access-log line via console.log
+  console.log(`[${new Date().toISOString()}] ${method} ${path} from ${ip}`); // format: [ISO timestamp] METHOD PATH from IP
+}
+
+export interface SecurityContext { // shape resolved by getSecurityContext
+  ip: string; // caller address; getSecurityContext currently fills in a fixed placeholder
+  userAgent: string; // caller user agent; also a fixed placeholder for now
+  timestamp: number; // epoch milliseconds (Date.now()) when the context was built
+}
+
+export const getSecurityContext = createServerFn({ method: 'GET' }).handler(async (): Promise<SecurityContext> => {
+  // Placeholder: returns fixed values. In a real app, this would read the request headers from the server context.
+  return {
+    ip: 'server-rendered', // constant, so callers that forward it to validateRequest share one rate-limit bucket
+    userAgent: 'server',
+    timestamp: Date.now(),
+  };
+});
+
+export const validateRequest = createServerFn({ method: 'POST' })
+  .inputValidator((data: { ip: string; path: string }) => data) // identity validator: types the payload, performs no runtime checks
+  .handler(async ({ data }): Promise<{ valid: boolean; error?: string }> => {
+    const { ip, path } = data;
+
+    // Log every call before rate limiting so that rejected requests are logged too
+    logRequest('POST', path, ip);
+
+    // Count this call against the caller's window; report failure once the limit is reached
+    const { allowed } = checkRateLimit(ip);
+    if (!allowed) {
+      return { valid: false, error: 'Rate limit exceeded' };
+    }
+
+    return { valid: true };
+  });
+
+export const getServerHeaders = (): Record<string, string> => { // builds a fresh header map on each call; nothing in this module applies it to a response
+  return {
+    'X-App-Version': '1.0.0', // hard-coded application version
+    'X-Content-Type-Options': 'nosniff', // asks browsers not to MIME-sniff responses
+    'X-Frame-Options': 'DENY', // forbids rendering the app inside frames
+    'X-XSS-Protection': '1; mode=block', // legacy browser XSS-filter header
+  };
+};
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/routeTree.gen.ts b/tests/fixtures/tanstack-start/conflicting-middleware/src/routeTree.gen.ts
new file mode 100644
index 00000000..fba95d05
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/routeTree.gen.ts
@@ -0,0 +1,84 @@
+/* eslint-disable */
+
+// @ts-nocheck
+
+// noinspection JSUnusedGlobalSymbols
+
+// This file was automatically generated by TanStack Router.
+// You should NOT make any changes in this file as it will be overwritten.
+// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
+
+import { Route as rootRouteImport } from './routes/__root';
+import { Route as DashboardRouteImport } from './routes/dashboard';
+import { Route as IndexRouteImport } from './routes/index';
+
+const DashboardRoute = DashboardRouteImport.update({
+  id: '/dashboard',
+  path: '/dashboard',
+  getParentRoute: () => rootRouteImport,
+} as any);
+const IndexRoute = IndexRouteImport.update({
+  id: '/',
+  path: '/',
+  getParentRoute: () => rootRouteImport,
+} as any);
+
+export interface FileRoutesByFullPath {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesByTo {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesById {
+  __root__: typeof rootRouteImport;
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRouteTypes {
+  fileRoutesByFullPath: FileRoutesByFullPath;
+  fullPaths: '/' | '/dashboard';
+  fileRoutesByTo: FileRoutesByTo;
+  to: '/' | '/dashboard';
+  id: '__root__' | '/' | '/dashboard';
+  fileRoutesById: FileRoutesById;
+}
+export interface RootRouteChildren {
+  IndexRoute: typeof IndexRoute;
+  DashboardRoute: typeof DashboardRoute;
+}
+
+declare module '@tanstack/react-router' {
+  interface FileRoutesByPath {
+    '/dashboard': {
+      id: '/dashboard';
+      path: '/dashboard';
+      fullPath: '/dashboard';
+      preLoaderRoute: typeof DashboardRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+    '/': {
+      id: '/';
+      path: '/';
+      fullPath: '/';
+      preLoaderRoute: typeof IndexRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+  }
+}
+
+const rootRouteChildren: RootRouteChildren = {
+  IndexRoute: IndexRoute,
+  DashboardRoute: DashboardRoute,
+};
+export const routeTree = rootRouteImport._addFileChildren(rootRouteChildren)._addFileTypes<FileRouteTypes>();
+
+import type { getRouter } from './router.tsx';
+import type { createStart } from '@tanstack/react-start';
+declare module '@tanstack/react-start' {
+  interface Register {
+    ssr: true;
+    router: Awaited<ReturnType<typeof getRouter>>;
+  }
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/router.tsx b/tests/fixtures/tanstack-start/conflicting-middleware/src/router.tsx
new file mode 100644
index 00000000..4df65543
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/router.tsx
@@ -0,0 +1,13 @@
+import { createRouter } from '@tanstack/react-router';
+import { routeTree } from './routeTree.gen';
+
+export const getRouter = () => {
+  const router = createRouter({
+    routeTree,
+    context: {},
+    scrollRestoration: true,
+    defaultPreloadStaleTime: 0,
+  });
+
+  return router;
+};
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/__root.tsx b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/__root.tsx
new file mode 100644
index 00000000..22319ff9
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/__root.tsx
@@ -0,0 +1,28 @@
+import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router';
+import appCss from '../styles.css?url';
+
+export const Route = createRootRoute({
+  head: () => ({
+    meta: [
+      { charSet: 'utf-8' },
+      { name: 'viewport', content: 'width=device-width, initial-scale=1' },
+      { title: 'TanStack Start App' },
+    ],
+    links: [{ rel: 'stylesheet', href: appCss }],
+  }),
+  shellComponent: RootDocument,
+});
+
+function RootDocument({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <head>
+        <HeadContent />
+      </head>
+      <body>
+        {children}
+        <Scripts />
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/dashboard.tsx b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/dashboard.tsx
new file mode 100644
index 00000000..659e2008
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/dashboard.tsx
@@ -0,0 +1,51 @@
+import { createFileRoute } from '@tanstack/react-router';
+import { getSecurityContext, validateRequest } from '../middleware.server';
+
+export const Route = createFileRoute('/dashboard')({
+  loader: async () => {
+    // Get security context from server
+    const securityContext = await getSecurityContext();
+
+    // Validate the request through our middleware
+    const validation = await validateRequest({
+      data: {
+        ip: securityContext.ip,
+        path: '/dashboard',
+      },
+    });
+
+    if (!validation.valid) {
+      throw new Error(validation.error ?? 'Request validation failed');
+    }
+
+    return {
+      securityContext,
+      stats: {
+        users: 1234,
+        revenue: 12345,
+      },
+    };
+  },
+  component: Dashboard,
+});
+
+function Dashboard() {
+  const { stats } = Route.useLoaderData();
+
+  return (
+    <div className="container">
+      <h1>Dashboard</h1>
+      <p>This is a protected dashboard page.</p>
+      <div className="stats">
+        <div className="stat">
+          <h3>Users</h3>
+          <p>{stats.users.toLocaleString()}</p>
+        </div>
+        <div className="stat">
+          <h3>Revenue</h3>
+          <p>${stats.revenue.toLocaleString()}</p>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/index.tsx b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/index.tsx
new file mode 100644
index 00000000..6d4a097f
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/routes/index.tsx
@@ -0,0 +1,17 @@
+import { createFileRoute, Link } from '@tanstack/react-router';
+
+export const Route = createFileRoute('/')({
+  component: Home,
+});
+
+function Home() {
+  return (
+    <div className="container">
+      <h1>Welcome to My App</h1>
+      <p>This is an existing TanStack Start application with custom middleware.</p>
+      <nav>
+        <Link to="/dashboard">Go to Dashboard</Link>
+      </nav>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/src/styles.css b/tests/fixtures/tanstack-start/conflicting-middleware/src/styles.css
new file mode 100644
index 00000000..3ac4e274
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/src/styles.css
@@ -0,0 +1,21 @@
+body {
+  margin: 0;
+  font-family:
+    -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans',
+    'Helvetica Neue', sans-serif;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+
+.container {
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 2rem;
+}
+
+code {
+  font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', monospace;
+  background: #f4f4f4;
+  padding: 0.2em 0.4em;
+  border-radius: 3px;
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/tsconfig.json b/tests/fixtures/tanstack-start/conflicting-middleware/tsconfig.json
new file mode 100644
index 00000000..2b33c015
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/tsconfig.json
@@ -0,0 +1,22 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "~/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/tanstack-start/conflicting-middleware/vite.config.ts b/tests/fixtures/tanstack-start/conflicting-middleware/vite.config.ts
new file mode 100644
index 00000000..edd01ec9
--- /dev/null
+++ b/tests/fixtures/tanstack-start/conflicting-middleware/vite.config.ts
@@ -0,0 +1,14 @@
+import { defineConfig } from 'vite';
+import { tanstackStart } from '@tanstack/react-start/plugin/vite';
+import viteReact from '@vitejs/plugin-react';
+import viteTsConfigPaths from 'vite-tsconfig-paths';
+
+export default defineConfig({
+  plugins: [
+    viteTsConfigPaths({
+      projects: ['./tsconfig.json'],
+    }),
+    tanstackStart(),
+    viteReact(),
+  ],
+});
diff --git a/tests/fixtures/tanstack-start/example-auth0/src/routeTree.gen.ts b/tests/fixtures/tanstack-start/example-auth0/src/routeTree.gen.ts
index 1acbf7ff..fba95d05 100644
--- a/tests/fixtures/tanstack-start/example-auth0/src/routeTree.gen.ts
+++ b/tests/fixtures/tanstack-start/example-auth0/src/routeTree.gen.ts
@@ -8,79 +8,77 @@
 // You should NOT make any changes in this file as it will be overwritten.
 // Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
 
-import { Route as rootRouteImport } from './routes/__root'
-import { Route as DashboardRouteImport } from './routes/dashboard'
-import { Route as IndexRouteImport } from './routes/index'
+import { Route as rootRouteImport } from './routes/__root';
+import { Route as DashboardRouteImport } from './routes/dashboard';
+import { Route as IndexRouteImport } from './routes/index';
 
 const DashboardRoute = DashboardRouteImport.update({
   id: '/dashboard',
   path: '/dashboard',
   getParentRoute: () => rootRouteImport,
-} as any)
+} as any);
 const IndexRoute = IndexRouteImport.update({
   id: '/',
   path: '/',
   getParentRoute: () => rootRouteImport,
-} as any)
+} as any);
 
 export interface FileRoutesByFullPath {
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRoutesByTo {
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRoutesById {
-  __root__: typeof rootRouteImport
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  __root__: typeof rootRouteImport;
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRouteTypes {
-  fileRoutesByFullPath: FileRoutesByFullPath
-  fullPaths: '/' | '/dashboard'
-  fileRoutesByTo: FileRoutesByTo
-  to: '/' | '/dashboard'
-  id: '__root__' | '/' | '/dashboard'
-  fileRoutesById: FileRoutesById
+  fileRoutesByFullPath: FileRoutesByFullPath;
+  fullPaths: '/' | '/dashboard';
+  fileRoutesByTo: FileRoutesByTo;
+  to: '/' | '/dashboard';
+  id: '__root__' | '/' | '/dashboard';
+  fileRoutesById: FileRoutesById;
 }
 export interface RootRouteChildren {
-  IndexRoute: typeof IndexRoute
-  DashboardRoute: typeof DashboardRoute
+  IndexRoute: typeof IndexRoute;
+  DashboardRoute: typeof DashboardRoute;
 }
 
 declare module '@tanstack/react-router' {
   interface FileRoutesByPath {
     '/dashboard': {
-      id: '/dashboard'
-      path: '/dashboard'
-      fullPath: '/dashboard'
-      preLoaderRoute: typeof DashboardRouteImport
-      parentRoute: typeof rootRouteImport
-    }
+      id: '/dashboard';
+      path: '/dashboard';
+      fullPath: '/dashboard';
+      preLoaderRoute: typeof DashboardRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
     '/': {
-      id: '/'
-      path: '/'
-      fullPath: '/'
-      preLoaderRoute: typeof IndexRouteImport
-      parentRoute: typeof rootRouteImport
-    }
+      id: '/';
+      path: '/';
+      fullPath: '/';
+      preLoaderRoute: typeof IndexRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
   }
 }
 
 const rootRouteChildren: RootRouteChildren = {
   IndexRoute: IndexRoute,
   DashboardRoute: DashboardRoute,
-}
-export const routeTree = rootRouteImport
-  ._addFileChildren(rootRouteChildren)
-  ._addFileTypes<FileRouteTypes>()
+};
+export const routeTree = rootRouteImport._addFileChildren(rootRouteChildren)._addFileTypes<FileRouteTypes>();
 
-import type { getRouter } from './router.tsx'
-import type { createStart } from '@tanstack/react-start'
+import type { getRouter } from './router.tsx';
+import type { createStart } from '@tanstack/react-start';
 declare module '@tanstack/react-start' {
   interface Register {
-    ssr: true
-    router: Awaited<ReturnType<typeof getRouter>>
+    ssr: true;
+    router: Awaited<ReturnType<typeof getRouter>>;
   }
 }
diff --git a/tests/fixtures/tanstack-start/example-auth0/src/router.tsx b/tests/fixtures/tanstack-start/example-auth0/src/router.tsx
index 0c83bf0d..4df65543 100644
--- a/tests/fixtures/tanstack-start/example-auth0/src/router.tsx
+++ b/tests/fixtures/tanstack-start/example-auth0/src/router.tsx
@@ -1,5 +1,5 @@
-import { createRouter } from '@tanstack/react-router'
-import { routeTree } from './routeTree.gen'
+import { createRouter } from '@tanstack/react-router';
+import { routeTree } from './routeTree.gen';
 
 export const getRouter = () => {
   const router = createRouter({
@@ -7,7 +7,7 @@ export const getRouter = () => {
     context: {},
     scrollRestoration: true,
     defaultPreloadStaleTime: 0,
-  })
+  });
 
-  return router
-}
+  return router;
+};
diff --git a/tests/fixtures/tanstack-start/example-auth0/src/routes/dashboard.tsx b/tests/fixtures/tanstack-start/example-auth0/src/routes/dashboard.tsx
index 3f372064..f820fe20 100644
--- a/tests/fixtures/tanstack-start/example-auth0/src/routes/dashboard.tsx
+++ b/tests/fixtures/tanstack-start/example-auth0/src/routes/dashboard.tsx
@@ -1,12 +1,12 @@
-import { createFileRoute } from '@tanstack/react-router'
-import { useAuth0, withAuthenticationRequired } from '@auth0/auth0-react'
+import { createFileRoute } from '@tanstack/react-router';
+import { useAuth0, withAuthenticationRequired } from '@auth0/auth0-react';
 
 export const Route = createFileRoute('/dashboard')({
   component: withAuthenticationRequired(Dashboard),
-})
+});
 
 function Dashboard() {
-  const { user } = useAuth0()
+  const { user } = useAuth0();
 
   return (
     <div className="container">
@@ -23,5 +23,5 @@ function Dashboard() {
         </div>
       </div>
     </div>
-  )
+  );
 }
diff --git a/tests/fixtures/tanstack-start/example-auth0/src/routes/index.tsx b/tests/fixtures/tanstack-start/example-auth0/src/routes/index.tsx
index b3abc830..e12b17bf 100644
--- a/tests/fixtures/tanstack-start/example-auth0/src/routes/index.tsx
+++ b/tests/fixtures/tanstack-start/example-auth0/src/routes/index.tsx
@@ -1,12 +1,12 @@
-import { createFileRoute, Link } from '@tanstack/react-router'
-import { useAuth0 } from '@auth0/auth0-react'
+import { createFileRoute, Link } from '@tanstack/react-router';
+import { useAuth0 } from '@auth0/auth0-react';
 
 export const Route = createFileRoute('/')({
   component: Home,
-})
+});
 
 function Home() {
-  const { isAuthenticated, loginWithRedirect, logout, user } = useAuth0()
+  const { isAuthenticated, loginWithRedirect, logout, user } = useAuth0();
 
   return (
     <div className="container">
@@ -25,5 +25,5 @@ function Home() {
         <button onClick={() => loginWithRedirect()}>Log In</button>
       )}
     </div>
-  )
+  );
 }
diff --git a/tests/fixtures/tanstack-start/example-auth0/src/styles.css b/tests/fixtures/tanstack-start/example-auth0/src/styles.css
index c48d4c36..3ac4e274 100644
--- a/tests/fixtures/tanstack-start/example-auth0/src/styles.css
+++ b/tests/fixtures/tanstack-start/example-auth0/src/styles.css
@@ -1,7 +1,8 @@
 body {
   margin: 0;
-  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen,
-    Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
+  font-family:
+    -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans',
+    'Helvetica Neue', sans-serif;
   -webkit-font-smoothing: antialiased;
   -moz-osx-font-smoothing: grayscale;
 }
diff --git a/tests/fixtures/tanstack-start/example-auth0/vite.config.ts b/tests/fixtures/tanstack-start/example-auth0/vite.config.ts
index a3594f99..edd01ec9 100644
--- a/tests/fixtures/tanstack-start/example-auth0/vite.config.ts
+++ b/tests/fixtures/tanstack-start/example-auth0/vite.config.ts
@@ -1,7 +1,7 @@
-import { defineConfig } from 'vite'
-import { tanstackStart } from '@tanstack/react-start/plugin/vite'
-import viteReact from '@vitejs/plugin-react'
-import viteTsConfigPaths from 'vite-tsconfig-paths'
+import { defineConfig } from 'vite';
+import { tanstackStart } from '@tanstack/react-start/plugin/vite';
+import viteReact from '@vitejs/plugin-react';
+import viteTsConfigPaths from 'vite-tsconfig-paths';
 
 export default defineConfig({
   plugins: [
@@ -11,4 +11,4 @@ export default defineConfig({
     tanstackStart(),
     viteReact(),
   ],
-})
+});
diff --git a/tests/fixtures/tanstack-start/example/src/routeTree.gen.ts b/tests/fixtures/tanstack-start/example/src/routeTree.gen.ts
index 1acbf7ff..fba95d05 100644
--- a/tests/fixtures/tanstack-start/example/src/routeTree.gen.ts
+++ b/tests/fixtures/tanstack-start/example/src/routeTree.gen.ts
@@ -8,79 +8,77 @@
 // You should NOT make any changes in this file as it will be overwritten.
 // Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
 
-import { Route as rootRouteImport } from './routes/__root'
-import { Route as DashboardRouteImport } from './routes/dashboard'
-import { Route as IndexRouteImport } from './routes/index'
+import { Route as rootRouteImport } from './routes/__root';
+import { Route as DashboardRouteImport } from './routes/dashboard';
+import { Route as IndexRouteImport } from './routes/index';
 
 const DashboardRoute = DashboardRouteImport.update({
   id: '/dashboard',
   path: '/dashboard',
   getParentRoute: () => rootRouteImport,
-} as any)
+} as any);
 const IndexRoute = IndexRouteImport.update({
   id: '/',
   path: '/',
   getParentRoute: () => rootRouteImport,
-} as any)
+} as any);
 
 export interface FileRoutesByFullPath {
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRoutesByTo {
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRoutesById {
-  __root__: typeof rootRouteImport
-  '/': typeof IndexRoute
-  '/dashboard': typeof DashboardRoute
+  __root__: typeof rootRouteImport;
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
 }
 export interface FileRouteTypes {
-  fileRoutesByFullPath: FileRoutesByFullPath
-  fullPaths: '/' | '/dashboard'
-  fileRoutesByTo: FileRoutesByTo
-  to: '/' | '/dashboard'
-  id: '__root__' | '/' | '/dashboard'
-  fileRoutesById: FileRoutesById
+  fileRoutesByFullPath: FileRoutesByFullPath;
+  fullPaths: '/' | '/dashboard';
+  fileRoutesByTo: FileRoutesByTo;
+  to: '/' | '/dashboard';
+  id: '__root__' | '/' | '/dashboard';
+  fileRoutesById: FileRoutesById;
 }
 export interface RootRouteChildren {
-  IndexRoute: typeof IndexRoute
-  DashboardRoute: typeof DashboardRoute
+  IndexRoute: typeof IndexRoute;
+  DashboardRoute: typeof DashboardRoute;
 }
 
 declare module '@tanstack/react-router' {
   interface FileRoutesByPath {
     '/dashboard': {
-      id: '/dashboard'
-      path: '/dashboard'
-      fullPath: '/dashboard'
-      preLoaderRoute: typeof DashboardRouteImport
-      parentRoute: typeof rootRouteImport
-    }
+      id: '/dashboard';
+      path: '/dashboard';
+      fullPath: '/dashboard';
+      preLoaderRoute: typeof DashboardRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
     '/': {
-      id: '/'
-      path: '/'
-      fullPath: '/'
-      preLoaderRoute: typeof IndexRouteImport
-      parentRoute: typeof rootRouteImport
-    }
+      id: '/';
+      path: '/';
+      fullPath: '/';
+      preLoaderRoute: typeof IndexRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
   }
 }
 
 const rootRouteChildren: RootRouteChildren = {
   IndexRoute: IndexRoute,
   DashboardRoute: DashboardRoute,
-}
-export const routeTree = rootRouteImport
-  ._addFileChildren(rootRouteChildren)
-  ._addFileTypes<FileRouteTypes>()
+};
+export const routeTree = rootRouteImport._addFileChildren(rootRouteChildren)._addFileTypes<FileRouteTypes>();
 
-import type { getRouter } from './router.tsx'
-import type { createStart } from '@tanstack/react-start'
+import type { getRouter } from './router.tsx';
+import type { createStart } from '@tanstack/react-start';
 declare module '@tanstack/react-start' {
   interface Register {
-    ssr: true
-    router: Awaited<ReturnType<typeof getRouter>>
+    ssr: true;
+    router: Awaited<ReturnType<typeof getRouter>>;
   }
 }
diff --git a/tests/fixtures/tanstack-start/example/src/router.tsx b/tests/fixtures/tanstack-start/example/src/router.tsx
index 0c83bf0d..4df65543 100644
--- a/tests/fixtures/tanstack-start/example/src/router.tsx
+++ b/tests/fixtures/tanstack-start/example/src/router.tsx
@@ -1,5 +1,5 @@
-import { createRouter } from '@tanstack/react-router'
-import { routeTree } from './routeTree.gen'
+import { createRouter } from '@tanstack/react-router';
+import { routeTree } from './routeTree.gen';
 
 export const getRouter = () => {
   const router = createRouter({
@@ -7,7 +7,7 @@ export const getRouter = () => {
     context: {},
     scrollRestoration: true,
     defaultPreloadStaleTime: 0,
-  })
+  });
 
-  return router
-}
+  return router;
+};
diff --git a/tests/fixtures/tanstack-start/example/src/routes/__root.tsx b/tests/fixtures/tanstack-start/example/src/routes/__root.tsx
index 4c191584..22319ff9 100644
--- a/tests/fixtures/tanstack-start/example/src/routes/__root.tsx
+++ b/tests/fixtures/tanstack-start/example/src/routes/__root.tsx
@@ -1,5 +1,5 @@
-import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router'
-import appCss from '../styles.css?url'
+import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router';
+import appCss from '../styles.css?url';
 
 export const Route = createRootRoute({
   head: () => ({
@@ -11,7 +11,7 @@ export const Route = createRootRoute({
     links: [{ rel: 'stylesheet', href: appCss }],
   }),
   shellComponent: RootDocument,
-})
+});
 
 function RootDocument({ children }: { children: React.ReactNode }) {
   return (
@@ -24,5 +24,5 @@ function RootDocument({ children }: { children: React.ReactNode }) {
         <Scripts />
       </body>
     </html>
-  )
+  );
 }
diff --git a/tests/fixtures/tanstack-start/example/src/routes/dashboard.tsx b/tests/fixtures/tanstack-start/example/src/routes/dashboard.tsx
index 2d523ae6..34a7b445 100644
--- a/tests/fixtures/tanstack-start/example/src/routes/dashboard.tsx
+++ b/tests/fixtures/tanstack-start/example/src/routes/dashboard.tsx
@@ -1,8 +1,8 @@
-import { createFileRoute } from '@tanstack/react-router'
+import { createFileRoute } from '@tanstack/react-router';
 
 export const Route = createFileRoute('/dashboard')({
   component: Dashboard,
-})
+});
 
 function Dashboard() {
   return (
@@ -20,5 +20,5 @@ function Dashboard() {
         </div>
       </div>
     </div>
-  )
+  );
 }
diff --git a/tests/fixtures/tanstack-start/example/src/routes/index.tsx b/tests/fixtures/tanstack-start/example/src/routes/index.tsx
index aaec5a53..2edc996d 100644
--- a/tests/fixtures/tanstack-start/example/src/routes/index.tsx
+++ b/tests/fixtures/tanstack-start/example/src/routes/index.tsx
@@ -1,8 +1,8 @@
-import { createFileRoute, Link } from '@tanstack/react-router'
+import { createFileRoute, Link } from '@tanstack/react-router';
 
 export const Route = createFileRoute('/')({
   component: Home,
-})
+});
 
 function Home() {
   return (
@@ -13,5 +13,5 @@ function Home() {
         <Link to="/dashboard">Go to Dashboard</Link>
       </nav>
     </div>
-  )
+  );
 }
diff --git a/tests/fixtures/tanstack-start/example/src/styles.css b/tests/fixtures/tanstack-start/example/src/styles.css
index c48d4c36..3ac4e274 100644
--- a/tests/fixtures/tanstack-start/example/src/styles.css
+++ b/tests/fixtures/tanstack-start/example/src/styles.css
@@ -1,7 +1,8 @@
 body {
   margin: 0;
-  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen,
-    Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans', 'Helvetica Neue', sans-serif;
+  font-family:
+    -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans',
+    'Helvetica Neue', sans-serif;
   -webkit-font-smoothing: antialiased;
   -moz-osx-font-smoothing: grayscale;
 }
diff --git a/tests/fixtures/tanstack-start/example/vite.config.ts b/tests/fixtures/tanstack-start/example/vite.config.ts
index a3594f99..edd01ec9 100644
--- a/tests/fixtures/tanstack-start/example/vite.config.ts
+++ b/tests/fixtures/tanstack-start/example/vite.config.ts
@@ -1,7 +1,7 @@
-import { defineConfig } from 'vite'
-import { tanstackStart } from '@tanstack/react-start/plugin/vite'
-import viteReact from '@vitejs/plugin-react'
-import viteTsConfigPaths from 'vite-tsconfig-paths'
+import { defineConfig } from 'vite';
+import { tanstackStart } from '@tanstack/react-start/plugin/vite';
+import viteReact from '@vitejs/plugin-react';
+import viteTsConfigPaths from 'vite-tsconfig-paths';
 
 export default defineConfig({
   plugins: [
@@ -11,4 +11,4 @@ export default defineConfig({
     tanstackStart(),
     viteReact(),
   ],
-})
+});
diff --git a/tests/fixtures/tanstack-start/partial-install/README.md b/tests/fixtures/tanstack-start/partial-install/README.md
new file mode 100644
index 00000000..8c249c0e
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/README.md
@@ -0,0 +1,30 @@
+# TanStack Start - Partial Install Fixture
+
+## Edge Case Description
+
+This fixture represents a TanStack Start project where AuthKit was partially installed - the package is in dependencies but integration was never completed.
+
+## Expected Agent Behavior
+
+- Detect that @workos-inc/authkit-react is already installed
+- Complete the integration by:
+  - Adding AuthKitProvider
+  - Creating callback route
+  - Setting up server functions for auth
+- Should NOT reinstall the package
+
+## Files of Interest
+
+- `package.json` - Already has @workos-inc/authkit-react dependency
+- `src/router.tsx` - Has a TODO and a commented-out `AuthKitProvider` import
+
+## Success Criteria
+
+- [ ] AuthKitProvider wraps the app
+- [ ] Callback route is created
+- [ ] Build succeeds
+- [ ] Package is not reinstalled
+
+## Notes
+
+This is a common scenario: developers start the integration but don't finish it.
diff --git a/tests/fixtures/tanstack-start/partial-install/package.json b/tests/fixtures/tanstack-start/partial-install/package.json
new file mode 100644
index 00000000..35ec5589
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/package.json
@@ -0,0 +1,26 @@
+{
+  "name": "tanstack-start-partial-install-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite dev --port 3000",
+    "build": "vite build",
+    "start": "vite preview"
+  },
+  "dependencies": {
+    "@tanstack/react-router": "latest",
+    "@tanstack/react-start": "latest",
+    "@tanstack/router-plugin": "latest",
+    "@workos-inc/authkit-react": "^0.5.0",
+    "react": "^19.0.0",
+    "react-dom": "^19.0.0",
+    "vite-tsconfig-paths": "^6.0.0"
+  },
+  "devDependencies": {
+    "@types/react": "^19.0.0",
+    "@types/react-dom": "^19.0.0",
+    "@vitejs/plugin-react": "^5.0.0",
+    "typescript": "^5.7.0",
+    "vite": "^7.0.0"
+  }
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/src/routeTree.gen.ts b/tests/fixtures/tanstack-start/partial-install/src/routeTree.gen.ts
new file mode 100644
index 00000000..fba95d05
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/routeTree.gen.ts
@@ -0,0 +1,84 @@
+/* eslint-disable */
+
+// @ts-nocheck
+
+// noinspection JSUnusedGlobalSymbols
+
+// This file was automatically generated by TanStack Router.
+// You should NOT make any changes in this file as it will be overwritten.
+// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
+
+import { Route as rootRouteImport } from './routes/__root';
+import { Route as DashboardRouteImport } from './routes/dashboard';
+import { Route as IndexRouteImport } from './routes/index';
+
+const DashboardRoute = DashboardRouteImport.update({
+  id: '/dashboard',
+  path: '/dashboard',
+  getParentRoute: () => rootRouteImport,
+} as any);
+const IndexRoute = IndexRouteImport.update({
+  id: '/',
+  path: '/',
+  getParentRoute: () => rootRouteImport,
+} as any);
+
+export interface FileRoutesByFullPath {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesByTo {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesById {
+  __root__: typeof rootRouteImport;
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRouteTypes {
+  fileRoutesByFullPath: FileRoutesByFullPath;
+  fullPaths: '/' | '/dashboard';
+  fileRoutesByTo: FileRoutesByTo;
+  to: '/' | '/dashboard';
+  id: '__root__' | '/' | '/dashboard';
+  fileRoutesById: FileRoutesById;
+}
+export interface RootRouteChildren {
+  IndexRoute: typeof IndexRoute;
+  DashboardRoute: typeof DashboardRoute;
+}
+
+declare module '@tanstack/react-router' {
+  interface FileRoutesByPath {
+    '/dashboard': {
+      id: '/dashboard';
+      path: '/dashboard';
+      fullPath: '/dashboard';
+      preLoaderRoute: typeof DashboardRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+    '/': {
+      id: '/';
+      path: '/';
+      fullPath: '/';
+      preLoaderRoute: typeof IndexRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+  }
+}
+
+const rootRouteChildren: RootRouteChildren = {
+  IndexRoute: IndexRoute,
+  DashboardRoute: DashboardRoute,
+};
+export const routeTree = rootRouteImport._addFileChildren(rootRouteChildren)._addFileTypes<FileRouteTypes>();
+
+import type { getRouter } from './router.tsx';
+import type { createStart } from '@tanstack/react-start';
+declare module '@tanstack/react-start' {
+  interface Register {
+    ssr: true;
+    router: Awaited<ReturnType<typeof getRouter>>;
+  }
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/src/router.tsx b/tests/fixtures/tanstack-start/partial-install/src/router.tsx
new file mode 100644
index 00000000..865a4d49
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/router.tsx
@@ -0,0 +1,15 @@
+import { createRouter } from '@tanstack/react-router';
+import { routeTree } from './routeTree.gen';
+// TODO: Complete AuthKit integration
+// import { AuthKitProvider } from '@workos-inc/authkit-react';
+
+export const getRouter = () => {
+  const router = createRouter({
+    routeTree,
+    context: {},
+    scrollRestoration: true,
+    defaultPreloadStaleTime: 0,
+  });
+
+  return router;
+};
diff --git a/tests/fixtures/tanstack-start/partial-install/src/routes/__root.tsx b/tests/fixtures/tanstack-start/partial-install/src/routes/__root.tsx
new file mode 100644
index 00000000..22319ff9
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/routes/__root.tsx
@@ -0,0 +1,28 @@
+import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router';
+import appCss from '../styles.css?url';
+
+export const Route = createRootRoute({
+  head: () => ({
+    meta: [
+      { charSet: 'utf-8' },
+      { name: 'viewport', content: 'width=device-width, initial-scale=1' },
+      { title: 'TanStack Start App' },
+    ],
+    links: [{ rel: 'stylesheet', href: appCss }],
+  }),
+  shellComponent: RootDocument,
+});
+
+function RootDocument({ children }: { children: React.ReactNode }) {
+  return (
+    <html lang="en">
+      <head>
+        <HeadContent />
+      </head>
+      <body>
+        {children}
+        <Scripts />
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/src/routes/dashboard.tsx b/tests/fixtures/tanstack-start/partial-install/src/routes/dashboard.tsx
new file mode 100644
index 00000000..34a7b445
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/routes/dashboard.tsx
@@ -0,0 +1,24 @@
+import { createFileRoute } from '@tanstack/react-router';
+
+export const Route = createFileRoute('/dashboard')({
+  component: Dashboard,
+});
+
+function Dashboard() {
+  return (
+    <div className="container">
+      <h1>Dashboard</h1>
+      <p>This is a protected dashboard page.</p>
+      <div className="stats">
+        <div className="stat">
+          <h3>Users</h3>
+          <p>1,234</p>
+        </div>
+        <div className="stat">
+          <h3>Revenue</h3>
+          <p>$12,345</p>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/src/routes/index.tsx b/tests/fixtures/tanstack-start/partial-install/src/routes/index.tsx
new file mode 100644
index 00000000..2edc996d
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/routes/index.tsx
@@ -0,0 +1,17 @@
+import { createFileRoute, Link } from '@tanstack/react-router';
+
+export const Route = createFileRoute('/')({
+  component: Home,
+});
+
+function Home() {
+  return (
+    <div className="container">
+      <h1>Welcome to My App</h1>
+      <p>This is an existing TanStack Start application.</p>
+      <nav>
+        <Link to="/dashboard">Go to Dashboard</Link>
+      </nav>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/src/styles.css b/tests/fixtures/tanstack-start/partial-install/src/styles.css
new file mode 100644
index 00000000..3ac4e274
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/src/styles.css
@@ -0,0 +1,21 @@
+body {
+  margin: 0;
+  font-family:
+    -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans',
+    'Helvetica Neue', sans-serif;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+
+.container {
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 2rem;
+}
+
+code {
+  font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', monospace;
+  background: #f4f4f4;
+  padding: 0.2em 0.4em;
+  border-radius: 3px;
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/tsconfig.json b/tests/fixtures/tanstack-start/partial-install/tsconfig.json
new file mode 100644
index 00000000..2b33c015
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/tsconfig.json
@@ -0,0 +1,22 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "~/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/tanstack-start/partial-install/vite.config.ts b/tests/fixtures/tanstack-start/partial-install/vite.config.ts
new file mode 100644
index 00000000..edd01ec9
--- /dev/null
+++ b/tests/fixtures/tanstack-start/partial-install/vite.config.ts
@@ -0,0 +1,14 @@
+import { defineConfig } from 'vite';
+import { tanstackStart } from '@tanstack/react-start/plugin/vite';
+import viteReact from '@vitejs/plugin-react';
+import viteTsConfigPaths from 'vite-tsconfig-paths';
+
+export default defineConfig({
+  plugins: [
+    viteTsConfigPaths({
+      projects: ['./tsconfig.json'],
+    }),
+    tanstackStart(),
+    viteReact(),
+  ],
+});
diff --git a/tests/fixtures/tanstack-start/typescript-strict/README.md b/tests/fixtures/tanstack-start/typescript-strict/README.md
new file mode 100644
index 00000000..0228c15f
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/README.md
@@ -0,0 +1,27 @@
+# TanStack Start - TypeScript Strict Fixture
+
+## Edge Case Description
+
+This fixture has the strictest TypeScript configuration. It tests whether the agent generates fully type-safe code.
+
+## Expected Agent Behavior
+
+- Generate code with explicit return types
+- Use proper type annotations
+- Handle null/undefined properly
+- Not introduce unused variables
+
+## Files of Interest
+
+- `tsconfig.json` - Has all strict flags
+- All `.tsx` files - Have explicit return types
+
+## Success Criteria
+
+- [ ] `pnpm build` passes with zero type errors
+- [ ] Generated code has proper types
+- [ ] No implicit any errors
+
+## Notes
+
+Critical for enterprise TanStack Start apps with strict TypeScript.
diff --git a/tests/fixtures/tanstack-start/typescript-strict/package.json b/tests/fixtures/tanstack-start/typescript-strict/package.json
new file mode 100644
index 00000000..7c540afe
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/package.json
@@ -0,0 +1,25 @@
+{
+  "name": "tanstack-start-typescript-strict-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite dev --port 3000",
+    "build": "vite build",
+    "start": "vite preview"
+  },
+  "dependencies": {
+    "@tanstack/react-router": "latest",
+    "@tanstack/react-start": "latest",
+    "@tanstack/router-plugin": "latest",
+    "react": "^19.0.0",
+    "react-dom": "^19.0.0",
+    "vite-tsconfig-paths": "^6.0.0"
+  },
+  "devDependencies": {
+    "@types/react": "^19.0.0",
+    "@types/react-dom": "^19.0.0",
+    "@vitejs/plugin-react": "^5.0.0",
+    "typescript": "^5.7.0",
+    "vite": "^7.0.0"
+  }
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/routeTree.gen.ts b/tests/fixtures/tanstack-start/typescript-strict/src/routeTree.gen.ts
new file mode 100644
index 00000000..fba95d05
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/routeTree.gen.ts
@@ -0,0 +1,84 @@
+/* eslint-disable */
+
+// @ts-nocheck
+
+// noinspection JSUnusedGlobalSymbols
+
+// This file was automatically generated by TanStack Router.
+// You should NOT make any changes in this file as it will be overwritten.
+// Additionally, you should also exclude this file from your linter and/or formatter to prevent it from being checked or modified.
+
+import { Route as rootRouteImport } from './routes/__root';
+import { Route as DashboardRouteImport } from './routes/dashboard';
+import { Route as IndexRouteImport } from './routes/index';
+
+const DashboardRoute = DashboardRouteImport.update({
+  id: '/dashboard',
+  path: '/dashboard',
+  getParentRoute: () => rootRouteImport,
+} as any);
+const IndexRoute = IndexRouteImport.update({
+  id: '/',
+  path: '/',
+  getParentRoute: () => rootRouteImport,
+} as any);
+
+export interface FileRoutesByFullPath {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesByTo {
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRoutesById {
+  __root__: typeof rootRouteImport;
+  '/': typeof IndexRoute;
+  '/dashboard': typeof DashboardRoute;
+}
+export interface FileRouteTypes {
+  fileRoutesByFullPath: FileRoutesByFullPath;
+  fullPaths: '/' | '/dashboard';
+  fileRoutesByTo: FileRoutesByTo;
+  to: '/' | '/dashboard';
+  id: '__root__' | '/' | '/dashboard';
+  fileRoutesById: FileRoutesById;
+}
+export interface RootRouteChildren {
+  IndexRoute: typeof IndexRoute;
+  DashboardRoute: typeof DashboardRoute;
+}
+
+declare module '@tanstack/react-router' {
+  interface FileRoutesByPath {
+    '/dashboard': {
+      id: '/dashboard';
+      path: '/dashboard';
+      fullPath: '/dashboard';
+      preLoaderRoute: typeof DashboardRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+    '/': {
+      id: '/';
+      path: '/';
+      fullPath: '/';
+      preLoaderRoute: typeof IndexRouteImport;
+      parentRoute: typeof rootRouteImport;
+    };
+  }
+}
+
+const rootRouteChildren: RootRouteChildren = {
+  IndexRoute: IndexRoute,
+  DashboardRoute: DashboardRoute,
+};
+export const routeTree = rootRouteImport._addFileChildren(rootRouteChildren)._addFileTypes<FileRouteTypes>();
+
+import type { getRouter } from './router.tsx';
+import type { createStart } from '@tanstack/react-start';
+declare module '@tanstack/react-start' {
+  interface Register {
+    ssr: true;
+    router: Awaited<ReturnType<typeof getRouter>>;
+  }
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/router.tsx b/tests/fixtures/tanstack-start/typescript-strict/src/router.tsx
new file mode 100644
index 00000000..85328dc0
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/router.tsx
@@ -0,0 +1,13 @@
+import { createRouter, type Router } from '@tanstack/react-router';
+import { routeTree } from './routeTree.gen';
+
+export const getRouter = (): Router<typeof routeTree> => {
+  const router = createRouter({
+    routeTree,
+    context: {},
+    scrollRestoration: true,
+    defaultPreloadStaleTime: 0,
+  });
+
+  return router;
+};
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/routes/__root.tsx b/tests/fixtures/tanstack-start/typescript-strict/src/routes/__root.tsx
new file mode 100644
index 00000000..0986bfcd
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/routes/__root.tsx
@@ -0,0 +1,33 @@
+import { HeadContent, Scripts, createRootRoute } from '@tanstack/react-router';
+import type { ReactNode, JSX } from 'react';
+import appCss from '../styles.css?url';
+
+interface RootDocumentProps {
+  children: ReactNode;
+}
+
+export const Route = createRootRoute({
+  head: () => ({
+    meta: [
+      { charSet: 'utf-8' },
+      { name: 'viewport', content: 'width=device-width, initial-scale=1' },
+      { title: 'TanStack Start App' },
+    ],
+    links: [{ rel: 'stylesheet', href: appCss }],
+  }),
+  shellComponent: RootDocument,
+});
+
+function RootDocument({ children }: RootDocumentProps): JSX.Element {
+  return (
+    <html lang="en">
+      <head>
+        <HeadContent />
+      </head>
+      <body>
+        {children}
+        <Scripts />
+      </body>
+    </html>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/routes/dashboard.tsx b/tests/fixtures/tanstack-start/typescript-strict/src/routes/dashboard.tsx
new file mode 100644
index 00000000..9357587a
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/routes/dashboard.tsx
@@ -0,0 +1,25 @@
+import { createFileRoute } from '@tanstack/react-router';
+import type { JSX } from 'react';
+
+export const Route = createFileRoute('/dashboard')({
+  component: Dashboard,
+});
+
+function Dashboard(): JSX.Element {
+  return (
+    <div className="container">
+      <h1>Dashboard</h1>
+      <p>This is a protected dashboard page.</p>
+      <div className="stats">
+        <div className="stat">
+          <h3>Users</h3>
+          <p>1,234</p>
+        </div>
+        <div className="stat">
+          <h3>Revenue</h3>
+          <p>$12,345</p>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/routes/index.tsx b/tests/fixtures/tanstack-start/typescript-strict/src/routes/index.tsx
new file mode 100644
index 00000000..4a1f734a
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/routes/index.tsx
@@ -0,0 +1,18 @@
+import { createFileRoute, Link } from '@tanstack/react-router';
+import type { JSX } from 'react';
+
+export const Route = createFileRoute('/')({
+  component: Home,
+});
+
+function Home(): JSX.Element {
+  return (
+    <div className="container">
+      <h1>Welcome to My App</h1>
+      <p>This is an existing TanStack Start application with strict TypeScript.</p>
+      <nav>
+        <Link to="/dashboard">Go to Dashboard</Link>
+      </nav>
+    </div>
+  );
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/src/styles.css b/tests/fixtures/tanstack-start/typescript-strict/src/styles.css
new file mode 100644
index 00000000..3ac4e274
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/src/styles.css
@@ -0,0 +1,21 @@
+body {
+  margin: 0;
+  font-family:
+    -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Fira Sans', 'Droid Sans',
+    'Helvetica Neue', sans-serif;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+
+.container {
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 2rem;
+}
+
+code {
+  font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', monospace;
+  background: #f4f4f4;
+  padding: 0.2em 0.4em;
+  border-radius: 3px;
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/tsconfig.json b/tests/fixtures/tanstack-start/typescript-strict/tsconfig.json
new file mode 100644
index 00000000..6195aaa2
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/tsconfig.json
@@ -0,0 +1,31 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "lib": ["DOM", "DOM.Iterable", "ES2022"],
+    "module": "ESNext",
+    "skipLibCheck": true,
+    "moduleResolution": "bundler",
+    "allowImportingTsExtensions": true,
+    "isolatedModules": true,
+    "moduleDetection": "force",
+    "noEmit": true,
+    "jsx": "react-jsx",
+    "strict": true,
+    "noImplicitAny": true,
+    "strictNullChecks": true,
+    "strictFunctionTypes": true,
+    "strictBindCallApply": true,
+    "strictPropertyInitialization": true,
+    "noImplicitThis": true,
+    "noImplicitReturns": true,
+    "noUnusedLocals": true,
+    "noUnusedParameters": true,
+    "exactOptionalPropertyTypes": true,
+    "noUncheckedIndexedAccess": true,
+    "noFallthroughCasesInSwitch": true,
+    "paths": {
+      "~/*": ["./src/*"]
+    }
+  },
+  "include": ["src"]
+}
diff --git a/tests/fixtures/tanstack-start/typescript-strict/vite.config.ts b/tests/fixtures/tanstack-start/typescript-strict/vite.config.ts
new file mode 100644
index 00000000..edd01ec9
--- /dev/null
+++ b/tests/fixtures/tanstack-start/typescript-strict/vite.config.ts
@@ -0,0 +1,14 @@
+import { defineConfig } from 'vite';
+import { tanstackStart } from '@tanstack/react-start/plugin/vite';
+import viteReact from '@vitejs/plugin-react';
+import viteTsConfigPaths from 'vite-tsconfig-paths';
+
+export default defineConfig({
+  plugins: [
+    viteTsConfigPaths({
+      projects: ['./tsconfig.json'],
+    }),
+    tanstackStart(),
+    viteReact(),
+  ],
+});
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/README.md b/tests/fixtures/vanilla-js/conflicting-auth/README.md
new file mode 100644
index 00000000..8bbe8bf2
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/README.md
@@ -0,0 +1,38 @@
+# Vanilla JS - Conflicting Auth Fixture
+
+## Edge Case Description
+
+This fixture has an existing custom authentication module with localStorage-based session management, auth state listeners, and protected routes. The agent must integrate AuthKit while preserving or migrating this functionality.
+
+## Expected Agent Behavior
+
+- Detect existing auth implementation in `auth.js`
+- Integrate AuthKit while handling:
+  - Existing `onAuthStateChange` listeners
+  - User preferences storage
+  - Protected route patterns (`requireAuth`)
+- Should NOT simply delete existing auth code without migration
+
+## Files of Interest
+
+- `auth.js` - Full auth module with login, logout, session management
+- `main.js` - Uses auth for login form and UI updates
+- `dashboard.js` - Uses `requireAuth` for page protection
+- All HTML files - Reference auth status in nav
+
+## Success Criteria
+
+- [ ] AuthKit is integrated
+- [ ] Existing auth state listeners still work
+- [ ] Dashboard protection still works
+- [ ] User preferences are migrated or preserved
+- [ ] Build succeeds
+
+## Notes
+
+This is a realistic scenario - many vanilla JS apps have custom auth before adopting a third-party solution. The agent should recognize this and propose a migration strategy.
+
+Ideal approaches:
+1. Replace custom auth with AuthKit but preserve the listener pattern
+2. Migrate user preferences to AuthKit user profile
+3. Update requireAuth to use AuthKit session checking
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/about.html b/tests/fixtures/vanilla-js/conflicting-auth/about.html
new file mode 100644
index 00000000..1c9cae91
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/about.html
@@ -0,0 +1,20 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>About - Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav>
+      <a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a>
+      <span id="auth-status"></span>
+    </nav>
+    <div id="app">
+      <h1>About</h1>
+      <p>This is an existing Vanilla JS application with custom auth.</p>
+    </div>
+    <script type="module" src="/main.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/auth.js b/tests/fixtures/vanilla-js/conflicting-auth/auth.js
new file mode 100644
index 00000000..2475e9c9
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/auth.js
@@ -0,0 +1,95 @@
+// Custom authentication module for Vanilla JS
+// This module provides existing auth functionality that should be preserved
+
+const AUTH_STORAGE_KEY = 'app_auth_state';
+
+// Auth-change subscribers; onAuthStateChange adds one and returns a function that removes it again
+const authListeners = new Set();
+export function onAuthStateChange(callback) {
+  const unsubscribe = () => authListeners.delete(callback);
+  authListeners.add(callback);
+  return unsubscribe;
+}
+
+function notifyAuthStateChange(user) {
+  for (const listener of authListeners) listener(user); // hand the new user (or null) to every subscriber
+}
+
+// Return the user stored in localStorage, or null when there is no usable session.
+// Expired sessions go through logout(); unparseable data is removed. A record
+// without a `user` yields null, so isAuthenticated() stays false for it.
+export function getCurrentUser() {
+  try {
+    const stored = localStorage.getItem(AUTH_STORAGE_KEY);
+    if (stored) {
+      const data = JSON.parse(stored);
+      if (data.expiresAt && Date.now() > data.expiresAt) {
+        logout();
+        return null;
+      }
+      return data.user ?? null;
+    }
+  } catch {
+    localStorage.removeItem(AUTH_STORAGE_KEY);
+  }
+  return null;
+}
+
+// Mock login: after a simulated 500 ms API call, rejects when email or password is missing; otherwise stores a 24-hour session, notifies listeners and resolves with the new user.
+export async function login(credentials) {
+  // Simulate API call
+  await new Promise((resolve) => setTimeout(resolve, 500));
+
+  if (!credentials.email || !credentials.password) {
+    throw new Error('Email and password required');
+  }
+  // Random base-36 id suffix: slice(2, 11) skips the leading "0." and keeps up to 9 characters
+  const user = {
+    id: 'user-' + Math.random().toString(36).slice(2, 11),
+    email: credentials.email,
+    name: credentials.email.split('@')[0],
+    role: 'user',
+    preferences: {
+      theme: 'light',
+      notifications: true,
+    },
+  };
+
+  const authState = {
+    user,
+    expiresAt: Date.now() + 24 * 60 * 60 * 1000, // 24 hours
+  };
+
+  localStorage.setItem(AUTH_STORAGE_KEY, JSON.stringify(authState));
+  notifyAuthStateChange(user);
+
+  return user;
+}
+
+export function logout() {
+  localStorage.removeItem(AUTH_STORAGE_KEY); // cleared before notifying, so a listener calling getCurrentUser() gets null
+  notifyAuthStateChange(null);
+}
+
+// Report whether a session exists: true unless getCurrentUser() returns null
+export function isAuthenticated() {
+  return !(getCurrentUser() === null);
+}
+
+// Page guard: true for a signed-in visitor; otherwise navigates to `redirectTo` and returns false
+export function requireAuth(redirectTo = '/') {
+  const allowed = isAuthenticated();
+  if (!allowed) {
+    window.location.href = redirectTo;
+  }
+  return allowed;
+}
+
+// Restore the stored session on page load.
+// Listeners are notified only when getCurrentUser() yields a user; its result
+// is handed back to the caller either way.
+export function initAuth() {
+  const restored = getCurrentUser();
+  if (restored) notifyAuthStateChange(restored);
+  return restored;
+}
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/dashboard.html b/tests/fixtures/vanilla-js/conflicting-auth/dashboard.html
new file mode 100644
index 00000000..eded59b6
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/dashboard.html
@@ -0,0 +1,22 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Dashboard - Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav>
+      <a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a>
+      <span id="auth-status"></span>
+    </nav>
+    <div id="app">
+      <h1>Dashboard</h1>
+      <div id="dashboard-content">
+        <p>Loading...</p>
+      </div>
+    </div>
+    <script type="module" src="/dashboard.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/dashboard.js b/tests/fixtures/vanilla-js/conflicting-auth/dashboard.js
new file mode 100644
index 00000000..a50f7944
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/dashboard.js
@@ -0,0 +1,76 @@
+import { initAuth, requireAuth, onAuthStateChange, getCurrentUser, logout } from './auth.js';
+
+// Simple utilities
+function $(selector) {
+  return document.querySelector(selector); // first element matching the CSS selector, or null
+}
+
+function createElement(tag, options = {}) {
+  const node = document.createElement(tag);
+  // Apply each option only when truthy: `text` maps to textContent, the others to same-named properties
+  const applied = { textContent: options.text, className: options.className, id: options.id };
+  for (const [prop, value] of Object.entries(applied)) if (value) node[prop] = value;
+  return node;
+}
+
+// Initialize auth and protect this page
+const user = initAuth();
+// requireAuth('/') sends unauthenticated visitors to the home page and returns false;
+// throwing then halts this module so no dashboard UI is built during the navigation.
+if (!requireAuth('/')) {
+  throw new Error('Authentication required');
+}
+
+// Render the signed-in dashboard with DOM nodes and textContent only (no HTML strings)
+function updateAuthUI(user) {
+  const statusEl = $('#auth-status');
+  const contentEl = $('#dashboard-content');
+
+  // There is no logged-out rendering: a falsy user leaves the page untouched
+  if (!user) return;
+
+  if (statusEl) {
+    statusEl.textContent = '';
+    const greeting = createElement('span', { text: ` | Welcome, ${user.name} | ` });
+    const signOutButton = createElement('button', { id: 'logout-btn', text: 'Logout' });
+    signOutButton.addEventListener('click', () => {
+      logout();
+      window.location.href = '/';
+    });
+    statusEl.appendChild(greeting);
+    statusEl.appendChild(signOutButton);
+  }
+
+  if (!contentEl) return;
+
+  // Build one stat card: an <h3> title above a <p> value
+  const buildStatCard = (title, value) => {
+    const card = createElement('div', { className: 'stat-card' });
+    card.appendChild(createElement('h3', { text: title }));
+    card.appendChild(createElement('p', { text: value }));
+    return card;
+  };
+
+  contentEl.textContent = '';
+
+  const greetingLine = createElement('p');
+  greetingLine.appendChild(document.createTextNode('Welcome back, '));
+  greetingLine.appendChild(createElement('strong', { text: user.name }));
+  greetingLine.appendChild(document.createTextNode('!'));
+  contentEl.appendChild(greetingLine);
+
+  contentEl.appendChild(createElement('p', { text: `Role: ${user.role}` }));
+  contentEl.appendChild(createElement('p', { text: `Theme: ${user.preferences.theme}` }));
+  contentEl.appendChild(createElement('h2', { text: 'Your Stats' }));
+
+  const statsRow = createElement('div', { className: 'stats' });
+  statsRow.appendChild(buildStatCard('Projects', '12'));
+  statsRow.appendChild(buildStatCard('Tasks', '47'));
+  contentEl.appendChild(statsRow);
+}
+
+// Re-render whenever auth.js reports a login or logout
+onAuthStateChange(updateAuthUI);
+
+// initAuth() ran before the listener above was registered, so render the initial state explicitly
+updateAuthUI(user);
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/index.html b/tests/fixtures/vanilla-js/conflicting-auth/index.html
new file mode 100644
index 00000000..430fa392
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/index.html
@@ -0,0 +1,28 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav>
+      <a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a>
+      <span id="auth-status"></span>
+    </nav>
+    <div id="app">
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+      <div id="login-section">
+        <h2>Login</h2>
+        <form id="login-form">
+          <input type="email" id="email" placeholder="Email" required />
+          <input type="password" id="password" placeholder="Password" required />
+          <button type="submit">Login</button>
+        </form>
+      </div>
+    </div>
+    <script type="module" src="/main.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/main.js b/tests/fixtures/vanilla-js/conflicting-auth/main.js
new file mode 100644
index 00000000..f36037e2
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/main.js
@@ -0,0 +1,68 @@
+import { initAuth, login, logout, onAuthStateChange, isAuthenticated, getCurrentUser } from './auth.js';
+
+console.log('Vanilla JS app loaded');
+
+// Simple utilities
+export function $(selector) {
+  return document.querySelector(selector); // first element matching the CSS selector, or null
+}
+
+export function $$(selector) {
+  return document.querySelectorAll(selector); // NodeList of every element matching the CSS selector
+}
+
+// Initialize authentication
+const user = initAuth();
+
+// Render the nav status and toggle the login form for the given user.
+// A truthy user gets a welcome label plus a Logout button and the form is hidden;
+// a falsy user gets an empty status and the form is shown again.
+function updateAuthUI(user) {
+  const statusEl = $('#auth-status');
+  const formSection = $('#login-section');
+
+  if (statusEl) {
+    // Cleared in both states; the content is rebuilt with textContent, never innerHTML
+    statusEl.textContent = '';
+    if (user) {
+      const greeting = document.createElement('span');
+      greeting.textContent = ` | Welcome, ${user.name} | `;
+      const signOutButton = document.createElement('button');
+      signOutButton.id = 'logout-btn';
+      signOutButton.textContent = 'Logout';
+
+      // logout() notifies the auth listeners, which runs this function again with null
+      signOutButton.addEventListener('click', () => logout());
+      statusEl.appendChild(greeting);
+      statusEl.appendChild(signOutButton);
+    }
+  }
+
+  // The login form is only shown while nobody is signed in
+  const formDisplay = user ? 'none' : 'block';
+  if (formSection) {
+    formSection.style.display = formDisplay;
+  }
+}
+
+// Re-render whenever auth.js reports a login or logout
+onAuthStateChange(updateAuthUI);
+
+// initAuth() ran before the listener above was registered, so render the initial state explicitly
+updateAuthUI(user);
+
+// Handle login form: only pages that contain #login-form get the submit handler
+const loginForm = $('#login-form');
+if (loginForm) {
+  loginForm.addEventListener('submit', async (e) => {
+    e.preventDefault();
+    const email = $('#email').value;
+    const password = $('#password').value;
+    // A successful login() notifies the auth listeners, so nothing else is rendered here
+    try {
+      await login({ email, password });
+    } catch (error) {
+      alert('Login failed: ' + error.message);
+    }
+  });
+}
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/package.json b/tests/fixtures/vanilla-js/conflicting-auth/package.json
new file mode 100644
index 00000000..ff25dd23
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/package.json
@@ -0,0 +1,13 @@
+{
+  "name": "vanilla-js-conflicting-auth-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "devDependencies": {
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/vanilla-js/conflicting-auth/styles.css b/tests/fixtures/vanilla-js/conflicting-auth/styles.css
new file mode 100644
index 00000000..58b4c040
--- /dev/null
+++ b/tests/fixtures/vanilla-js/conflicting-auth/styles.css
@@ -0,0 +1,89 @@
+body {
+  font-family:
+    system-ui,
+    -apple-system,
+    sans-serif;
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 20px;
+}
+
+nav {
+  margin-bottom: 20px;
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+nav a {
+  color: #333;
+  text-decoration: none;
+}
+
+nav a:hover {
+  text-decoration: underline;
+}
+
+#login-section {
+  margin-top: 20px;
+  padding: 20px;
+  background: #f5f5f5;
+  border-radius: 8px;
+}
+
+#login-form {
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+  max-width: 300px;
+}
+
+#login-form input {
+  padding: 8px;
+  border: 1px solid #ddd;
+  border-radius: 4px;
+}
+
+#login-form button {
+  padding: 10px;
+  background: #333;
+  color: white;
+  border: none;
+  border-radius: 4px;
+  cursor: pointer;
+}
+
+#login-form button:hover {
+  background: #555;
+}
+
+#logout-btn {
+  background: none;
+  border: none;
+  color: #666;
+  cursor: pointer;
+  text-decoration: underline;
+}
+
+.stats {
+  display: flex;
+  gap: 20px;
+  margin-top: 20px;
+}
+
+.stat-card {
+  padding: 20px;
+  background: #f5f5f5;
+  border-radius: 8px;
+  text-align: center;
+}
+
+.stat-card h3 {
+  margin: 0 0 10px 0;
+  color: #666;
+}
+
+.stat-card p {
+  margin: 0;
+  font-size: 2em;
+  font-weight: bold;
+}
diff --git a/tests/fixtures/vanilla-js/partial-install/README.md b/tests/fixtures/vanilla-js/partial-install/README.md
new file mode 100644
index 00000000..7b2bcc0b
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/README.md
@@ -0,0 +1,31 @@
+# Vanilla JS - Partial Install Fixture
+
+## Edge Case Description
+
+This fixture represents a Vanilla JS project where AuthKit was partially installed - the package is in dependencies but integration was never completed.
+
+## Expected Agent Behavior
+
+- Detect that @workos-inc/authkit-js is already installed
+- Complete the integration by:
+  - Creating AuthKit client in main.js
+  - Setting up login/logout buttons
+  - Creating callback handler
+- Should NOT reinstall the package
+
+## Files of Interest
+
+- `package.json` - Already has @workos-inc/authkit-js dependency
+- `main.js` - Contains a TODO and a commented-out `createClient` import, but no working AuthKit code
+
+## Success Criteria
+
+- [ ] AuthKit client is created
+- [ ] Login/logout functionality works
+- [ ] Callback route is handled
+- [ ] Build succeeds
+- [ ] Package is not reinstalled
+
+## Notes
+
+This is a common scenario: a developer adds the package but never finishes the integration.
diff --git a/tests/fixtures/vanilla-js/partial-install/about.html b/tests/fixtures/vanilla-js/partial-install/about.html
new file mode 100644
index 00000000..c9874ffa
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/about.html
@@ -0,0 +1,17 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>About - Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav><a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a></nav>
+    <div id="app">
+      <h1>About</h1>
+      <p>This is an existing Vanilla JS application.</p>
+    </div>
+    <script type="module" src="/main.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/partial-install/dashboard.html b/tests/fixtures/vanilla-js/partial-install/dashboard.html
new file mode 100644
index 00000000..6641a6e4
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/dashboard.html
@@ -0,0 +1,17 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Dashboard - Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav><a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a></nav>
+    <div id="app">
+      <h1>Dashboard</h1>
+      <p>Protected content would go here.</p>
+    </div>
+    <script type="module" src="/main.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/partial-install/index.html b/tests/fixtures/vanilla-js/partial-install/index.html
new file mode 100644
index 00000000..f737c60e
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/index.html
@@ -0,0 +1,17 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Vanilla JS App</title>
+    <link rel="stylesheet" href="/styles.css" />
+  </head>
+  <body>
+    <nav><a href="/">Home</a> | <a href="/about.html">About</a> | <a href="/dashboard.html">Dashboard</a></nav>
+    <div id="app">
+      <h1>Home</h1>
+      <p>Welcome to the home page.</p>
+    </div>
+    <script type="module" src="/main.js"></script>
+  </body>
+</html>
diff --git a/tests/fixtures/vanilla-js/partial-install/main.js b/tests/fixtures/vanilla-js/partial-install/main.js
new file mode 100644
index 00000000..14c1b9ba
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/main.js
@@ -0,0 +1,13 @@
+// TODO: Complete AuthKit setup
+// import { createClient } from '@workos-inc/authkit-js';
+
+console.log('Vanilla JS app loaded');
+
+// Simple utilities
+export function $(selector) {
+  return document.querySelector(selector); // first element matching the CSS selector, or null
+}
+
+export function $$(selector) {
+  return document.querySelectorAll(selector); // NodeList of every element matching the CSS selector
+}
diff --git a/tests/fixtures/vanilla-js/partial-install/package.json b/tests/fixtures/vanilla-js/partial-install/package.json
new file mode 100644
index 00000000..c5957891
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/package.json
@@ -0,0 +1,16 @@
+{
+  "name": "vanilla-js-partial-install-fixture",
+  "private": true,
+  "type": "module",
+  "scripts": {
+    "dev": "vite",
+    "build": "vite build",
+    "preview": "vite preview"
+  },
+  "dependencies": {
+    "@workos-inc/authkit-js": "^0.3.0"
+  },
+  "devDependencies": {
+    "vite": "^6.0.5"
+  }
+}
diff --git a/tests/fixtures/vanilla-js/partial-install/styles.css b/tests/fixtures/vanilla-js/partial-install/styles.css
new file mode 100644
index 00000000..16c2e835
--- /dev/null
+++ b/tests/fixtures/vanilla-js/partial-install/styles.css
@@ -0,0 +1,24 @@
+body {
+  font-family:
+    system-ui,
+    -apple-system,
+    sans-serif;
+  max-width: 800px;
+  margin: 0 auto;
+  padding: 20px;
+}
+
+nav {
+  margin-bottom: 20px;
+  padding-bottom: 10px;
+  border-bottom: 1px solid #eee;
+}
+
+nav a {
+  color: #333;
+  text-decoration: none;
+}
+
+nav a:hover {
+  text-decoration: underline;
+}
diff --git a/vitest.config.ts b/vitest.config.ts
index 8340b2be..d43a60f3 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -4,7 +4,7 @@ export default defineConfig({
   test: {
     globals: true,
     environment: 'node',
-    include: ['src/**/*.spec.ts'],
+    include: ['src/**/*.spec.ts', 'tests/evals/**/*.spec.ts'],
     coverage: {
       provider: 'v8',
       reporter: ['text', 'json', 'html'],