From 52628b062235e88a63f042cf45d75b19a62056b4 Mon Sep 17 00:00:00 2001
From: Adnan Rashid Hussain <ahussain@chanzuckerberg.com>
Date: Mon, 9 Feb 2026 09:47:14 -0800
Subject: [PATCH 1/4] feat: Implement Batch Evaluator

---
 sdks/typescript/package-lock.json             |  49 ++
 sdks/typescript/package.json                  |   7 +
 sdks/typescript/src/batch/README.md           | 361 +++++++++++++++
 sdks/typescript/src/batch/evaluator.ts        | 323 ++++++++++++++
 sdks/typescript/src/batch/formatters.ts       | 399 +++++++++++++++++
 sdks/typescript/src/batch/index.ts            | 422 ++++++++++++++++++
 sdks/typescript/src/batch/progress.ts         | 167 +++++++
 sdks/typescript/src/batch/types.ts            |  71 +++
 sdks/typescript/src/evaluators/base.ts        |  11 +
 sdks/typescript/tests/fixtures/batch-test.csv |   3 +
 .../integration/batch.integration.test.ts     | 192 ++++++++
 .../tests/unit/batch/csv-parsing.test.ts      |  91 ++++
 .../tests/unit/batch/formatters.test.ts       | 252 +++++++++++
 .../tests/unit/batch/limits.test.ts           | 107 +++++
 sdks/typescript/tsup.config.ts                |   2 +-
 15 files changed, 2456 insertions(+), 1 deletion(-)
 create mode 100644 sdks/typescript/src/batch/README.md
 create mode 100644 sdks/typescript/src/batch/evaluator.ts
 create mode 100644 sdks/typescript/src/batch/formatters.ts
 create mode 100644 sdks/typescript/src/batch/index.ts
 create mode 100644 sdks/typescript/src/batch/progress.ts
 create mode 100644 sdks/typescript/src/batch/types.ts
 create mode 100644 sdks/typescript/tests/fixtures/batch-test.csv
 create mode 100644 sdks/typescript/tests/integration/batch.integration.test.ts
 create mode 100644 sdks/typescript/tests/unit/batch/csv-parsing.test.ts
 create mode 100644 sdks/typescript/tests/unit/batch/formatters.test.ts
 create mode 100644 sdks/typescript/tests/unit/batch/limits.test.ts

diff --git a/sdks/typescript/package-lock.json b/sdks/typescript/package-lock.json
index d8815a0..80f178f 100644
--- a/sdks/typescript/package-lock.json
+++ b/sdks/typescript/package-lock.json
@@ -10,7 +10,10 @@
       "license": "MIT",
       "dependencies": {
         "compromise": "^14.13.0",
+        "csv-parse": "^6.1.0",
+        "csv-stringify": "^6.6.0",
         "p-limit": "^5.0.0",
+        "prompts": "^2.4.2",
         "syllable": "^5.0.1",
         "zod": "^3.22.4"
       },
@@ -19,6 +22,7 @@
         "@ai-sdk/google": "^3.0.7",
         "@ai-sdk/openai": "^3.0.9",
         "@types/node": "^20.11.5",
+        "@types/prompts": "^2.4.9",
         "@typescript-eslint/eslint-plugin": "^6.19.0",
         "@typescript-eslint/parser": "^6.19.0",
         "@vitest/coverage-v8": "^4.0.17",
@@ -1190,6 +1194,16 @@
       "resolved": "https://registry.npmjs.org/@types/pluralize/-/pluralize-0.0.29.tgz",
       "integrity": "sha512-BYOID+l2Aco2nBik+iYS4SZX0Lf20KPILP5RGmM1IgzdwNdTs0eebiFriOPcej1sX9mLnSoiNte5zcFxssgpGA=="
     },
+    "node_modules/@types/prompts": {
+      "version": "2.4.9",
+      "resolved": "https://registry.npmjs.org/@types/prompts/-/prompts-2.4.9.tgz",
+      "integrity": "sha512-qTxFi6Buiu8+50/+3DGIWLHM6QuWsEKugJnnP6iv2Mc4ncxE4A/OJkjuVOA+5X0X1S/nq5VJRa8Lu+nwcvbrKA==",
+      "dev": true,
+      "dependencies": {
+        "@types/node": "*",
+        "kleur": "^3.0.3"
+      }
+    },
     "node_modules/@types/semver": {
       "version": "7.7.1",
       "resolved": "https://registry.npmjs.org/@types/semver/-/semver-7.7.1.tgz",
@@ -1830,6 +1844,16 @@
         "node": ">= 8"
       }
     },
+    "node_modules/csv-parse": {
+      "version": "6.1.0",
+      "resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-6.1.0.tgz",
+      "integrity": "sha512-CEE+jwpgLn+MmtCpVcPtiCZpVtB6Z2OKPTr34pycYYoL7sxdOkXDdQ4lRiw6ioC0q6BLqhc6cKweCVvral8yhw=="
+    },
+    "node_modules/csv-stringify": {
+      "version": "6.6.0",
+      "resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-6.6.0.tgz",
+      "integrity": "sha512-YW32lKOmIBgbxtu3g5SaiqWNwa/9ISQt2EcgOq0+RAIFufFp9is6tqNnKahqE5kuKvrnYAzs28r+s6pXJR8Vcw=="
+    },
     "node_modules/debug": {
       "version": "4.4.3",
       "resolved": "https://registry.npmjs.org/debug/-/debug-4.4.3.tgz",
@@ -2593,6 +2617,14 @@
         "json-buffer": "3.0.1"
       }
     },
+    "node_modules/kleur": {
+      "version": "3.0.3",
+      "resolved": "https://registry.npmjs.org/kleur/-/kleur-3.0.3.tgz",
+      "integrity": "sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==",
+      "engines": {
+        "node": ">=6"
+      }
+    },
     "node_modules/levn": {
       "version": "0.4.1",
       "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz",
@@ -3064,6 +3096,18 @@
         "node": ">= 0.8.0"
       }
     },
+    "node_modules/prompts": {
+      "version": "2.4.2",
+      "resolved": "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz",
+      "integrity": "sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==",
+      "dependencies": {
+        "kleur": "^3.0.3",
+        "sisteransi": "^1.0.5"
+      },
+      "engines": {
+        "node": ">= 6"
+      }
+    },
     "node_modules/punycode": {
       "version": "2.3.1",
       "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz",
@@ -3247,6 +3291,11 @@
       "integrity": "sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==",
       "dev": true
     },
+    "node_modules/sisteransi": {
+      "version": "1.0.5",
+      "resolved": "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz",
+      "integrity": "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="
+    },
     "node_modules/slash": {
       "version": "3.0.0",
       "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz",
diff --git a/sdks/typescript/package.json b/sdks/typescript/package.json
index d293235..5f1fbc3 100644
--- a/sdks/typescript/package.json
+++ b/sdks/typescript/package.json
@@ -12,6 +12,9 @@
     }
   },
   "sideEffects": false,
+  "bin": {
+    "evaluators-batch": "./dist/batch/index.js"
+  },
   "files": [
     "dist",
     "README.md",
@@ -60,7 +63,10 @@
   },
   "dependencies": {
     "compromise": "^14.13.0",
+    "csv-parse": "^6.1.0",
+    "csv-stringify": "^6.6.0",
     "p-limit": "^5.0.0",
+    "prompts": "^2.4.2",
     "syllable": "^5.0.1",
     "zod": "^3.22.4"
   },
@@ -69,6 +75,7 @@
     "@ai-sdk/google": "^3.0.7",
     "@ai-sdk/openai": "^3.0.9",
     "@types/node": "^20.11.5",
+    "@types/prompts": "^2.4.9",
     "@typescript-eslint/eslint-plugin": "^6.19.0",
     "@typescript-eslint/parser": "^6.19.0",
     "@vitest/coverage-v8": "^4.0.17",
diff --git a/sdks/typescript/src/batch/README.md b/sdks/typescript/src/batch/README.md
new file mode 100644
index 0000000..8b4fc91
--- /dev/null
+++ b/sdks/typescript/src/batch/README.md
@@ -0,0 +1,361 @@
+# Batch CSV Evaluator
+
+Evaluate multiple texts from a CSV file using one or more evaluators, with results output in CSV, JSON, and HTML formats.
+
+## Usage
+
+### Installation
+
+After publishing to npm:
+
+```bash
+# Install globally
+npm install -g @learning-commons/evaluators
+
+# Or run directly with npx
+npx --package=@learning-commons/evaluators evaluators-batch
+```
+
+### Interactive Mode
+
+Run the batch evaluator interactively from any directory:
+
+```bash
+# If installed globally
+evaluators-batch
+
+# Or with npx
+npx --package=@learning-commons/evaluators evaluators-batch
+```
+
+**Important:** Run this command from the directory containing your CSV file, or provide an absolute path to your CSV.
+
+The CLI will guide you through:
+1. **CSV File Path**: Location of your input CSV file
+2. **Evaluator Selection**: Choose which evaluators to run (multi-select)
+3. **API Keys**: Enter required API keys (only prompted for needed keys)
+4. **Output Directory**: Where to save results (default: timestamped folder in current directory)
+5. **Confirmation**: Review summary before starting
+
+The output directory is automatically created with a human-readable timestamp:
+```
+batch-results-2024-02-07_14-30-22/
+├── results.csv
+├── results.json
+└── results.html
+```
+
+### Input CSV Format
+
+Your CSV must have these columns:
+- `text` (or `TEXT`): The text content to evaluate
+- `grade` (or `GRADE`): The grade level for evaluation
+
+Example `input.csv`:
+```csv
+text,grade
+"The cat sat on the mat.",3
+"Photosynthesis is the process by which plants convert sunlight into energy.",5
+"The mitochondria are the powerhouse of the cell.",8
+```
+
+See `tests/fixtures/batch-test.csv` for a complete example.
+
+### Available Evaluators
+
+- **vocabulary**: Analyzes vocabulary complexity (requires Google + OpenAI keys)
+- **sentence-structure**: Analyzes sentence structure complexity (requires OpenAI key)
+- **grade-level-appropriateness**: Determines appropriate grade level (requires Google key)
+
+### Output Files
+
+Three files are generated:
+
+1. **CSV** (`results.csv`):
+   - Spreadsheet-compatible format
+   - One row per input row: the original input columns, followed by `<evaluator>_score`, `<evaluator>_reasoning`, and `<evaluator>_status` columns for each evaluator (evaluator IDs use underscores, e.g. `sentence_structure_score`)
+
+2. **JSON** (`results.json`):
+   - Structured data with full results and summary statistics
+   - Easy to parse programmatically
+
+3. **HTML** (`results.html`):
+   - Interactive table with sorting and filtering (AG Grid)
+   - Color-coded status indicators
+   - Summary statistics dashboard
+   - Self-contained (works offline)
+
+### Progress Display
+
+During evaluation, you'll see real-time progress:
+
+```
+Processing evaluations...
+████████████░░░░ 60% (180/300)
+  ✓ vocabulary: 95/100 successful
+  ✓ sentence-structure: 85/100 successful
+  ⏳ grade-level-appropriateness: 0/100 successful
+
+⏱  Elapsed: 2m 15s | Estimated remaining: 1m 30s
+```
+
+### Batch Size Limits
+
+**Hard Limit: 500 tasks maximum**
+- Tasks = Rows × Evaluators
+- Example: 166 rows × 3 evaluators = 498 tasks ✓
+- Example: 167 rows × 3 evaluators = 501 tasks ❌
+
+**Warnings:**
+- Batches > 100 tasks show estimated time and cost
+- Large batches default to "No" in confirmation prompt
+
+If you exceed the limit:
+```
+❌ Batch too large!
+
+  Maximum allowed: 500 tasks
+  Your batch: 600 tasks (200 rows × 3 evaluators)
+
+Suggestions:
+  • Reduce number of rows in CSV
+  • Select fewer evaluators
+  • Split into multiple smaller batches
+```
+
+### Parallelization
+
+The batch evaluator runs tasks in parallel with a concurrency limit of 3:
+- **3 evaluators per row**: All 3 run simultaneously for each row
+- **1 evaluator per row**: 3 different rows processed simultaneously
+- Optimizes throughput while respecting API rate limits
+
+### API Keys
+
+You can provide API keys in three ways:
+1. **Environment variables**: `GOOGLE_API_KEY`, `OPENAI_API_KEY`
+2. **Interactive prompts**: Enter when prompted (keys are masked)
+3. **Pre-filled prompts**: If env vars exist, they're used as defaults
+
+Only required keys are prompted:
+- Select only `sentence-structure`: Only OpenAI key needed
+- Select only `grade-level-appropriateness`: Only Google key needed
+- Select multiple evaluators: All required keys prompted
+
+### Example Session
+
+```bash
+$ npx evaluators-batch
+
+📊 Batch CSV Evaluator
+
+This tool will evaluate multiple texts using one or more evaluators.
+
+? Where is your CSV file? ./input.csv
+✓ Found 10 rows in CSV
+
+? Which evaluators do you want to run?
+  ◉ Vocabulary
+  ◯ Sentence Structure
+  ◉ Grade Level Appropriateness
+
+✓ Selected: vocabulary, grade-level-appropriateness
+
+? Google API Key: ••••••••••••
+? Output directory: ./batch-results-2024-02-07_14-30-22
+
+📝 Summary:
+  Input rows: 10
+  Evaluators: 2
+  Total tasks: 20
+  Output: ./batch-results-2024-02-07_14-30-22
+
+? Start batch evaluation? Yes
+
+# For larger batches (>100 tasks):
+⚠️  Warning: Large batch detected
+
+  API calls: 150
+  Estimated time: ~5 minutes
+  Estimated cost: ~$2.25
+
+? Start batch evaluation? (y/N)
+
+Processing evaluations...
+████████████████████ 100% (20/20)
+  ✓ vocabulary: 10/10 successful
+  ✓ grade-level-appropriateness: 10/10 successful
+
+⏱  Elapsed: 45s | Estimated remaining: 0s
+
+✅ Batch evaluation completed!
+
+Total tasks: 20
+Successful: 20 ✓
+Failed: 0 ✗
+Duration: 45s
+
+📄 Output files generated:
+  ./batch-results-2024-02-07_14-30-22/
+    ├── results.csv
+    ├── results.json
+    └── results.html
+```
+
+### Error Handling
+
+- **Individual failures**: Continue processing, mark as failed in output
+- **Invalid CSV**: Validation before starting
+- **Missing API keys**: Prompt for required keys
+- **Partial results**: Save all results even if some fail
+
+### Graceful Shutdown
+
+Press `Ctrl+C` during evaluation to shut down gracefully:
+
+1. **In-flight tasks complete**: Running evaluations finish processing
+2. **New tasks cancelled**: Pending tasks are skipped
+3. **Partial results saved**: All completed results are saved to `results-partial.*` files
+4. **Progress preserved**: No loss of work done so far
+
+Example:
+```bash
+# Press Ctrl+C during a long batch evaluation
+
+⚠️  Shutdown requested. Saving partial results...
+   (Press Ctrl+C again to force quit)
+
+✓ Saved 15 results to:
+  ./batch-results-2024-02-07_14-30-22/
+    ├── results-partial.csv
+    ├── results-partial.json
+    └── results-partial.html
+```
+
+Press `Ctrl+C` twice to force quit immediately (not recommended - may lose in-flight results).
+
+## Programmatic API
+
+You can also use the batch evaluator programmatically:
+
+```typescript
+import { BatchEvaluator } from '@learning-commons/evaluators/batch';
+
+const evaluator = new BatchEvaluator({
+  googleApiKey: process.env.GOOGLE_API_KEY,
+  openaiApiKey: process.env.OPENAI_API_KEY,
+  concurrency: 3,
+  maxRetries: 2,
+  telemetry: false,
+});
+
+const inputs = [
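+  { text: 'Sample text 1', grade: '3', rowIndex: 1, originalRow: { text: 'Sample text 1', grade: '3' } },
+  { text: 'Sample text 2', grade: '5', rowIndex: 2, originalRow: { text: 'Sample text 2', grade: '5' } },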
+];
+
+const output = await evaluator.evaluate(
+  inputs,
+  ['vocabulary', 'sentence-structure'],
+  (result) => {
+    console.log(`Completed: ${result.evaluatorId} for row ${result.rowIndex}`);
+  }
+);
+
+console.log(output.summary);
+
+// Programmatic cancellation example (sentence-structure only needs an OpenAI key)
+const evaluator2 = new BatchEvaluator({ openaiApiKey: 'key' });
+
+const evaluationPromise = evaluator2.evaluate(inputs, ['sentence-structure']);
+
+// Cancel after 5 seconds
+setTimeout(() => {
+  const partialResults = evaluator2.cancel();
+  console.log(`Cancelled with ${partialResults.length} completed results`);
+}, 5000);
+
+await evaluationPromise;
+```
+
+## Cross-Platform Compatibility
+
+The batch evaluator works on:
+- **macOS** ✓
+- **Windows** ✓
+- **Linux** ✓
+
+All file paths are handled with Node.js `path` module for cross-platform compatibility.
+
+---
+
+## Development & Testing
+
+### Running Locally (Before Publishing)
+
+When developing or testing the batch evaluator locally:
+
+```bash
+# From the SDK root directory (sdks/typescript/)
+cd sdks/typescript
+
+# Build the project
+npm run build
+
+# Run the batch CLI directly
+node dist/batch/index.js
+
+# Or test with a sample CSV
+node dist/batch/index.js
+# When prompted, enter: test-input.csv
+```
+
+**Important:** Always run from the SDK root directory (`sdks/typescript/`) so the CLI can find the prompt files at `dist/prompts/`.
+
+### Creating Test CSVs
+
+Create a test CSV in the SDK root:
+
+```bash
+# From sdks/typescript/
+cat > test-input.csv << 'EOF'
+text,grade
+"The cat sat on the mat.",3
+"Photosynthesis converts light energy into chemical energy.",5
+EOF
+
+# Then run the CLI
+node dist/batch/index.js
+# Enter: test-input.csv
+```
+
+### Testing the Package
+
+Test the package locally before publishing:
+
+```bash
+# Build the package
+npm run build
+
+# Create a tarball
+npm pack
+# Creates: learning-commons-evaluators-0.1.0.tgz
+
+# Test installation in another directory
+cd /tmp
+npm install /path/to/learning-commons-evaluators-0.1.0.tgz
+
+# Test the CLI
+npx evaluators-batch
+```
+
+### After Publishing to npm
+
+Once published, users can run from any directory:
+
+```bash
+# Run from anywhere
+cd ~/Documents/my-data
+npx --package=@learning-commons/evaluators evaluators-batch
+# Works! Prompts are bundled with the package
+```
diff --git a/sdks/typescript/src/batch/evaluator.ts b/sdks/typescript/src/batch/evaluator.ts
new file mode 100644
index 0000000..09942cd
--- /dev/null
+++ b/sdks/typescript/src/batch/evaluator.ts
@@ -0,0 +1,323 @@
+import pLimit from 'p-limit';
+import {
+  VocabularyEvaluator,
+  SentenceStructureEvaluator,
+  GradeLevelAppropriatenessEvaluator,
+} from '../evaluators/index.js';
+import type { BaseEvaluator } from '../evaluators/base.js';
+import type {
+  BatchInput,
+  BatchTask,
+  BatchResult,
+  BatchOutput,
+  BatchConfig,
+  BatchSummary,
+} from './types.js';
+
+/**
+ * Evaluator classes selectable in a batch run; getAvailableEvaluators() reports them in this array order.
+ */
+const EVALUATORS = [
+  VocabularyEvaluator,
+  SentenceStructureEvaluator,
+  GradeLevelAppropriatenessEvaluator,
+] as const;
+
+/**
+ * Lookup from each evaluator's `metadata.id` to its class, built once from EVALUATORS.
+ */
+const EVALUATOR_MAP = new Map(EVALUATORS.map((E) => [E.metadata.id, E]));
+
+/**
+ * List every available evaluator's id and name together with which API keys (Google / OpenAI) its metadata says it requires.
+ */
+export function getAvailableEvaluators(): Array<{
+  id: string;
+  name: string;
+  requiresGoogleKey: boolean;
+  requiresOpenAIKey: boolean;
+}> {
+  return EVALUATORS.map((E) => ({
+    id: E.metadata.id,
+    name: E.metadata.name,
+    requiresGoogleKey: E.metadata.requiresGoogleKey,
+    requiresOpenAIKey: E.metadata.requiresOpenAIKey,
+  }));
+}
+
+/**
+ * Throw an Error naming every ID in `evaluatorIds` that is not a registered evaluator (and listing the valid IDs); no-op when all are known.
+ */
+export function validateEvaluators(evaluatorIds: string[]): void {
+  const invalid = evaluatorIds.filter((id) => !EVALUATOR_MAP.has(id));
+  if (invalid.length > 0) {
+    throw new Error(
+      `Invalid evaluator IDs: ${invalid.join(', ')}. Available: ${Array.from(EVALUATOR_MAP.keys()).join(', ')}`
+    );
+  }
+}
+
+/**
+ * Report which provider API keys the selected evaluators need. Unknown IDs add no requirement (call validateEvaluators to reject them).
+ */
+export function getRequiredApiKeys(evaluatorIds: string[]): {
+  requiresGoogle: boolean;
+  requiresOpenAI: boolean;
+} {
+  // `?.` + `?? false`: an unregistered ID must not crash with a TypeError on `undefined.metadata`.
+  const requiresGoogle = evaluatorIds.some(
+    (id) => EVALUATOR_MAP.get(id)?.metadata.requiresGoogleKey ?? false
+  );
+  const requiresOpenAI = evaluatorIds.some(
+    (id) => EVALUATOR_MAP.get(id)?.metadata.requiresOpenAIKey ?? false
+  );
+  return { requiresGoogle, requiresOpenAI };
+}
+
+/**
+ * Runs every selected evaluator against every input row (tasks = rows x evaluators).
+ *
+ * Tasks share one p-limit queue; an evaluator failure is recorded as an 'error' result instead of rejecting the batch.
+ */
+export class BatchEvaluator {
+  private config: BatchConfig;
+  private limit: ReturnType<typeof pLimit>;
+  private evaluatorInstances = new Map<string, BaseEvaluator>();
+  private isCancelled = false;
+  private completedResults: BatchResult[] = [];
+
+  constructor(config: BatchConfig) {
+    // Defaults are applied with `??` (not spread order) so an explicit `undefined` option cannot erase one.
+    this.config = {
+      ...config,
+      concurrency: config.concurrency ?? 3,
+      maxRetries: config.maxRetries ?? 2,
+      telemetry: config.telemetry ?? false,
+    };
+    this.limit = pLimit(this.config.concurrency!);
+  }
+
+  /**
+   * Stop starting new tasks; evaluations already in flight still run to completion.
+   * Returns a copy of the results completed so far (successes and errors).
+   */
+  cancel(): BatchResult[] {
+    this.isCancelled = true;
+    return [...this.completedResults];
+  }
+
+  /**
+   * Construct (once per ID) and cache an evaluator instance for each requested ID; throws on an unknown ID.
+   */
+  private initializeEvaluators(evaluatorIds: string[]): void {
+    for (const id of evaluatorIds) {
+      if (this.evaluatorInstances.has(id)) continue;
+
+      const EvaluatorClass = EVALUATOR_MAP.get(id);
+      if (!EvaluatorClass) {
+        throw new Error(`Unknown evaluator: ${id}`);
+      }
+
+      const evaluator = new EvaluatorClass({
+        googleApiKey: this.config.googleApiKey,
+        openaiApiKey: this.config.openaiApiKey,
+        maxRetries: this.config.maxRetries,
+        telemetry: this.config.telemetry,
+      });
+
+      this.evaluatorInstances.set(id, evaluator);
+    }
+  }
+
+  /**
+   * Expand inputs into one task per (row, evaluator) pair: rows in input order, evaluators in the given order within each row.
+   */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  private createTasks(inputs: BatchInput[], evaluatorIds: string[]): Array<BatchTask & { originalRow: Record<string, any> }> {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const tasks: Array<BatchTask & { originalRow: Record<string, any> }> = [];
+
+    for (const input of inputs) {
+      for (const evaluatorId of evaluatorIds) {
+        tasks.push({
+          text: input.text,
+          grade: input.grade,
+          evaluatorId,
+          rowIndex: input.rowIndex,
+          originalRow: input.originalRow,
+        });
+      }
+    }
+
+    return tasks;
+  }
+
+  /**
+   * Run one task and return its result; a failure thrown by the evaluator becomes an 'error' result instead of propagating.
+   */
+  private async executeTask(
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    task: BatchTask & { originalRow: Record<string, any> },
+    onProgress?: (result: BatchResult) => void
+  ): Promise<BatchResult> {
+    // Tasks skipped after cancel() are returned as errors but are not stored in completedResults or reported to onProgress
+    if (this.isCancelled) {
+      const batchResult: BatchResult = {
+        rowIndex: task.rowIndex,
+        text: task.text,
+        grade: task.grade,
+        evaluatorId: task.evaluatorId,
+        status: 'error',
+        error: 'Cancelled by user',
+        processingTimeMs: 0,
+        originalRow: task.originalRow,
+      };
+      return batchResult;
+    }
+
+    const startTime = Date.now();
+    const evaluator = this.evaluatorInstances.get(task.evaluatorId);
+
+    if (!evaluator) {
+      throw new Error(`Evaluator not initialized: ${task.evaluatorId}`);
+    }
+
+    try {
+      const result = await evaluator.evaluate(task.text, task.grade);
+
+      // Normalize the evaluator's score (string, object, or anything else) into a single string
+      let scoreString: string;
+      if (typeof result.score === 'string') {
+        scoreString = result.score;
+      } else if (typeof result.score === 'object' && result.score !== null) {
+        // For grade-level-appropriateness, extract the grade field
+        if ('grade' in result.score) {
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          scoreString = (result.score as any).grade;
+        } else {
+          // Fallback: JSON stringify the object
+          scoreString = JSON.stringify(result.score);
+        }
+      } else {
+        scoreString = String(result.score);
+      }
+
+      const batchResult: BatchResult = {
+        rowIndex: task.rowIndex,
+        text: task.text,
+        grade: task.grade,
+        evaluatorId: task.evaluatorId,
+        status: 'success',
+        score: scoreString,
+        reasoning: result.reasoning,
+        processingTimeMs: Date.now() - startTime,
+        originalRow: task.originalRow,
+      };
+
+      // Store completed result so cancel() can hand it back
+      this.completedResults.push(batchResult);
+
+      // Report progress
+      if (onProgress) {
+        onProgress(batchResult);
+      }
+
+      return batchResult;
+    } catch (error) {
+      const batchResult: BatchResult = {
+        rowIndex: task.rowIndex,
+        text: task.text,
+        grade: task.grade,
+        evaluatorId: task.evaluatorId,
+        status: 'error',
+        error: error instanceof Error ? error.message : String(error),
+        processingTimeMs: Date.now() - startTime,
+        originalRow: task.originalRow,
+      };
+
+      // Store completed result (even errors)
+      this.completedResults.push(batchResult);
+
+      // Report progress
+      if (onProgress) {
+        onProgress(batchResult);
+      }
+
+      return batchResult;
+    }
+  }
+
+  /**
+   * Count successes and failures overall and per evaluator ID; every 'error' result (including cancelled tasks) counts as failed.
+   */
+  private calculateSummary(results: BatchResult[], durationMs: number): BatchSummary {
+    const summary: BatchSummary = {
+      totalTasks: results.length,
+      successful: results.filter((r) => r.status === 'success').length,
+      failed: results.filter((r) => r.status === 'error').length,
+      durationMs,
+      resultsPerEvaluator: {},
+    };
+
+    // Calculate per-evaluator stats
+    const evaluatorIds = Array.from(new Set(results.map((r) => r.evaluatorId)));
+    for (const id of evaluatorIds) {
+      const evalResults = results.filter((r) => r.evaluatorId === id);
+      summary.resultsPerEvaluator[id] = {
+        successful: evalResults.filter((r) => r.status === 'success').length,
+        failed: evalResults.filter((r) => r.status === 'error').length,
+      };
+    }
+
+    return summary;
+  }
+
+  /**
+   * Run batch evaluation. Each call clears the cancel flag and the completed-results buffer first.
+   *
+   * @param inputs - Array of input rows
+   * @param evaluatorIds - Array of evaluator IDs to run; an unknown ID throws before any task starts
+   * @param onProgress - Optional callback invoked once per finished task (success or error)
+   * @returns Batch evaluation results (in task order) and summary
+   */
+  async evaluate(
+    inputs: BatchInput[],
+    evaluatorIds: string[],
+    onProgress?: (result: BatchResult) => void
+  ): Promise<BatchOutput> {
+    const startTime = Date.now();
+
+    // Reset state
+    this.isCancelled = false;
+    this.completedResults = [];
+
+    // Validate evaluators
+    validateEvaluators(evaluatorIds);
+
+    // Initialize evaluator instances
+    this.initializeEvaluators(evaluatorIds);
+
+    // Create all tasks (flattened)
+    const tasks = this.createTasks(inputs, evaluatorIds);
+
+    // Execute all tasks with concurrency control
+    // Use allSettled to get partial results even if cancelled
+    const settledResults = await Promise.allSettled(
+      tasks.map((task) => this.limit(() => this.executeTask(task, onProgress)))
+    );
+
+    // Extract fulfilled results (skip rejected)
+    const results = settledResults
+      .filter((r): r is PromiseFulfilledResult<BatchResult> => r.status === 'fulfilled')
+      .map((r) => r.value);
+
+    // Calculate summary
+    const durationMs = Date.now() - startTime;
+    const summary = this.calculateSummary(results, durationMs);
+
+    return {
+      results,
+      summary,
+    };
+  }
+}
diff --git a/sdks/typescript/src/batch/formatters.ts b/sdks/typescript/src/batch/formatters.ts
new file mode 100644
index 0000000..f80a63e
--- /dev/null
+++ b/sdks/typescript/src/batch/formatters.ts
@@ -0,0 +1,399 @@
+import type { BatchOutput, BatchResult } from './types.js';
+
+/**
+ * Bucket results by their `rowIndex`, preserving arrival order within each bucket
+ */
+function groupResultsByRow(results: BatchResult[]): Map<number, BatchResult[]> {
+  const byRow = new Map<number, BatchResult[]>();
+  for (const entry of results) {
+    const bucket = byRow.get(entry.rowIndex);
+    if (bucket === undefined) {
+      byRow.set(entry.rowIndex, [entry]);
+    } else {
+      bucket.push(entry);
+    }
+  }
+  return byRow;
+}
+
+/**
+ * Derive the column prefix for an evaluator: every hyphen in the ID becomes an underscore
+ */
+function formatEvaluatorPrefix(evaluatorId: string): string {
+  return evaluatorId.split('-').join('_');
+}
+
+/**
+ * Format results as CSV with columns per evaluator
+ *
+ * Emits a header line plus one line per input row: the row's original columns followed by
+ * `<prefix>_score`, `<prefix>_reasoning` and `<prefix>_status` for each evaluator.
+ * Header names and cell values are CSV-escaped.
+ *
+ * @param output - Batch output whose results are rendered
+ * @returns CSV text, or an empty string when there are no results
+ */
+export function formatAsCSV(output: BatchOutput): string {
+  if (output.results.length === 0) {
+    return '';
+  }
+
+  // Group results by row
+  const groupedByRow = groupResultsByRow(output.results);
+
+  // Get unique evaluator IDs (sorted for consistent column order)
+  const evaluatorIds = Array.from(
+    new Set(output.results.map(r => r.evaluatorId))
+  ).sort();
+
+  // Get original column names from first result (the same columns are read from every row)
+  const firstResult = output.results[0];
+  const originalColumns = Object.keys(firstResult.originalRow);
+
+  // Build headers: original columns + evaluator columns (score, reasoning, status)
+  const evaluatorColumns: string[] = [];
+  for (const evalId of evaluatorIds) {
+    const prefix = formatEvaluatorPrefix(evalId);
+    evaluatorColumns.push(`${prefix}_score`, `${prefix}_reasoning`, `${prefix}_status`);
+  }
+  // Column names come from the input data and may contain commas or quotes, so escape them too
+  const headers = [...originalColumns, ...evaluatorColumns].map((name) => escapeCSV(name));
+
+  // Build rows (one per input row)
+  const rows: string[][] = [];
+  const sortedRowIndices = Array.from(groupedByRow.keys()).sort((a, b) => a - b);
+
+  for (const rowIndex of sortedRowIndices) {
+    const resultsForRow = groupedByRow.get(rowIndex)!;
+    const firstResultForRow = resultsForRow[0];
+
+    // Original column values; `??` (not `||`) keeps legitimate falsy values such as 0 or false
+    const originalValues = originalColumns.map(col =>
+      escapeCSV(String(firstResultForRow.originalRow[col] ?? ''))
+    );
+
+    // Evaluator column values
+    const evaluatorValues: string[] = [];
+    for (const evalId of evaluatorIds) {
+      const result = resultsForRow.find(r => r.evaluatorId === evalId);
+
+      if (!result) {
+        // Evaluator not run for this row
+        evaluatorValues.push('', '', 'not_run');
+        continue;
+      }
+
+      if (result.status === 'success') {
+        evaluatorValues.push(escapeCSV(result.score || ''), escapeCSV(result.reasoning || ''));
+      } else {
+        // Errors have no score; the reasoning column carries the error message instead
+        evaluatorValues.push('', escapeCSV(result.error || ''));
+      }
+
+      // Status
+      evaluatorValues.push(result.status);
+    }
+
+    rows.push([...originalValues, ...evaluatorValues]);
+  }
+
+  return [headers, ...rows].map(row => row.join(',')).join('\n');
+}
+
+/**
+ * Escape CSV field: quote it when it contains a comma, double quote, CR or LF, doubling embedded quotes
+ */
+function escapeCSV(field: string): string {
+  if (/[",\r\n]/.test(field)) {
+    return `"${field.replace(/"/g, '""')}"`;
+  }
+  return field;
+}
+
+/**
+ * Serialize the complete batch output (results and summary) as JSON indented by two spaces
+ */
+export function formatAsJSON(output: BatchOutput): string {
+  return JSON.stringify(output, null, 2);
+}
+
+/**
+ * Format results as a standalone HTML page with an AG Grid table (one grid row per input row; grid assets load from the jsDelivr CDN)
+ */
+export function formatAsHTML(output: BatchOutput): string {
+  if (output.results.length === 0) {
+    return '<html><body><p>No results to display</p></body></html>';
+  }
+
+  // Group results by row
+  const groupedByRow = groupResultsByRow(output.results);
+
+  // Get unique evaluator IDs (sorted)
+  const evaluatorIds = Array.from(
+    new Set(output.results.map(r => r.evaluatorId))
+  ).sort();
+
+  // Convert grouped results to grid data (one row per input row)
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  const gridData: any[] = [];
+  const sortedRowIndices = Array.from(groupedByRow.keys()).sort((a, b) => a - b);
+
+  for (const rowIndex of sortedRowIndices) {
+    const resultsForRow = groupedByRow.get(rowIndex)!;
+    const firstResult = resultsForRow[0];
+
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const rowData: any = {
+      row: rowIndex,
+      text: firstResult.text.substring(0, 100) + (firstResult.text.length > 100 ? '...' : ''),
+      textFull: firstResult.text,
+      grade: firstResult.grade,
+    };
+
+    // Add evaluator-specific fields
+    for (const evalId of evaluatorIds) {
+      const result = resultsForRow.find(r => r.evaluatorId === evalId);
+      const prefix = formatEvaluatorPrefix(evalId);
+
+      if (result) {
+        rowData[`${prefix}_status`] = result.status;
+        rowData[`${prefix}_score`] = result.status === 'success' ? (result.score || '') : '';
+        rowData[`${prefix}_reasoning`] = result.status === 'success' ? (result.reasoning || '') : (result.error || '');
+      } else {
+        rowData[`${prefix}_status`] = 'not_run';
+        rowData[`${prefix}_score`] = '';
+        rowData[`${prefix}_reasoning`] = '';
+      }
+    }
+
+    gridData.push(rowData);
+  }
+  // Cell text is embedded in an inline <script>; "<" is emitted as \u003c so a "</script>" inside a cell cannot end it
+  return `<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Batch Evaluation Results</title>
+  <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📊</text></svg>">
+
+  <!-- AG Grid -->
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/styles/ag-grid.css">
+  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/styles/ag-theme-quartz.css">
+  <script src="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/dist/ag-grid-community.min.js"></script>
+
+  <style>
+    * {
+      margin: 0;
+      padding: 0;
+      box-sizing: border-box;
+    }
+
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
+      padding: 20px;
+      background: #f5f5f5;
+    }
+
+    .container {
+      max-width: 1400px;
+      margin: 0 auto;
+      background: white;
+      border-radius: 8px;
+      box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+      overflow: hidden;
+    }
+
+    .header {
+      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+      color: white;
+      padding: 30px;
+    }
+
+    .header h1 {
+      font-size: 28px;
+      margin-bottom: 10px;
+    }
+
+    .header p {
+      opacity: 0.9;
+      font-size: 14px;
+    }
+
+    .summary {
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
+      gap: 20px;
+      padding: 30px;
+      background: #f8f9fa;
+      border-bottom: 1px solid #e0e0e0;
+    }
+
+    .stat {
+      text-align: center;
+    }
+
+    .stat-value {
+      font-size: 32px;
+      font-weight: bold;
+      color: #333;
+    }
+
+    .stat-label {
+      font-size: 12px;
+      text-transform: uppercase;
+      color: #666;
+      margin-top: 5px;
+      letter-spacing: 0.5px;
+    }
+
+    .success { color: #10b981; }
+    .error { color: #ef4444; }
+
+    #grid {
+      height: 600px;
+      margin: 20px;
+    }
+
+    .status-success {
+      background: #d1fae5;
+      color: #065f46;
+      padding: 4px 8px;
+      border-radius: 4px;
+      font-size: 12px;
+      font-weight: 600;
+    }
+
+    .status-error {
+      background: #fee2e2;
+      color: #991b1b;
+      padding: 4px 8px;
+      border-radius: 4px;
+      font-size: 12px;
+      font-weight: 600;
+    }
+
+    .footer {
+      padding: 20px 30px;
+      text-align: center;
+      color: #666;
+      font-size: 12px;
+      border-top: 1px solid #e0e0e0;
+    }
+  </style>
+</head>
+<body>
+  <div class="container">
+    <div class="header">
+      <h1>📊 Batch Evaluation Results</h1>
+      <p>Generated on ${new Date().toLocaleString()}</p>
+    </div>
+
+    <div class="summary">
+      <div class="stat">
+        <div class="stat-value">${output.summary.totalTasks}</div>
+        <div class="stat-label">Total Tasks</div>
+      </div>
+      <div class="stat">
+        <div class="stat-value success">${output.summary.successful}</div>
+        <div class="stat-label">Successful</div>
+      </div>
+      <div class="stat">
+        <div class="stat-value error">${output.summary.failed}</div>
+        <div class="stat-label">Failed</div>
+      </div>
+      <div class="stat">
+        <div class="stat-value">${Math.round(output.summary.durationMs / 1000)}s</div>
+        <div class="stat-label">Duration</div>
+      </div>
+    </div>
+
+    <div id="grid" class="ag-theme-quartz"></div>
+
+    <div class="footer">
+      Generated by @learning-commons/evaluators
+    </div>
+  </div>
+
+  <script>
+    // Grid data
+    const rowData = ${JSON.stringify(gridData).replace(/</g, '\\u003c')};
+
+    // Column definitions
+    const columnDefs = [
+      { field: 'row', headerName: 'Row', width: 80, filter: 'agNumberColumnFilter', sortable: true, pinned: 'left' },
+      {
+        field: 'text',
+        headerName: 'Text',
+        width: 300,
+        tooltipField: 'textFull',
+        sortable: true,
+        filter: 'agTextColumnFilter',
+        pinned: 'left'
+      },
+      { field: 'grade', headerName: 'Grade', width: 100, sortable: true, filter: true, pinned: 'left' },
+      ${evaluatorIds.map(evalId => {
+        const prefix = formatEvaluatorPrefix(evalId);
+        const displayName = evalId.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
+
+        return `// ${displayName} columns
+      {
+        headerName: '${displayName}',
+        children: [
+          {
+            field: '${prefix}_status',
+            headerName: 'Status',
+            width: 100,
+            sortable: true,
+            filter: true,
+            cellRenderer: params => {
+              if (!params.value || params.value === 'not_run') return '<span style="color: #999;">—</span>';
+              const className = params.value === 'success' ? 'status-success' : 'status-error';
+              return \`<span class="\${className}">\${params.value.toUpperCase()}</span>\`;
+            }
+          },
+          {
+            field: '${prefix}_score',
+            headerName: 'Score',
+            width: 150,
+            sortable: true,
+            filter: true
+          },
+          {
+            field: '${prefix}_reasoning',
+            headerName: 'Reasoning',
+            width: 350,
+            wrapText: true,
+            autoHeight: false,
+            tooltipField: '${prefix}_reasoning',
+            sortable: true,
+            filter: 'agTextColumnFilter'
+          }
+        ]
+      }`;
+      }).join(',\n      ')}
+    ];
+
+    // Grid options
+    const gridOptions = {
+      columnDefs: columnDefs,
+      rowData: rowData,
+      defaultColDef: {
+        resizable: true,
+        sortable: true,
+        filter: true,
+      },
+      pagination: true,
+      paginationPageSize: 50,
+      paginationPageSizeSelector: [25, 50, 100, 200],
+      domLayout: 'normal',
+      tooltipShowDelay: 500,
+    };
+
+    // Initialize grid
+    const gridDiv = document.querySelector('#grid');
+    agGrid.createGrid(gridDiv, gridOptions);
+  </script>
+</body>
+</html>`;
+}
diff --git a/sdks/typescript/src/batch/index.ts b/sdks/typescript/src/batch/index.ts
new file mode 100644
index 0000000..69948a3
--- /dev/null
+++ b/sdks/typescript/src/batch/index.ts
@@ -0,0 +1,422 @@
+#!/usr/bin/env node
+
+import * as fs from 'fs';
+import * as path from 'path';
+import prompts from 'prompts';
+import { parse } from 'csv-parse/sync';
+import {
+  BatchEvaluator,
+  getAvailableEvaluators,
+  getRequiredApiKeys,
+} from './evaluator.js';
+import { formatAsCSV, formatAsJSON, formatAsHTML } from './formatters.js';
+import { ProgressTracker } from './progress.js';
+import type { BatchInput } from './types.js';
+
+/**
+ * Locate the key of `row` that matches `columnName`, ignoring case and surrounding whitespace
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function findColumn(row: any, columnName: string): string | undefined {
+  const wanted = columnName.toLowerCase().trim();
+  const normalize = (header: string): string => header.toLowerCase().trim();
+
+  // Array.prototype.find yields undefined when nothing matches, which is
+  // the "column absent" signal; otherwise the first matching key wins.
+  const match = Object.keys(row).find(
+    (header) => normalize(header) === wanted
+  );
+  return match;
+}
+
+/**
+ * Validate CSV file: it must exist, yield at least one record, and have "text" and "grade" columns (throws Error otherwise)
+ */
+function validateCSV(csvPath: string): void {
+  if (!fs.existsSync(csvPath)) {
+    throw new Error(`CSV file not found: ${csvPath}`);
+  }
+
+  const content = fs.readFileSync(csvPath, 'utf-8');
+  const records = parse(content, {
+    columns: true,
+    skip_empty_lines: true,
+    trim: true,
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  }) as Record<string, any>[];
+
+  if (records.length === 0) {
+    throw new Error('CSV file is empty');
+  }
+
+  const firstRow = records[0];
+  const textColumn = findColumn(firstRow, 'text');
+  const gradeColumn = findColumn(firstRow, 'grade');
+
+  if (!textColumn) {
+    throw new Error('CSV must have a "text" column (case-insensitive)');
+  }
+  if (!gradeColumn) {
+    throw new Error('CSV must have a "grade" column (case-insensitive)');
+  }
+}
+
+/**
+ * Read and parse CSV file into batch inputs. Rows lacking a text or grade value are skipped;
+ * `rowIndex` is 1-based and counts only the rows kept, so it is not the file's line number.
+ * @param csvPath - Path of the CSV file to read
+ * @returns One BatchInput per usable row, each carrying its complete original row
+ * @throws {Error} If the file has no data rows or lacks a "text" or "grade" column
+ */
+function readCSV(csvPath: string): BatchInput[] {
+  const content = fs.readFileSync(csvPath, 'utf-8');
+  const records = parse(content, {
+    columns: true,
+    skip_empty_lines: true,
+    trim: true,
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  }) as Record<string, any>[];
+
+  // Find column names (case-insensitive, whitespace-trimmed). Without data rows, inspect an empty
+  // row so the descriptive error below is thrown rather than a TypeError from Object.keys(undefined).
+  const firstRow = records[0] ?? {};
+  const textColumn = findColumn(firstRow, 'text');
+  const gradeColumn = findColumn(firstRow, 'grade');
+  if (!textColumn || !gradeColumn) {
+    throw new Error('CSV missing required columns');
+  }
+
+  // Filter out empty rows and map to BatchInput
+  const inputs: BatchInput[] = [];
+  for (const row of records) {
+    const text = row[textColumn];
+    const grade = row[gradeColumn];
+    // Skip rows with missing text or grade (empty rows)
+    if (!text || !grade) {
+      continue;
+    }
+    inputs.push({
+      text: String(text).trim(),
+      grade: String(grade).trim(),
+      rowIndex: inputs.length + 1, // Evaluated before the push, so kept rows are numbered 1, 2, 3, ...
+      originalRow: row, // Preserve all original columns
+    });
+  }
+
+  return inputs;
+}
+
+/**
+ * Main CLI function
+ */
+async function main() {
+  console.log('\n📊 Batch CSV Evaluator\n');
+  console.log('This tool will evaluate multiple texts using one or more evaluators.\n');
+
+  try {
+    // Step 1: Get CSV file path
+    const { csvPath } = await prompts({
+      type: 'text',
+      name: 'csvPath',
+      message: 'Where is your CSV file?',
+      initial: './input.csv',
+      validate: (value) => {
+        try {
+          validateCSV(value);
+          return true;
+        } catch (error) {
+          return error instanceof Error ? error.message : 'Invalid CSV file';
+        }
+      },
+    });
+
+    if (!csvPath) {
+      console.log('Cancelled.');
+      process.exit(0);
+    }
+
+    // Read CSV to show info
+    const inputs = readCSV(csvPath);
+    console.log(`\n✓ Found ${inputs.length} rows in CSV\n`);
+
+    // Step 2: Select evaluators
+    const availableEvaluators = getAvailableEvaluators();
+    const { evaluatorIds } = await prompts({
+      type: 'multiselect',
+      name: 'evaluatorIds',
+      message: 'Which evaluators do you want to run?',
+      choices: availableEvaluators.map((e) => ({
+        title: e.name,
+        value: e.id,
+        selected: false,
+      })),
+      min: 1,
+      hint: 'Use space to select, enter to confirm',
+    });
+
+    if (!evaluatorIds || evaluatorIds.length === 0) {
+      console.log('No evaluators selected. Cancelled.');
+      process.exit(0);
+    }
+
+    console.log(`\n✓ Selected: ${evaluatorIds.join(', ')}\n`);
+
+    // Step 3: Get API keys (only required ones)
+    const { requiresGoogle, requiresOpenAI } = getRequiredApiKeys(evaluatorIds);
+
+    let googleApiKey: string | undefined;
+    let openaiApiKey: string | undefined;
+
+    if (requiresGoogle) {
+      const result = await prompts({
+        type: 'password',
+        name: 'key',
+        message: 'Google API Key:',
+        initial: process.env.GOOGLE_API_KEY || '',
+        validate: (value) => (value ? true : 'Google API key is required'),
+      });
+
+      if (!result.key) {
+        console.log('Cancelled.');
+        process.exit(0);
+      }
+
+      googleApiKey = result.key;
+    }
+
+    if (requiresOpenAI) {
+      const result = await prompts({
+        type: 'password',
+        name: 'key',
+        message: 'OpenAI API Key:',
+        initial: process.env.OPENAI_API_KEY || '',
+        validate: (value) => (value ? true : 'OpenAI API key is required'),
+      });
+
+      if (!result.key) {
+        console.log('Cancelled.');
+        process.exit(0);
+      }
+
+      openaiApiKey = result.key;
+    }
+
+    // Step 4: Get output directory (with human-readable timestamp in local time)
+    const now = new Date();
+    const timestamp = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, '0')}-${String(now.getDate()).padStart(2, '0')}_${String(now.getHours()).padStart(2, '0')}-${String(now.getMinutes()).padStart(2, '0')}-${String(now.getSeconds()).padStart(2, '0')}`;
+    const defaultOutputDir = path.join(process.cwd(), `batch-results-${timestamp}`);
+
+    const { outputDir } = await prompts({
+      type: 'text',
+      name: 'outputDir',
+      message: 'Output directory:',
+      initial: defaultOutputDir,
+      validate: (value) => {
+        // Check if parent directory exists
+        const parentDir = path.dirname(value);
+        if (!fs.existsSync(parentDir)) {
+          return `Parent directory does not exist: ${parentDir}`;
+        }
+
+        // Check write permissions by attempting to create output directory
+        try {
+          if (!fs.existsSync(value)) {
+            fs.mkdirSync(value, { recursive: true });
+          }
+
+          // Test write permission with a temporary file
+          const testFile = path.join(value, '.write-test');
+          fs.writeFileSync(testFile, '');
+          fs.unlinkSync(testFile);
+
+          return true;
+        } catch (error) {
+          if (error instanceof Error) {
+            if (error.message.includes('EACCES')) {
+              return `No write permission for directory: ${value}`;
+            }
+            if (error.message.includes('EROFS')) {
+              return `Directory is read-only: ${value}`;
+            }
+            return `Cannot write to directory: ${error.message}`;
+          }
+          return `Cannot write to directory`;
+        }
+      },
+    });
+
+    if (!outputDir) {
+      console.log('Cancelled.');
+      process.exit(0);
+    }
+
+    // Ensure output directory exists
+    if (!fs.existsSync(outputDir)) {
+      fs.mkdirSync(outputDir, { recursive: true });
+    }
+
+    // Step 5: Confirm and run
+    const totalTasks = inputs.length * evaluatorIds.length;
+    const MAX_TASKS = 500;
+
+    console.log(`\n📝 Summary:`);
+    console.log(`  Input rows: ${inputs.length}`);
+    console.log(`  Evaluators: ${evaluatorIds.length}`);
+    console.log(`  Total tasks: ${totalTasks}`);
+    console.log(`  Output: ${outputDir}\n`);
+
+    // Hard limit check
+    if (totalTasks > MAX_TASKS) {
+      console.log(`❌ Batch too large!\n`);
+      console.log(`  Maximum allowed: ${MAX_TASKS} tasks`);
+      console.log(`  Your batch: ${totalTasks} tasks (${inputs.length} rows × ${evaluatorIds.length} evaluators)\n`);
+      console.log(`Suggestions:`);
+      console.log(`  • Reduce number of rows in CSV`);
+      console.log(`  • Select fewer evaluators`);
+      console.log(`  • Split into multiple smaller batches\n`);
+      process.exit(1);
+    }
+
+    // Warning for large batches (100-500 tasks)
+    if (totalTasks > 100) {
+      // Estimate time: ~2 seconds per task with concurrency=3
+      const estimatedMinutes = Math.ceil((totalTasks * 2) / 60);
+      // Estimate cost: ~$0.01-0.02 per task (rough average)
+      const estimatedCost = (totalTasks * 0.015).toFixed(2);
+
+      console.log(`⚠️  Warning: Large batch detected\n`);
+      console.log(`  API calls: ${totalTasks}`);
+      console.log(`  Estimated time: ~${estimatedMinutes} minute${estimatedMinutes > 1 ? 's' : ''}`);
+      console.log(`  Estimated cost: ~$${estimatedCost}\n`);
+    }
+
+    const { confirm } = await prompts({
+      type: 'confirm',
+      name: 'confirm',
+      message: 'Start batch evaluation?',
+      initial: totalTasks <= 100, // Default to No for large batches
+    });
+
+    if (!confirm) {
+      console.log('Cancelled.');
+      process.exit(0);
+    }
+
+    // Step 6: Run batch evaluation
+    console.log('\n' + '='.repeat(60));
+    const tracker = new ProgressTracker(totalTasks);
+    const evaluationStartTime = Date.now();
+
+    const evaluator = new BatchEvaluator({
+      googleApiKey,
+      openaiApiKey,
+      concurrency: 3,
+      maxRetries: 2,
+      telemetry: false,
+    });
+
+    // Handle Ctrl+C gracefully
+    let isShuttingDown = false;
+    const handleShutdown = () => {
+      if (isShuttingDown) {
+        console.log('\n\n⚠️  Force quit detected. Exiting immediately...');
+        process.exit(1);
+      }
+
+      isShuttingDown = true;
+      console.log('\n\n⚠️  Shutdown requested. Saving partial results...');
+      console.log('   (Press Ctrl+C again to force quit)\n');
+
+      // Get partial results
+      const partialResults = evaluator.cancel();
+
+      if (partialResults.length > 0) {
+        // Calculate summary for partial results
+        const durationMs = Date.now() - evaluationStartTime;
+        const partialOutput = {
+          results: partialResults,
+          summary: {
+            totalTasks: totalTasks,
+            successful: partialResults.filter((r) => r.status === 'success').length,
+            failed: partialResults.filter((r) => r.status === 'error').length,
+            durationMs,
+            resultsPerEvaluator: {},
+          },
+        };
+
+        // Save partial results
+        try {
+          const csvPath_partial = path.join(outputDir, 'results-partial.csv');
+          const jsonPath_partial = path.join(outputDir, 'results-partial.json');
+          const htmlPath_partial = path.join(outputDir, 'results-partial.html');
+
+          fs.writeFileSync(csvPath_partial, formatAsCSV(partialOutput));
+          fs.writeFileSync(jsonPath_partial, formatAsJSON(partialOutput));
+          fs.writeFileSync(htmlPath_partial, formatAsHTML(partialOutput));
+
+          console.log(`✓ Saved ${partialResults.length} results to:`);
+          console.log(`  ${outputDir}/`);
+          console.log(`    ├── results-partial.csv`);
+          console.log(`    ├── results-partial.json`);
+          console.log(`    └── results-partial.html`);
+          console.log();
+        } catch (error) {
+          console.error('❌ Error saving partial results:', error instanceof Error ? error.message : String(error));
+        }
+      } else {
+        console.log('No results to save yet.\n');
+      }
+
+      process.exit(0);
+    };
+
+    process.on('SIGINT', handleShutdown);
+    process.on('SIGTERM', handleShutdown);
+
+    let output;
+    try {
+      output = await evaluator.evaluate(inputs, evaluatorIds, (result) => {
+        tracker.update(result);
+        tracker.display();
+      });
+    } finally {
+      // Remove signal handlers
+      process.off('SIGINT', handleShutdown);
+      process.off('SIGTERM', handleShutdown);
+    }
+
+    // Display final summary
+    tracker.displaySummary();
+
+    // Step 7: Write output files
+    const csvPath_out = path.join(outputDir, 'results.csv');
+    const jsonPath = path.join(outputDir, 'results.json');
+    const htmlPath = path.join(outputDir, 'results.html');
+
+    try {
+      fs.writeFileSync(csvPath_out, formatAsCSV(output));
+      fs.writeFileSync(jsonPath, formatAsJSON(output));
+      fs.writeFileSync(htmlPath, formatAsHTML(output));
+
+      console.log('📄 Output files generated:');
+      console.log(`  ${outputDir}/`);
+      console.log(`    ├── results.csv`);
+      console.log(`    ├── results.json`);
+      console.log(`    └── results.html`);
+      console.log();
+    } catch (error) {
+      console.error('\n❌ Error writing output files:');
+      if (error instanceof Error) {
+        console.error(`  ${error.message}`);
+      }
+      console.error('\n⚠️  Evaluation completed but outputs could not be saved.');
+      process.exit(1);
+    }
+  } catch (error) {
+    console.error('\n❌ Error:', error instanceof Error ? error.message : String(error));
+    process.exit(1);
+  }
+}
+
+// Run CLI
+main();
diff --git a/sdks/typescript/src/batch/progress.ts b/sdks/typescript/src/batch/progress.ts
new file mode 100644
index 0000000..5b9eb1b
--- /dev/null
+++ b/sdks/typescript/src/batch/progress.ts
@@ -0,0 +1,167 @@
+import type { BatchResult } from './types.js';
+
+/**
+ * Progress tracker for batch evaluation
+ */
+export class ProgressTracker {
+  private totalTasks: number;
+  private completed = 0;
+  private successful = 0;
+  private failed = 0;
+  private startTime: number;
+  private perEvaluator = new Map<string, { completed: number; successful: number; failed: number }>();
+  private renderedLines = 0; // Height of the progress frame currently on screen (0 = nothing drawn)
+
+  constructor(totalTasks: number) {
+    this.totalTasks = totalTasks;
+    this.startTime = Date.now();
+  }
+
+  /**
+   * Update progress with a new result
+   */
+  update(result: BatchResult): void {
+    this.completed++;
+
+    if (result.status === 'success') {
+      this.successful++;
+    } else {
+      this.failed++;
+    }
+
+    // Track per-evaluator stats
+    if (!this.perEvaluator.has(result.evaluatorId)) {
+      this.perEvaluator.set(result.evaluatorId, { completed: 0, successful: 0, failed: 0 });
+    }
+
+    const stats = this.perEvaluator.get(result.evaluatorId)!;
+    stats.completed++;
+    if (result.status === 'success') {
+      stats.successful++;
+    } else {
+      stats.failed++;
+    }
+  }
+
+  /**
+   * Get current progress percentage (0 when there are no tasks, avoiding a NaN from 0/0)
+   */
+  getPercentage(): number {
+    return this.totalTasks > 0 ? Math.round((this.completed / this.totalTasks) * 100) : 0;
+  }
+
+  /**
+   * Get elapsed time in seconds
+   */
+  getElapsedSeconds(): number {
+    return Math.round((Date.now() - this.startTime) / 1000);
+  }
+
+  /**
+   * Estimate remaining time in seconds
+   */
+  getEstimatedRemainingSeconds(): number {
+    if (this.completed === 0) return 0;
+
+    const elapsed = Date.now() - this.startTime;
+    const avgTimePerTask = elapsed / this.completed;
+    const remaining = this.totalTasks - this.completed;
+
+    return Math.round((avgTimePerTask * remaining) / 1000);
+  }
+
+  /**
+   * Format elapsed time as human-readable string
+   */
+  formatElapsed(): string {
+    const seconds = this.getElapsedSeconds();
+    if (seconds < 60) return `${seconds}s`;
+
+    const minutes = Math.floor(seconds / 60);
+    const remainingSeconds = seconds % 60;
+    return `${minutes}m ${remainingSeconds}s`;
+  }
+
+  /**
+   * Format estimated remaining time as human-readable string
+   */
+  formatEstimatedRemaining(): string {
+    const seconds = this.getEstimatedRemainingSeconds();
+    if (seconds < 60) return `${seconds}s`;
+
+    const minutes = Math.floor(seconds / 60);
+    const remainingSeconds = seconds % 60;
+    return `${minutes}m ${remainingSeconds}s`;
+  }
+
+  /**
+   * Generate progress bar
+   */
+  getProgressBar(width = 20): string {
+    const percentage = this.getPercentage();
+    const filled = Math.round((percentage / 100) * width);
+    const empty = width - filled;
+
+    return '█'.repeat(filled) + '░'.repeat(empty);
+  }
+
+  /**
+   * Display progress in terminal
+   */
+  display(): void {
+    // Erase the previous frame (move the cursor up over it, then clear to end of screen)
+    if (this.renderedLines > 0) {
+      process.stdout.write(`\x1b[${this.renderedLines}A\x1b[J`);
+    }
+    // A frame is 5 fixed lines (blank, title, bar, blank, timing) plus one line per evaluator
+    this.renderedLines = 5 + this.perEvaluator.size;
+
+    console.log('\nProcessing evaluations...');
+    console.log(
+      `${this.getProgressBar()} ${this.getPercentage()}% (${this.completed}/${this.totalTasks})`
+    );
+
+    // Show per-evaluator progress
+    for (const [evalId, stats] of this.perEvaluator.entries()) {
+      const status =
+        stats.completed === stats.successful
+          ? '✓'
+          : stats.failed > 0
+            ? '✗'
+            : '⏳';
+      console.log(
+        `  ${status} ${evalId}: ${stats.successful}/${stats.completed} successful`
+      );
+    }
+
+    console.log(
+      `\n⏱  Elapsed: ${this.formatElapsed()} | Estimated remaining: ${this.formatEstimatedRemaining()}`
+    );
+  }
+
+  /**
+   * Display final summary
+   */
+  displaySummary(): void {
+    // Clear the progress frame, but only if one was actually drawn
+    if (this.renderedLines > 0) process.stdout.write(`\x1b[${this.renderedLines}A\x1b[J`);
+    this.renderedLines = 0;
+
+    console.log('\n✅ Batch evaluation completed!\n');
+    console.log(`Total tasks: ${this.totalTasks}`);
+    console.log(`Successful: ${this.successful} ✓`);
+    console.log(`Failed: ${this.failed} ✗`);
+    console.log(`Duration: ${this.formatElapsed()}`);
+
+    // Show per-evaluator summary
+    if (this.perEvaluator.size > 1) {
+      console.log('\nResults per evaluator:');
+      for (const [evalId, stats] of this.perEvaluator.entries()) {
+        console.log(
+          `  ${evalId}: ${stats.successful} successful, ${stats.failed} failed`
+        );
+      }
+    }
+    console.log();
+  }
+}
diff --git a/sdks/typescript/src/batch/types.ts b/sdks/typescript/src/batch/types.ts
new file mode 100644
index 0000000..bb279ed
--- /dev/null
+++ b/sdks/typescript/src/batch/types.ts
@@ -0,0 +1,71 @@
+/**
+ * Batch evaluation types
+ */
+
+/**
+ * Input row from CSV
+ */
+export interface BatchInput {
+  text: string;
+  grade: string;
+  rowIndex: number;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  originalRow: Record<string, any>; // Preserve all original CSV columns
+}
+
+/**
+ * Individual evaluation task
+ */
+export interface BatchTask {
+  text: string;
+  grade: string;
+  evaluatorId: string;
+  rowIndex: number;
+}
+
+/**
+ * Result from a single evaluation
+ */
+export interface BatchResult {
+  rowIndex: number;
+  text: string;
+  grade: string;
+  evaluatorId: string;
+  status: 'success' | 'error';
+  score?: string;
+  reasoning?: string;
+  error?: string;
+  processingTimeMs: number;
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  originalRow: Record<string, any>; // Preserve all original CSV columns
+}
+
+/**
+ * Summary statistics for batch evaluation
+ */
+export interface BatchSummary {
+  totalTasks: number;
+  successful: number;
+  failed: number;
+  durationMs: number;
+  resultsPerEvaluator: Record<string, { successful: number; failed: number }>;
+}
+
+/**
+ * Complete batch evaluation output
+ */
+export interface BatchOutput {
+  results: BatchResult[];
+  summary: BatchSummary;
+}
+
+/**
+ * Configuration for batch evaluation
+ */
+export interface BatchConfig {
+  googleApiKey?: string;
+  openaiApiKey?: string;
+  concurrency?: number;
+  maxRetries?: number;
+  telemetry?: boolean;
+}
diff --git a/sdks/typescript/src/evaluators/base.ts b/sdks/typescript/src/evaluators/base.ts
index d4e48d6..f9025a3 100644
--- a/sdks/typescript/src/evaluators/base.ts
+++ b/sdks/typescript/src/evaluators/base.ts
@@ -7,6 +7,7 @@ import {
 } from '../telemetry/index.js';
 import { ConfigurationError, ValidationError } from '../errors.js';
 import { createLogger, LogLevel, type Logger } from '../logger.js';
+import type { EvaluationResult } from '../schemas/index.js';
 
 /**
  * Validation constants for input text
@@ -181,6 +182,16 @@ export abstract class BaseEvaluator {
     return meta;
   }
 
+  /**
+   * Abstract evaluate method that concrete evaluators must implement.
+   * NOTE(review): a new abstract member must be implemented by every concrete subclass — confirm all do.
+   * @param text - The text to evaluate
+   * @param grade - The grade level for context, passed as a string (e.g. '3')
+   * @returns Promise resolving to evaluation result with score and reasoning
+   */
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  abstract evaluate(text: string, grade: string): Promise<EvaluationResult<any, any>>;
+
   /**
    * Validate that required API keys are provided based on metadata
    * @throws {ConfigurationError} If required API keys are missing
diff --git a/sdks/typescript/tests/fixtures/batch-test.csv b/sdks/typescript/tests/fixtures/batch-test.csv
new file mode 100644
index 0000000..3a498a5
--- /dev/null
+++ b/sdks/typescript/tests/fixtures/batch-test.csv
@@ -0,0 +1,3 @@
+row_id,TEXT,  Grade  ,source,category
+1,"The cat sat on the mat. It was a warm, sunny day.",3,textbook,simple
+2,"The photosynthesis process converts light energy into chemical energy.",5,science,biology
diff --git a/sdks/typescript/tests/integration/batch.integration.test.ts b/sdks/typescript/tests/integration/batch.integration.test.ts
new file mode 100644
index 0000000..e4c5111
--- /dev/null
+++ b/sdks/typescript/tests/integration/batch.integration.test.ts
@@ -0,0 +1,192 @@
+import { describe, it, expect, beforeAll } from 'vitest';
+import { config } from 'dotenv';
+import { BatchEvaluator } from '../../src/batch/evaluator.js';
+import type { BatchInput } from '../../src/batch/types.js';
+import * as fs from 'fs';
+import { parse } from 'csv-parse/sync';
+import * as path from 'path';
+
+// Load .env file for testing convenience
+config();
+
+/**
+ * Batch Evaluator Integration Tests
+ *
+ * Lightweight integration test with 2 rows and 1 evaluator (sentence-structure).
+ * Verifies the full batch evaluation flow works end-to-end with real API calls.
+ *
+ * To run:
+ * ```bash
+ * RUN_INTEGRATION_TESTS=true npm run test:integration
+ * ```
+ */
+
+// Skip unless the suite is explicitly enabled AND an OpenAI key is present: these tests make real API calls and need both.
+const SKIP_INTEGRATION = !process.env.RUN_INTEGRATION_TESTS || !process.env.OPENAI_API_KEY;
+
+const describeIntegration = SKIP_INTEGRATION ? describe.skip : describe;
+
+// Test timeout: 2 minutes (generous for API calls)
+const TEST_TIMEOUT_MS = 2 * 60 * 1000;
+
+describeIntegration('Batch Evaluator - Integration', () => {
+  let evaluator: BatchEvaluator;
+
+  beforeAll(() => {
+    if (SKIP_INTEGRATION) {
+      console.log('⏭️  Skipping batch integration tests (no API keys or RUN_INTEGRATION_TESTS not set)');
+      return;
+    }
+
+    evaluator = new BatchEvaluator({
+      openaiApiKey: process.env.OPENAI_API_KEY!,
+      concurrency: 2, // Process both rows in parallel
+      maxRetries: 2,
+      telemetry: false,
+    });
+
+    console.log('\n' + '='.repeat(80));
+    console.log('BATCH EVALUATOR - INTEGRATION TEST');
+    console.log('='.repeat(80));
+    console.log('Testing with 2 rows, 1 evaluator (sentence-structure)');
+    console.log('='.repeat(80));
+  });
+
+  it(
+    'should process sample CSV end-to-end',
+    async () => {
+      // Read test CSV
+      const csvPath = path.join(__dirname, '../fixtures/batch-test.csv');
+      const content = fs.readFileSync(csvPath, 'utf-8');
+      const records = parse(content, {
+        columns: true,
+        skip_empty_lines: true,
+        trim: true,
+      });
+
+      // Helper to find column case-insensitively
+      function findColumn(row: any, columnName: string): string | undefined {
+        const normalizedTarget = columnName.toLowerCase().trim();
+        for (const key of Object.keys(row)) {
+          if (key.toLowerCase().trim() === normalizedTarget) {
+            return key;
+          }
+        }
+        return undefined;
+      }
+
+      const firstRow = records[0];
+      const textColumn = findColumn(firstRow, 'text')!;
+      const gradeColumn = findColumn(firstRow, 'grade')!;
+
+      const inputs: BatchInput[] = records.map((row: any, index: number) => ({
+        text: row[textColumn],
+        grade: row[gradeColumn],
+        rowIndex: index + 1,
+        originalRow: row,
+      }));
+
+      console.log(`\n📊 Processing ${inputs.length} rows...`);
+
+      // Run batch evaluation
+      const startTime = Date.now();
+      const output = await evaluator.evaluate(
+        inputs,
+        ['sentence-structure'],
+        (result) => {
+          console.log(`  ✓ Row ${result.rowIndex} - ${result.status}: ${result.score || result.error}`);
+        }
+      );
+      const duration = Date.now() - startTime;
+
+      console.log(`\n⏱  Completed in ${Math.round(duration / 1000)}s\n`);
+
+      // Verify results structure
+      expect(output).toBeDefined();
+      expect(output.results).toBeDefined();
+      expect(output.summary).toBeDefined();
+
+      // Should have 2 results (2 rows × 1 evaluator)
+      expect(output.results).toHaveLength(2);
+
+      // Verify each result has expected fields
+      for (const result of output.results) {
+        expect(result.rowIndex).toBeGreaterThan(0);
+        expect(result.text).toBeTruthy();
+        expect(result.grade).toBeTruthy();
+        expect(result.evaluatorId).toBe('sentence-structure');
+        expect(result.status).toMatch(/success|error/);
+        expect(result.processingTimeMs).toBeGreaterThan(0);
+
+        if (result.status === 'success') {
+          expect(result.score).toBeTruthy();
+          expect(result.reasoning).toBeTruthy();
+        } else {
+          expect(result.error).toBeTruthy();
+        }
+      }
+
+      // Verify summary
+      expect(output.summary.totalTasks).toBe(2);
+      expect(output.summary.successful + output.summary.failed).toBe(2);
+      expect(output.summary.durationMs).toBeGreaterThan(0);
+      expect(output.summary.resultsPerEvaluator).toHaveProperty('sentence-structure');
+
+      // Log summary
+      console.log('📊 Summary:');
+      console.log(`  Total: ${output.summary.totalTasks}`);
+      console.log(`  Successful: ${output.summary.successful} ✓`);
+      console.log(`  Failed: ${output.summary.failed} ✗`);
+      console.log(`  Duration: ${Math.round(output.summary.durationMs / 1000)}s`);
+
+      // At least 1 should succeed (allow for occasional API issues)
+      expect(output.summary.successful).toBeGreaterThan(0);
+    },
+    TEST_TIMEOUT_MS
+  );
+
+  it(
+    'should handle multiple evaluators with same inputs',
+    async () => {
+      // Skip if Google key not available
+      if (!process.env.GOOGLE_API_KEY) {
+        console.log('⏭️  Skipping multi-evaluator test (no GOOGLE_API_KEY)');
+        return;
+      }
+
+      // Single row, two evaluators
+      const inputs: BatchInput[] = [
+        { text: 'The cat sat on the mat.', grade: '3', rowIndex: 1, originalRow: { text: 'The cat sat on the mat.', grade: '3' } },
+      ];
+
+      console.log('\n📊 Processing 1 row with 2 evaluators...');
+
+      const evaluatorWithBothKeys = new BatchEvaluator({
+        openaiApiKey: process.env.OPENAI_API_KEY!,
+        googleApiKey: process.env.GOOGLE_API_KEY!,
+        concurrency: 2,
+        maxRetries: 2,
+        telemetry: false,
+      });
+
+      const output = await evaluatorWithBothKeys.evaluate(
+        inputs,
+        ['sentence-structure', 'grade-level-appropriateness'],
+        (result) => {
+          console.log(`  ✓ ${result.evaluatorId} - ${result.status}: ${result.score || result.error}`);
+        }
+      );
+
+      // Should have 2 results (1 row × 2 evaluators)
+      expect(output.results).toHaveLength(2);
+
+      // Verify both evaluators ran
+      const evaluatorIds = output.results.map((r) => r.evaluatorId);
+      expect(evaluatorIds).toContain('sentence-structure');
+      expect(evaluatorIds).toContain('grade-level-appropriateness');
+
+      console.log('\n✅ Multi-evaluator test passed\n');
+    },
+    TEST_TIMEOUT_MS
+  );
+});
diff --git a/sdks/typescript/tests/unit/batch/csv-parsing.test.ts b/sdks/typescript/tests/unit/batch/csv-parsing.test.ts
new file mode 100644
index 0000000..414b815
--- /dev/null
+++ b/sdks/typescript/tests/unit/batch/csv-parsing.test.ts
@@ -0,0 +1,91 @@
+import { describe, it, expect } from 'vitest';
+import * as fs from 'fs';
+import { parse } from 'csv-parse/sync';
+import * as path from 'path';
+
+/**
+ * Case- and whitespace-insensitive column lookup (same as in batch/index.ts).
+ *
+ * Returns the first matching key exactly as it appears in `row`, or
+ * undefined when no key matches.
+ */
+function findColumn(row: any, columnName: string): string | undefined {
+  const canonical = (name: string): string => name.toLowerCase().trim();
+  const wanted = canonical(columnName);
+
+  // find() yields undefined when nothing matches, so no explicit fallback is needed.
+  const match = Object.keys(row).find((header) => canonical(header) === wanted);
+  return match;
+}
+
+describe('CSV Parsing Robustness', () => {
+  it('should find columns case-insensitively', () => {
+    const row = { TEXT: 'sample', GRADE: '3' };
+
+    expect(findColumn(row, 'text')).toBe('TEXT');
+    expect(findColumn(row, 'Text')).toBe('TEXT');
+    expect(findColumn(row, 'TEXT')).toBe('TEXT');
+    expect(findColumn(row, 'grade')).toBe('GRADE');
+  });
+
+  it('should find columns with whitespace in name', () => {
+    const row = { '  text  ': 'sample', ' GRADE ': '3' };
+
+    expect(findColumn(row, 'text')).toBe('  text  ');
+    expect(findColumn(row, 'grade')).toBe(' GRADE ');
+  });
+
+  it('should handle mixed case CSV', () => {
+    const row = { Text: 'sample', Grade: '3' };
+
+    expect(findColumn(row, 'text')).toBe('Text');
+    expect(findColumn(row, 'TEXT')).toBe('Text');
+    expect(findColumn(row, 'grade')).toBe('Grade');
+  });
+
+  it('should return undefined for missing columns', () => {
+    const row = { foo: 'bar' };
+
+    expect(findColumn(row, 'text')).toBeUndefined();
+    expect(findColumn(row, 'grade')).toBeUndefined();
+  });
+
+  it('should parse CSV with column name variants', () => {
+    const csvPath = path.join(__dirname, '../../fixtures/batch-test.csv');
+    const content = fs.readFileSync(csvPath, 'utf-8');
+    const records = parse(content, {
+      columns: true,
+      skip_empty_lines: true,
+      trim: true,
+    });
+
+    expect(records.length).toBe(2);
+
+    // Find columns (CSV has "TEXT" and "Grade" - trim option normalizes whitespace)
+    const firstRow = records[0];
+    const textColumn = findColumn(firstRow, 'text');
+    const gradeColumn = findColumn(firstRow, 'grade');
+
+    expect(textColumn).toBe('TEXT'); // Uppercase in CSV
+    expect(gradeColumn).toBe('Grade'); // Mixed case in CSV (whitespace trimmed)
+
+    // Should be able to read values
+    expect(firstRow[textColumn!]).toBeTruthy();
+    expect(firstRow[gradeColumn!]).toBe('3');
+  });
+
+  it('should filter out empty rows', () => {
+    const records = [
+      { text: 'Row 1', grade: '3' },
+      { text: '', grade: '4' }, // Empty text
+      { text: 'Row 3', grade: '' }, // Empty grade
+      { text: 'Row 4', grade: '5' },
+    ];
+
+    const filtered = records.filter((row) => row.text && row.grade);
+
+    expect(filtered).toHaveLength(2);
+    expect(filtered[0].text).toBe('Row 1');
+    expect(filtered[1].text).toBe('Row 4');
+  });
+});
diff --git a/sdks/typescript/tests/unit/batch/formatters.test.ts b/sdks/typescript/tests/unit/batch/formatters.test.ts
new file mode 100644
index 0000000..3ee84db
--- /dev/null
+++ b/sdks/typescript/tests/unit/batch/formatters.test.ts
@@ -0,0 +1,252 @@
+import { describe, it, expect } from 'vitest';
+import { formatAsCSV, formatAsJSON, formatAsHTML } from '../../../src/batch/formatters.js';
+import type { BatchOutput, BatchResult } from '../../../src/batch/types.js';
+
+describe('Batch Formatters', () => {
+  const sampleResults: BatchResult[] = [
+    {
+      rowIndex: 1,
+      text: 'The cat sat on the mat.',
+      grade: '3',
+      evaluatorId: 'vocabulary',
+      status: 'success',
+      score: 'slightly complex',
+      reasoning: 'Simple vocabulary',
+      processingTimeMs: 1250,
+      originalRow: { row_id: '1', text: 'The cat sat on the mat.', grade: '3', source: 'test' },
+    },
+    {
+      rowIndex: 1,
+      text: 'The cat sat on the mat.',
+      grade: '3',
+      evaluatorId: 'sentence-structure',
+      status: 'success',
+      score: 'Moderately Complex',
+      reasoning: 'Simple sentence structure',
+      processingTimeMs: 1100,
+      originalRow: { row_id: '1', text: 'The cat sat on the mat.', grade: '3', source: 'test' },
+    },
+    {
+      rowIndex: 2,
+      text: 'The quick brown fox jumps over the lazy dog.',
+      grade: '4',
+      evaluatorId: 'vocabulary',
+      status: 'error',
+      error: 'API timeout',
+      processingTimeMs: 5000,
+      originalRow: { row_id: '2', text: 'The quick brown fox jumps over the lazy dog.', grade: '4', source: 'test' },
+    },
+  ];
+
+  const sampleOutput: BatchOutput = {
+    results: sampleResults,
+    summary: {
+      totalTasks: 3,
+      successful: 2,
+      failed: 1,
+      durationMs: 7500,
+      resultsPerEvaluator: {
+        vocabulary: { successful: 1, failed: 1 },
+        'sentence-structure': { successful: 1, failed: 0 },
+      },
+    },
+  };
+
+  describe('formatAsCSV', () => {
+    it('should format results as CSV with columns per evaluator', () => {
+      const csv = formatAsCSV(sampleOutput);
+
+      // Should include original columns
+      expect(csv).toContain('row_id');
+      expect(csv).toContain('source');
+
+      // Should have evaluator-specific columns (not "evaluator" column)
+      expect(csv).toContain('vocabulary_score');
+      expect(csv).toContain('vocabulary_reasoning');
+      expect(csv).toContain('vocabulary_status');
+      expect(csv).toContain('sentence_structure_score');
+      expect(csv).toContain('sentence_structure_reasoning');
+      expect(csv).toContain('sentence_structure_status');
+
+      // Should have one row per input row (not per evaluator)
+      expect(csv.split('\n')).toHaveLength(3); // Header + 2 data rows
+    });
+
+    it('should escape CSV fields with quotes', () => {
+      const resultsWithCommas: BatchResult[] = [
+        {
+          rowIndex: 1,
+          text: 'Text with, comma',
+          grade: '3',
+          evaluatorId: 'vocabulary',
+          status: 'success',
+          score: 'slightly complex',
+          reasoning: 'Reasoning with, comma',
+          processingTimeMs: 1000,
+          originalRow: { text: 'Text with, comma', grade: '3' },
+        },
+      ];
+
+      const output: BatchOutput = {
+        results: resultsWithCommas,
+        summary: {
+          totalTasks: 1,
+          successful: 1,
+          failed: 0,
+          durationMs: 1000,
+          resultsPerEvaluator: { vocabulary: { successful: 1, failed: 0 } },
+        },
+      };
+
+      const csv = formatAsCSV(output);
+      expect(csv).toContain('"Text with, comma"');
+      expect(csv).toContain('"Reasoning with, comma"');
+    });
+
+    it('should handle errors in evaluator columns', () => {
+      const csv = formatAsCSV(sampleOutput);
+
+      // Row 2 has vocabulary error - should have empty score, error as reasoning, status=error
+      expect(csv).toContain('API timeout'); // Error message in reasoning column
+      expect(csv).toContain('error'); // Status column
+    });
+
+    it('should preserve original columns in order', () => {
+      const csv = formatAsCSV(sampleOutput);
+      const lines = csv.split('\n');
+
+      // First line should be headers with original columns first
+      const headers = lines[0];
+      expect(headers).toContain('row_id');
+      expect(headers).toContain('text');
+      expect(headers).toContain('grade');
+      expect(headers).toContain('source');
+
+      // Should have evaluator columns (not single "evaluator" column)
+      expect(headers).toContain('vocabulary_score');
+
+      // Data rows should have original data first - now one row per input row
+      expect(lines[1].startsWith('1,')).toBe(true); // Row 1
+      expect(lines[2].startsWith('2,')).toBe(true); // Row 2
+    });
+  });
+
+  describe('formatAsJSON', () => {
+    it('should format results as valid JSON', () => {
+      const json = formatAsJSON(sampleOutput);
+
+      expect(() => JSON.parse(json)).not.toThrow();
+    });
+
+    it('should include results and summary', () => {
+      const json = formatAsJSON(sampleOutput);
+      const parsed = JSON.parse(json);
+
+      expect(parsed).toHaveProperty('results');
+      expect(parsed).toHaveProperty('summary');
+      expect(parsed.results).toHaveLength(3);
+    });
+
+    it('should preserve all result fields', () => {
+      const json = formatAsJSON(sampleOutput);
+      const parsed = JSON.parse(json);
+
+      const firstResult = parsed.results[0];
+      expect(firstResult).toHaveProperty('rowIndex');
+      expect(firstResult).toHaveProperty('text');
+      expect(firstResult).toHaveProperty('grade');
+      expect(firstResult).toHaveProperty('evaluatorId');
+      expect(firstResult).toHaveProperty('status');
+      expect(firstResult).toHaveProperty('processingTimeMs');
+    });
+
+    it('should include summary statistics', () => {
+      const json = formatAsJSON(sampleOutput);
+      const parsed = JSON.parse(json);
+
+      expect(parsed.summary.totalTasks).toBe(3);
+      expect(parsed.summary.successful).toBe(2);
+      expect(parsed.summary.failed).toBe(1);
+      expect(parsed.summary.durationMs).toBe(7500);
+    });
+  });
+
+  describe('formatAsHTML', () => {
+    it('should generate valid HTML', () => {
+      const html = formatAsHTML(sampleOutput);
+
+      expect(html).toContain('<!DOCTYPE html>');
+      expect(html).toContain('<html');
+      expect(html).toContain('</html>');
+    });
+
+    it('should include AG Grid script', () => {
+      const html = formatAsHTML(sampleOutput);
+
+      expect(html).toContain('ag-grid-community');
+    });
+
+    it('should include summary statistics', () => {
+      const html = formatAsHTML(sampleOutput);
+
+      expect(html).toContain('3'); // Total tasks
+      expect(html).toContain('2'); // Successful
+      expect(html).toContain('1'); // Failed
+    });
+
+    it('should include grid data as JSON', () => {
+      const html = formatAsHTML(sampleOutput);
+
+      expect(html).toContain('const rowData');
+      expect(html).toContain('vocabulary_status');
+      expect(html).toContain('sentence_structure_status');
+    });
+
+    it('should include HTML-like content in JSON data', () => {
+      const resultsWithHTML: BatchResult[] = [
+        {
+          rowIndex: 1,
+          text: 'Text with <script>alert("xss")</script>',
+          grade: '3',
+          evaluatorId: 'vocabulary',
+          status: 'success',
+          score: 'slightly complex',
+          reasoning: 'Reasoning with <b>bold</b>',
+          processingTimeMs: 1000,
+          originalRow: { text: 'Text with <script>alert("xss")</script>', grade: '3' },
+        },
+      ];
+
+      const output: BatchOutput = {
+        results: resultsWithHTML,
+        summary: {
+          totalTasks: 1,
+          successful: 1,
+          failed: 0,
+          durationMs: 1000,
+          resultsPerEvaluator: { vocabulary: { successful: 1, failed: 0 } },
+        },
+      };
+
+      const html = formatAsHTML(output);
+
+      // JSON.stringify automatically escapes HTML, so it's safe
+      // The content will be in the JSON data but escaped
+      expect(html).toContain('const rowData');
+      expect(html).toContain('vocabulary');
+    });
+
+    it('should include column definitions with evaluator columns', () => {
+      const html = formatAsHTML(sampleOutput);
+
+      expect(html).toContain('columnDefs');
+      expect(html).toContain('field: \'row\'');
+      expect(html).toContain('field: \'text\'');
+
+      // Should have evaluator-specific columns (not single "status" column)
+      expect(html).toContain('vocabulary_status');
+      expect(html).toContain('vocabulary_score');
+      expect(html).toContain('sentence_structure_status');
+    });
+  });
+});
diff --git a/sdks/typescript/tests/unit/batch/limits.test.ts b/sdks/typescript/tests/unit/batch/limits.test.ts
new file mode 100644
index 0000000..7db5f4f
--- /dev/null
+++ b/sdks/typescript/tests/unit/batch/limits.test.ts
@@ -0,0 +1,107 @@
+import { describe, it, expect } from 'vitest';
+
+describe('Batch Size Limits', () => {
+  const MAX_TASKS = 500; // NOTE(review): local stand-in — this file imports only vitest, so these cases never run the real limit check
+
+  it('should calculate total tasks correctly', () => {
+    const inputs = 100;
+    const evaluators = 3;
+    const totalTasks = inputs * evaluators;
+
+    expect(totalTasks).toBe(300);
+  });
+
+  it('should accept batch under limit', () => {
+    const totalTasks = 100;
+
+    expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+  });
+
+  it('should accept batch at limit', () => {
+    const totalTasks = 500;
+
+    expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+  });
+
+  it('should reject batch over limit', () => {
+    const totalTasks = 501;
+
+    expect(totalTasks).toBeGreaterThan(MAX_TASKS);
+  });
+
+  it('should show warning for batch > 100 tasks', () => {
+    const totalTasks = 150;
+
+    expect(totalTasks).toBeGreaterThan(100);
+    expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+  });
+
+  it('should estimate time correctly', () => {
+    // Rough estimate: 2 seconds per task with concurrency=3
+    const totalTasks = 300;
+    const estimatedSeconds = (totalTasks * 2) / 3; // Parallel processing
+    const estimatedMinutes = Math.ceil(estimatedSeconds / 60);
+
+    expect(estimatedMinutes).toBeGreaterThan(0);
+    expect(estimatedMinutes).toBe(4); // 300 * 2 / 3 = 200s = 3.33m → 4m
+  });
+
+  it('should estimate cost correctly', () => {
+    const totalTasks = 300;
+    const estimatedCost = (totalTasks * 0.015).toFixed(2);
+
+    expect(estimatedCost).toBe('4.50');
+  });
+
+  describe('Edge Cases', () => {
+    it('should handle 1 row × 1 evaluator', () => {
+      const totalTasks = 1 * 1;
+      expect(totalTasks).toBe(1);
+      expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+    });
+
+    it('should handle max rows with 1 evaluator', () => {
+      const totalTasks = 500 * 1;
+      expect(totalTasks).toBe(500);
+      expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+    });
+
+    it('should reject 167 rows × 3 evaluators', () => {
+      const totalTasks = 167 * 3;
+      expect(totalTasks).toBe(501);
+      expect(totalTasks).toBeGreaterThan(MAX_TASKS);
+    });
+
+    it('should accept 166 rows × 3 evaluators', () => {
+      const totalTasks = 166 * 3;
+      expect(totalTasks).toBe(498);
+      expect(totalTasks).toBeLessThanOrEqual(MAX_TASKS);
+    });
+  });
+
+  describe('Suggestions for Over Limit', () => {
+    it('should suggest reducing rows', () => {
+      const currentRows = 200;
+      const evaluators = 3;
+      const totalTasks = currentRows * evaluators; // 600
+
+      expect(totalTasks).toBeGreaterThan(MAX_TASKS);
+
+      // Calculate max rows for current evaluators
+      const maxRows = Math.floor(MAX_TASKS / evaluators);
+      expect(maxRows).toBe(166);
+    });
+
+    it('should suggest reducing evaluators', () => {
+      const rows = 300;
+      const currentEvaluators = 3;
+      const totalTasks = rows * currentEvaluators; // 900
+
+      expect(totalTasks).toBeGreaterThan(MAX_TASKS);
+
+      // Calculate max evaluators for current rows
+      const maxEvaluators = Math.floor(MAX_TASKS / rows);
+      expect(maxEvaluators).toBe(1);
+    });
+  });
+});
diff --git a/sdks/typescript/tsup.config.ts b/sdks/typescript/tsup.config.ts
index 1a81469..de81c72 100644
--- a/sdks/typescript/tsup.config.ts
+++ b/sdks/typescript/tsup.config.ts
@@ -1,7 +1,7 @@
 import { defineConfig } from 'tsup';
 
 export default defineConfig({
-  entry: ['src/index.ts'],
+  entry: ['src/index.ts', 'src/batch/index.ts'],
   format: ['esm', 'cjs'],
   dts: true,
   splitting: false,

From b33922613e66f1dc012fb4de1458221ecf9cf4a7 Mon Sep 17 00:00:00 2001
From: Adnan Rashid Hussain <ahussain@chanzuckerberg.com>
Date: Wed, 4 Mar 2026 20:53:15 -0800
Subject: [PATCH 2/4] fix typing

---
 sdks/typescript/tests/unit/batch/csv-parsing.test.ts | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sdks/typescript/tests/unit/batch/csv-parsing.test.ts b/sdks/typescript/tests/unit/batch/csv-parsing.test.ts
index 414b815..4b0fcfb 100644
--- a/sdks/typescript/tests/unit/batch/csv-parsing.test.ts
+++ b/sdks/typescript/tests/unit/batch/csv-parsing.test.ts
@@ -70,8 +70,10 @@ describe('CSV Parsing Robustness', () => {
     expect(gradeColumn).toBe('Grade'); // Mixed case in CSV (whitespace trimmed)
 
     // Should be able to read values
-    expect(firstRow[textColumn!]).toBeTruthy();
-    expect(firstRow[gradeColumn!]).toBe('3');
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    expect((firstRow as any)[textColumn!]).toBeTruthy();
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    expect((firstRow as any)[gradeColumn!]).toBe('3');
   });
 
   it('should filter out empty rows', () => {

From 177f18057775cf23f7cd6117bde5e46e98346821 Mon Sep 17 00:00:00 2001
From: Adnan Rashid Hussain <ahussain@chanzuckerberg.com>
Date: Wed, 4 Mar 2026 21:01:15 -0800
Subject: [PATCH 3/4] remove json output

---
 sdks/typescript/src/batch/README.md     | 15 ++++-----------
 sdks/typescript/src/batch/formatters.ts |  7 -------
 sdks/typescript/src/batch/index.ts      |  8 +-------
 3 files changed, 5 insertions(+), 25 deletions(-)

diff --git a/sdks/typescript/src/batch/README.md b/sdks/typescript/src/batch/README.md
index 8b4fc91..d3636cc 100644
--- a/sdks/typescript/src/batch/README.md
+++ b/sdks/typescript/src/batch/README.md
@@ -1,6 +1,6 @@
 # Batch CSV Evaluator
 
-Evaluate multiple texts from a CSV file using one or more evaluators, with results output in CSV, JSON, and HTML formats.
+Evaluate multiple texts from a CSV file using one or more evaluators, with results output in CSV and HTML formats.
 
 ## Usage
 
@@ -41,7 +41,6 @@ The output directory is automatically created with a human-readable timestamp:
 ```
 batch-results-2024-02-07_14-30-22/
 ├── results.csv
-├── results.json
 └── results.html
 ```
 
@@ -69,17 +68,13 @@ See `tests/fixtures/sample-batch-input.csv` for a complete example.
 
 ### Output Files
 
-Three files are generated:
+Two files are generated:
 
-1. **CSV** (`batch-results-YYYY-MM-DD.csv`):
+1. **CSV** (`results.csv`):
    - Spreadsheet-compatible format
    - Columns: Row, Text, Grade, Evaluator, Status, Score, Reasoning, Error, Processing Time
 
-2. **JSON** (`batch-results-YYYY-MM-DD.json`):
-   - Structured data with full results and summary statistics
-   - Easy to parse programmatically
-
-3. **HTML** (`batch-results-YYYY-MM-DD.html`):
+2. **HTML** (`results.html`):
    - Interactive table with sorting and filtering (AG Grid)
    - Color-coded status indicators
    - Summary statistics dashboard
@@ -198,7 +193,6 @@ Duration: 45s
 📄 Output files generated:
   ./batch-results-2024-02-07_14-30-22/
     ├── results.csv
-    ├── results.json
     └── results.html
 ```
 
@@ -228,7 +222,6 @@ Example:
 ✓ Saved 15 results to:
   ./batch-results-2024-02-07_14-30-22/
     ├── results-partial.csv
-    ├── results-partial.json
     └── results-partial.html
 ```
 
diff --git a/sdks/typescript/src/batch/formatters.ts b/sdks/typescript/src/batch/formatters.ts
index f80a63e..e1ea75f 100644
--- a/sdks/typescript/src/batch/formatters.ts
+++ b/sdks/typescript/src/batch/formatters.ts
@@ -110,13 +110,6 @@ function escapeCSV(field: string): string {
   return field;
 }
 
-/**
- * Format results as JSON
- */
-export function formatAsJSON(output: BatchOutput): string {
-  return JSON.stringify(output, null, 2);
-}
-
 /**
  * Format results as HTML with AG Grid
  */
diff --git a/sdks/typescript/src/batch/index.ts b/sdks/typescript/src/batch/index.ts
index 69948a3..e8f5625 100644
--- a/sdks/typescript/src/batch/index.ts
+++ b/sdks/typescript/src/batch/index.ts
@@ -9,7 +9,7 @@ import {
   getAvailableEvaluators,
   getRequiredApiKeys,
 } from './evaluator.js';
-import { formatAsCSV, formatAsJSON, formatAsHTML } from './formatters.js';
+import { formatAsCSV, formatAsHTML } from './formatters.js';
 import { ProgressTracker } from './progress.js';
 import type { BatchInput } from './types.js';
 
@@ -347,17 +347,14 @@ async function main() {
         // Save partial results
         try {
           const csvPath_partial = path.join(outputDir, 'results-partial.csv');
-          const jsonPath_partial = path.join(outputDir, 'results-partial.json');
           const htmlPath_partial = path.join(outputDir, 'results-partial.html');
 
           fs.writeFileSync(csvPath_partial, formatAsCSV(partialOutput));
-          fs.writeFileSync(jsonPath_partial, formatAsJSON(partialOutput));
           fs.writeFileSync(htmlPath_partial, formatAsHTML(partialOutput));
 
           console.log(`✓ Saved ${partialResults.length} results to:`);
           console.log(`  ${outputDir}/`);
           console.log(`    ├── results-partial.csv`);
-          console.log(`    ├── results-partial.json`);
           console.log(`    └── results-partial.html`);
           console.log();
         } catch (error) {
@@ -390,18 +387,15 @@ async function main() {
 
     // Step 7: Write output files
     const csvPath_out = path.join(outputDir, 'results.csv');
-    const jsonPath = path.join(outputDir, 'results.json');
     const htmlPath = path.join(outputDir, 'results.html');
 
     try {
       fs.writeFileSync(csvPath_out, formatAsCSV(output));
-      fs.writeFileSync(jsonPath, formatAsJSON(output));
       fs.writeFileSync(htmlPath, formatAsHTML(output));
 
       console.log('📄 Output files generated:');
       console.log(`  ${outputDir}/`);
       console.log(`    ├── results.csv`);
-      console.log(`    ├── results.json`);
       console.log(`    └── results.html`);
       console.log();
     } catch (error) {

From e6621b55bc10445d3ae47340e639651ee04f214e Mon Sep 17 00:00:00 2001
From: Adnan Rashid Hussain <ahussain@chanzuckerberg.com>
Date: Wed, 4 Mar 2026 23:04:26 -0800
Subject: [PATCH 4/4] implement a html template and formatter

---
 sdks/typescript/src/batch/formatters.ts       | 588 ++++++-----
 sdks/typescript/src/batch/index.ts            |  17 +-
 .../typescript/src/batch/report-template.html | 914 ++++++++++++++++++
 sdks/typescript/src/types/html.d.ts           |   4 +
 .../tests/unit/batch/formatters.test.ts       | 510 ++++++----
 sdks/typescript/tsup.config.ts                |   3 +-
 sdks/typescript/vitest.config.ts              |  17 +-
 7 files changed, 1537 insertions(+), 516 deletions(-)
 create mode 100644 sdks/typescript/src/batch/report-template.html
 create mode 100644 sdks/typescript/src/types/html.d.ts

diff --git a/sdks/typescript/src/batch/formatters.ts b/sdks/typescript/src/batch/formatters.ts
index e1ea75f..2a08735 100644
--- a/sdks/typescript/src/batch/formatters.ts
+++ b/sdks/typescript/src/batch/formatters.ts
@@ -1,49 +1,111 @@
 import type { BatchOutput, BatchResult } from './types.js';
+import reportTemplate from './report-template.html';
+
+// ---- Constants ----
+
+const GLA_EVALUATOR_ID = 'grade-level-appropriateness'; // results with this id get GLA handling; every other id is treated as a complexity evaluator
+
+const GRADE_BANDS = ['K-1', '2-3', '4-5', '6-8', '9-10', '11-CCR'] as const; // array positions double as band indices
+type GradeBand = typeof GRADE_BANDS[number];
+
+// Complexity labels → numeric rank. Keys are lowercase; complexityToNumeric lowercases and trims the score first, so Title Case scores match too.
+const COMPLEXITY_SCORE_MAP: Record<string, number> = {
+  'slightly complex': 1,
+  'moderately complex': 2,
+  'very complex': 3,
+  'exceedingly complex': 4,
+};
+
+// ---- Helpers ----
+
+function evaluatorDisplayName(id: string): string { // kebab-case id → space-separated words, each with its first letter upper-cased
+  return id.split('-').map(word => `${word.charAt(0).toUpperCase()}${word.slice(1)}`).join(' ');
+}
+
+/** Resolves a raw grade (K, 1–12 or CCR; case, padding and leading zeros ignored) to its GRADE_BANDS index, or -1 if unrecognized. */
+function gradeToBandIndex(grade: string): number {
+  const normalized = String(grade).trim().toUpperCase().replace(/^0+/, '');
+  const gradesByBand = [ // one entry per band, in GRADE_BANDS order
+    ['K', 'KINDERGARTEN', '1'],
+    ['2', '3'],
+    ['4', '5'],
+    ['6', '7', '8'],
+    ['9', '10'], ['11', '12', 'CCR'],
+  ];
+  return gradesByBand.findIndex(grades => grades.includes(normalized));
+}
+
+/** Looks up a GLA band label (e.g. "4-5") in GRADE_BANDS; yields -1 when the label is not one of the known bands. */
+function glaBandToIndex(band: string): number {
+  return GRADE_BANDS.findIndex(candidate => candidate === band);
+}
+
+/** Classifies the evaluated GLA band by its distance, in bands, from the band of the input grade. */
+function getGLAStatus(inputGrade: string, glaBand: string): 'on-band' | 'adjacent' | 'off-target' {
+  const expectedIdx = gradeToBandIndex(inputGrade);
+  const actualIdx = glaBandToIndex(glaBand);
+  // An unmappable grade or band cannot be compared, so it is reported as off-target.
+  if (expectedIdx < 0 || actualIdx < 0) return 'off-target';
+  const distance = Math.abs(expectedIdx - actualIdx);
+  return distance === 0 ? 'on-band' : distance === 1 ? 'adjacent' : 'off-target';
+}
+
+function complexityToNumeric(score: string): number | undefined { // rank (1–4) for a complexity label, matched case-insensitively; undefined if unknown
+  return Object.entries(COMPLEXITY_SCORE_MAP).find(([label]) => label === score.toLowerCase().trim())?.[1]; // own entries only: "constructor" must not resolve via Object.prototype
+}
+
+/** Converts an average complexity score into its label, using 1.5 / 2.5 / 3.5 as exclusive upper cut-offs. */
+function complexityScoreLabel(avg: number): string {
+  const labels = ['Slightly Complex', 'Moderately Complex', 'Very Complex', 'Exceedingly Complex'];
+  const tier = [1.5, 2.5, 3.5].findIndex(cutoff => avg < cutoff);
+  return labels[tier === -1 ? 3 : tier];
+}
+
+/** Stub: always returns the same three hard-coded insights, independent of the batch results. TODO: derive them from real data. */
+function generateInsights(): string[] {
+  return [
+    'Review texts marked as Off Target — they may need content revision or grade-level adjustment before distribution.',
+    'Texts evaluated as Adjacent may benefit from light scaffolding strategies such as vocabulary pre-teaching.',
+    'Higher grade bands tend to show greater text complexity. Consider whether complexity aligns with instructional goals.',
+  ];
+}
+
+// ---- Shared grouping utility ----
 
-/**
- * Group results by row index
- */
 function groupResultsByRow(results: BatchResult[]): Map<number, BatchResult[]> {
   const grouped = new Map<number, BatchResult[]>();
-
   for (const result of results) {
     if (!grouped.has(result.rowIndex)) {
       grouped.set(result.rowIndex, []);
     }
     grouped.get(result.rowIndex)!.push(result);
   }
-
   return grouped;
 }
 
-/**
- * Format evaluator ID as column prefix (kebab-case to snake_case)
- */
+// ---- CSV Formatter ----
+
 function formatEvaluatorPrefix(evaluatorId: string): string {
   return evaluatorId.replace(/-/g, '_');
 }
 
-/**
- * Format results as CSV with columns per evaluator
- */
+/** Quotes a CSV field (RFC 4180) when it holds a comma, double quote, CR or LF; embedded quotes are doubled. */
+function escapeCSV(field: string): string {
+  // A bare \r must be quoted as well: many CSV parsers treat it as a record separator.
+  if (!/[",\r\n]/.test(field)) return field;
+  return `"${field.replace(/"/g, '""')}"`;
+}
+
 export function formatAsCSV(output: BatchOutput): string {
   if (output.results.length === 0) {
     return '';
   }
 
-  // Group results by row
   const groupedByRow = groupResultsByRow(output.results);
-
-  // Get unique evaluator IDs (sorted for consistent column order)
-  const evaluatorIds = Array.from(
-    new Set(output.results.map(r => r.evaluatorId))
-  ).sort();
-
-  // Get original column names from first result
+  const evaluatorIds = Array.from(new Set(output.results.map(r => r.evaluatorId))).sort();
   const firstResult = output.results[0];
   const originalColumns = Object.keys(firstResult.originalRow);
 
-  // Build headers: original columns + evaluator columns (score, reasoning, status)
   const evaluatorColumns: string[] = [];
   for (const evalId of evaluatorIds) {
     const prefix = formatEvaluatorPrefix(evalId);
@@ -53,7 +115,6 @@ export function formatAsCSV(output: BatchOutput): string {
   }
   const headers = [...originalColumns, ...evaluatorColumns];
 
-  // Build rows (one per input row)
   const rows: string[][] = [];
   const sortedRowIndices = Array.from(groupedByRow.keys()).sort((a, b) => a - b);
 
@@ -61,35 +122,20 @@ export function formatAsCSV(output: BatchOutput): string {
     const resultsForRow = groupedByRow.get(rowIndex)!;
     const firstResultForRow = resultsForRow[0];
 
-    // Original column values
     const originalValues = originalColumns.map(col =>
       escapeCSV(String(firstResultForRow.originalRow[col] || ''))
     );
 
-    // Evaluator column values
     const evaluatorValues: string[] = [];
     for (const evalId of evaluatorIds) {
       const result = resultsForRow.find(r => r.evaluatorId === evalId);
-
       if (result) {
-        // Score
-        if (result.status === 'success') {
-          evaluatorValues.push(escapeCSV(result.score || ''));
-        } else {
-          evaluatorValues.push(''); // Empty for errors
-        }
-
-        // Reasoning
-        if (result.status === 'success') {
-          evaluatorValues.push(escapeCSV(result.reasoning || ''));
-        } else {
-          evaluatorValues.push(escapeCSV(result.error || ''));
-        }
-
-        // Status
+        evaluatorValues.push(result.status === 'success' ? escapeCSV(result.score || '') : '');
+        evaluatorValues.push(result.status === 'success'
+          ? escapeCSV(result.reasoning || '')
+          : escapeCSV(result.error || ''));
         evaluatorValues.push(result.status);
       } else {
-        // Evaluator not run for this row
         evaluatorValues.push('', '', 'not_run');
       }
     }
@@ -100,293 +146,217 @@ export function formatAsCSV(output: BatchOutput): string {
   return [headers, ...rows].map(row => row.join(',')).join('\n');
 }
 
-/**
- * Escape CSV field (handle quotes and commas)
- */
-function escapeCSV(field: string): string {
-  if (field.includes(',') || field.includes('"') || field.includes('\n')) {
-    return `"${field.replace(/"/g, '""')}"`;
-  }
-  return field;
+// ---- HTML Formatter ----
+
+export interface ReportMeta { // run-level metadata that formatAsHTML copies into the report's `meta` section
+  csvPath: string; // path of the input CSV (the CLI passes it resolved to an absolute path)
+  evaluatorIds: string[]; // evaluator ids for the run; display names are derived from these
+  reportId: string; // report identifier (the CLI builds it from the sanitized CSV basename plus a timestamp)
+  generatedAt: Date; // rendered with toLocaleString('en-US', …) when the report data is assembled
+  totalInputRows: number; // number of input rows (inputs.length in the CLI); reported as totalRows
 }
 
-/**
- * Format results as HTML with AG Grid
- */
-export function formatAsHTML(output: BatchOutput): string {
-  if (output.results.length === 0) {
-    return '<html><body><p>No results to display</p></body></html>';
+export function formatAsHTML(output: BatchOutput, meta: ReportMeta): string {
+  const { results } = output;
+  const byRow = groupResultsByRow(results);
+  const allRowIndices = Array.from(byRow.keys()).sort((a, b) => a - b);
+
+  const allEvaluatorIds = Array.from(new Set(results.map(r => r.evaluatorId))).sort();
+  const hasGLA = allEvaluatorIds.includes(GLA_EVALUATOR_ID);
+  const complexityIds = allEvaluatorIds.filter(id => id !== GLA_EVALUATOR_ID);
+
+  // ---- Snapshot ----
+  let processedRows = 0;
+  let erroredRows = 0;
+  for (const rowResults of byRow.values()) {
+    if (rowResults.some(r => r.status === 'error')) erroredRows++;
+    else processedRows++;
   }
 
-  // Group results by row
-  const groupedByRow = groupResultsByRow(output.results);
-
-  // Get unique evaluator IDs (sorted)
-  const evaluatorIds = Array.from(
-    new Set(output.results.map(r => r.evaluatorId))
-  ).sort();
-
-  // Convert grouped results to grid data (one row per input row)
-  // eslint-disable-next-line @typescript-eslint/no-explicit-any
-  const gridData: any[] = [];
-  const sortedRowIndices = Array.from(groupedByRow.keys()).sort((a, b) => a - b);
-
-  for (const rowIndex of sortedRowIndices) {
-    const resultsForRow = groupedByRow.get(rowIndex)!;
-    const firstResult = resultsForRow[0];
-
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const rowData: any = {
-      row: rowIndex,
-      text: firstResult.text.substring(0, 100) + (firstResult.text.length > 100 ? '...' : ''),
-      textFull: firstResult.text,
-      grade: firstResult.grade,
-    };
-
-    // Add evaluator-specific fields
-    for (const evalId of evaluatorIds) {
-      const result = resultsForRow.find(r => r.evaluatorId === evalId);
-      const prefix = formatEvaluatorPrefix(evalId);
-
-      if (result) {
-        rowData[`${prefix}_status`] = result.status;
-        rowData[`${prefix}_score`] = result.status === 'success' ? (result.score || '') : '';
-        rowData[`${prefix}_reasoning`] = result.status === 'success' ? (result.reasoning || '') : (result.error || '');
-      } else {
-        rowData[`${prefix}_status`] = 'not_run';
-        rowData[`${prefix}_score`] = '';
-        rowData[`${prefix}_reasoning`] = '';
+  // ---- GLA stats ----
+  const glaCounts = { onBand: 0, adjacent: 0, offTarget: 0 };
+  const rowGLAStatus = new Map<number, {
+    status: 'on-band' | 'adjacent' | 'off-target';
+    band: string;
+    reasoning: string;
+  }>();
+
+  if (hasGLA) {
+    for (const [rowIndex, rowResults] of byRow) {
+      const glaResult = rowResults.find(r => r.evaluatorId === GLA_EVALUATOR_ID);
+      if (glaResult && glaResult.status === 'success' && glaResult.score) {
+        const status = getGLAStatus(glaResult.grade, glaResult.score);
+        rowGLAStatus.set(rowIndex, { status, band: glaResult.score, reasoning: glaResult.reasoning || '' });
+        if (status === 'on-band') glaCounts.onBand++;
+        else if (status === 'adjacent') glaCounts.adjacent++;
+        else glaCounts.offTarget++;
       }
     }
-
-    gridData.push(rowData);
   }
 
-  return `<!DOCTYPE html>
-<html lang="en">
-<head>
-  <meta charset="UTF-8">
-  <meta name="viewport" content="width=device-width, initial-scale=1.0">
-  <title>Batch Evaluation Results</title>
-  <link rel="icon" href="data:image/svg+xml,<svg xmlns=%22http://www.w3.org/2000/svg%22 viewBox=%220 0 100 100%22><text y=%22.9em%22 font-size=%2290%22>📊</text></svg>">
-
-  <!-- AG Grid -->
-  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/styles/ag-grid.css">
-  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/styles/ag-theme-quartz.css">
-  <script src="https://cdn.jsdelivr.net/npm/ag-grid-community@31.0.1/dist/ag-grid-community.min.js"></script>
-
-  <style>
-    * {
-      margin: 0;
-      padding: 0;
-      box-sizing: border-box;
-    }
-
-    body {
-      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
-      padding: 20px;
-      background: #f5f5f5;
-    }
-
-    .container {
-      max-width: 1400px;
-      margin: 0 auto;
-      background: white;
-      border-radius: 8px;
-      box-shadow: 0 2px 8px rgba(0,0,0,0.1);
-      overflow: hidden;
-    }
-
-    .header {
-      background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-      color: white;
-      padding: 30px;
-    }
-
-    .header h1 {
-      font-size: 28px;
-      margin-bottom: 10px;
-    }
-
-    .header p {
-      opacity: 0.9;
-      font-size: 14px;
-    }
-
-    .summary {
-      display: grid;
-      grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-      gap: 20px;
-      padding: 30px;
-      background: #f8f9fa;
-      border-bottom: 1px solid #e0e0e0;
-    }
-
-    .stat {
-      text-align: center;
+  const glaTotal = glaCounts.onBand + glaCounts.adjacent + glaCounts.offTarget;
+  const pct = (n: number) => glaTotal > 0 ? Math.round((n / glaTotal) * 100) : 0;
+
+  // ---- Complexity stats per evaluator ----
+  const complexityStats = complexityIds.map(evalId => {
+    const scores: number[] = [];
+    const distribution: [number, number, number, number] = [0, 0, 0, 0];
+
+    for (const rowResults of byRow.values()) {
+      const r = rowResults.find(x => x.evaluatorId === evalId);
+      if (r && r.status === 'success' && r.score) {
+        const num = complexityToNumeric(r.score);
+        if (num !== undefined) {
+          scores.push(num);
+          distribution[num - 1]++;
+        }
+      }
     }
 
-    .stat-value {
-      font-size: 32px;
-      font-weight: bold;
-      color: #333;
+    const avg = scores.length > 0 ? scores.reduce((a, b) => a + b, 0) / scores.length : 0;
+    return {
+      evaluatorId: evalId,
+      name: evaluatorDisplayName(evalId),
+      average: Math.round(avg * 10) / 10,
+      label: avg > 0 ? complexityScoreLabel(avg) : 'N/A',
+      distribution,
+    };
+  });
+
+  // ---- Grade band distribution (GLA status per input grade band) ----
+  const bandDist = GRADE_BANDS.map(() => ({ onBand: 0, adjacent: 0, offTarget: 0, total: 0 }));
+
+  for (const [rowIndex, rowResults] of byRow) {
+    const firstResult = rowResults[0];
+    if (!firstResult) continue;
+    const bandIdx = gradeToBandIndex(firstResult.grade);
+    if (bandIdx === -1) continue;
+
+    const glaStatus = rowGLAStatus.get(rowIndex);
+    if (glaStatus) {
+      bandDist[bandIdx].total++;
+      if (glaStatus.status === 'on-band') bandDist[bandIdx].onBand++;
+      else if (glaStatus.status === 'adjacent') bandDist[bandIdx].adjacent++;
+      else bandDist[bandIdx].offTarget++;
     }
+  }
 
-    .stat-label {
-      font-size: 12px;
-      text-transform: uppercase;
-      color: #666;
-      margin-top: 5px;
-      letter-spacing: 0.5px;
-    }
+  // ---- Complexity heatmap: avg score per [grade band][evaluator] ----
+  const hmSums: number[][] = GRADE_BANDS.map(() => complexityIds.map(() => 0));
+  const hmCounts: number[][] = GRADE_BANDS.map(() => complexityIds.map(() => 0));
+
+  for (const rowResults of byRow.values()) {
+    const firstResult = rowResults[0];
+    if (!firstResult) continue;
+    const bandIdx = gradeToBandIndex(firstResult.grade);
+    if (bandIdx === -1) continue;
+
+    complexityIds.forEach((evalId, evalIdx) => {
+      const r = rowResults.find(x => x.evaluatorId === evalId);
+      if (r && r.status === 'success' && r.score) {
+        const num = complexityToNumeric(r.score);
+        if (num !== undefined) {
+          hmSums[bandIdx][evalIdx] += num;
+          hmCounts[bandIdx][evalIdx]++;
+        }
+      }
+    });
+  }
 
-    .success { color: #10b981; }
-    .error { color: #ef4444; }
+  const heatmapValues: (number | null)[][] = GRADE_BANDS.map((_, bi) =>
+    complexityIds.map((_, ei) => {
+      const count = hmCounts[bi][ei];
+      return count > 0 ? Math.round((hmSums[bi][ei] / count) * 10) / 10 : null;
+    })
+  );
 
-    #grid {
-      height: 600px;
-      margin: 20px;
-    }
+  // ---- Full results rows ----
+  const firstRowResults = allRowIndices.length > 0 ? (byRow.get(allRowIndices[0]) ?? []) : [];
+  const originalColumns = firstRowResults.length > 0 ? Object.keys(firstRowResults[0].originalRow) : [];
 
-    .status-success {
-      background: #d1fae5;
-      color: #065f46;
-      padding: 4px 8px;
-      border-radius: 4px;
-      font-size: 12px;
-      font-weight: 600;
-    }
+  const fullResultsRows = allRowIndices.map(rowIndex => {
+    const rowResults = byRow.get(rowIndex)!;
+    const firstResult = rowResults[0];
+    const row: Record<string, string> = {};
 
-    .status-error {
-      background: #fee2e2;
-      color: #991b1b;
-      padding: 4px 8px;
-      border-radius: 4px;
-      font-size: 12px;
-      font-weight: 600;
+    for (const col of originalColumns) {
+      row[col] = String(firstResult.originalRow[col] ?? '');
     }
 
-    .footer {
-      padding: 20px 30px;
-      text-align: center;
-      color: #666;
-      font-size: 12px;
-      border-top: 1px solid #e0e0e0;
+    const glaStatus = rowGLAStatus.get(rowIndex);
+    const glaLabels = { 'on-band': 'On Band', 'adjacent': 'Adjacent', 'off-target': 'Off Target' } as const;
+    row['__gla_status'] = glaStatus ? glaLabels[glaStatus.status] : (hasGLA ? 'Error' : '');
+    row['__gla_band'] = glaStatus?.band ?? '';
+    row['__gla_reasoning'] = glaStatus?.reasoning ?? '';
+
+    for (const evalId of complexityIds) {
+      const r = rowResults.find(x => x.evaluatorId === evalId);
+      const prefix = `__${evalId.replace(/-/g, '_')}`;
+      row[`${prefix}_score`] = r?.status === 'success' ? (r.score ?? '') : (r?.status === 'error' ? 'Error' : '');
+      row[`${prefix}_reasoning`] = r?.status === 'success' ? (r.reasoning ?? '') : (r?.error ?? '');
     }
-  </style>
-</head>
-<body>
-  <div class="container">
-    <div class="header">
-      <h1>📊 Batch Evaluation Results</h1>
-      <p>Generated on ${new Date().toLocaleString()}</p>
-    </div>
-
-    <div class="summary">
-      <div class="stat">
-        <div class="stat-value">${output.summary.totalTasks}</div>
-        <div class="stat-label">Total Tasks</div>
-      </div>
-      <div class="stat">
-        <div class="stat-value success">${output.summary.successful}</div>
-        <div class="stat-label">Successful</div>
-      </div>
-      <div class="stat">
-        <div class="stat-value error">${output.summary.failed}</div>
-        <div class="stat-label">Failed</div>
-      </div>
-      <div class="stat">
-        <div class="stat-value">${Math.round(output.summary.durationMs / 1000)}s</div>
-        <div class="stat-label">Duration</div>
-      </div>
-    </div>
-
-    <div id="grid" class="ag-theme-quartz"></div>
-
-    <div class="footer">
-      Generated by @learning-commons/evaluators
-    </div>
-  </div>
-
-  <script>
-    // Grid data
-    const rowData = ${JSON.stringify(gridData)};
-
-    // Column definitions
-    const columnDefs = [
-      { field: 'row', headerName: 'Row', width: 80, filter: 'agNumberColumnFilter', sortable: true, pinned: 'left' },
-      {
-        field: 'text',
-        headerName: 'Text',
-        width: 300,
-        tooltipField: 'textFull',
-        sortable: true,
-        filter: 'agTextColumnFilter',
-        pinned: 'left'
-      },
-      { field: 'grade', headerName: 'Grade', width: 100, sortable: true, filter: true, pinned: 'left' },
-      ${evaluatorIds.map(evalId => {
-        const prefix = formatEvaluatorPrefix(evalId);
-        const displayName = evalId.split('-').map(w => w.charAt(0).toUpperCase() + w.slice(1)).join(' ');
-
-        return `// ${displayName} columns
-      {
-        headerName: '${displayName}',
-        children: [
-          {
-            field: '${prefix}_status',
-            headerName: 'Status',
-            width: 100,
-            sortable: true,
-            filter: true,
-            cellRenderer: params => {
-              if (!params.value || params.value === 'not_run') return '<span style="color: #999;">—</span>';
-              const className = params.value === 'success' ? 'status-success' : 'status-error';
-              return \`<span class="\${className}">\${params.value.toUpperCase()}</span>\`;
-            }
-          },
-          {
-            field: '${prefix}_score',
-            headerName: 'Score',
-            width: 150,
-            sortable: true,
-            filter: true
-          },
-          {
-            field: '${prefix}_reasoning',
-            headerName: 'Reasoning',
-            width: 350,
-            wrapText: true,
-            autoHeight: false,
-            tooltipField: '${prefix}_reasoning',
-            sortable: true,
-            filter: 'agTextColumnFilter'
-          }
-        ]
-      }`;
-      }).join(',\n      ')}
-    ];
-
-    // Grid options
-    const gridOptions = {
-      columnDefs: columnDefs,
-      rowData: rowData,
-      defaultColDef: {
-        resizable: true,
-        sortable: true,
-        filter: true,
-      },
-      pagination: true,
-      paginationPageSize: 50,
-      paginationPageSizeSelector: [25, 50, 100, 200],
-      domLayout: 'normal',
-      tooltipShowDelay: 500,
-    };
 
-    // Initialize grid
-    const gridDiv = document.querySelector('#grid');
-    agGrid.createGrid(gridDiv, gridOptions);
-  </script>
-</body>
-</html>`;
+    return row;
+  });
+
+  // ---- Assemble report data ----
+  const reportData = {
+    meta: {
+      reportId: meta.reportId,
+      generatedAt: meta.generatedAt.toLocaleString('en-US', {
+        month: 'short', day: 'numeric', year: 'numeric',
+        hour: 'numeric', minute: '2-digit', hour12: true,
+      }),
+      csvPath: meta.csvPath,
+      evaluatorIds: meta.evaluatorIds,
+      evaluatorNames: meta.evaluatorIds.map(evaluatorDisplayName),
+      totalRows: meta.totalInputRows,
+      processedRows,
+      erroredRows,
+    },
+    gradeLevelStats: {
+      onBand: glaCounts.onBand,
+      adjacent: glaCounts.adjacent,
+      offTarget: glaCounts.offTarget,
+      onBandPct: pct(glaCounts.onBand),
+      adjacentPct: pct(glaCounts.adjacent),
+      offTargetPct: pct(glaCounts.offTarget),
+      hasData: glaTotal > 0,
+    },
+    complexityStats,
+    gradeBandDistribution: {
+      bands: [...GRADE_BANDS],
+      data: bandDist,
+    },
+    complexityHeatmap: {
+      bands: [...GRADE_BANDS],
+      evaluators: complexityIds.map(evaluatorDisplayName),
+      evaluatorIds: complexityIds,
+      values: heatmapValues,
+    },
+    insights: generateInsights(),
+    fullResults: {
+      originalColumns,
+      hasGLA,
+      complexityEvaluators: complexityIds.map(id => ({
+        evaluatorId: id,
+        name: evaluatorDisplayName(id),
+        prefix: id.replace(/-/g, '_'),
+      })),
+      rows: fullResultsRows,
+    },
+  };
+
+  // Inject serialized data into the template. < > & are Unicode-escaped so data containing
+  // HTML-like strings cannot close the <script> tag early. A replacer function is used because
+  // a replacement *string* would expand `$$`, `$&`, `$'` … sequences that occur in the data.
+  const safeJson = JSON.stringify(reportData)
+    .replace(/</g, '\\u003c')
+    .replace(/>/g, '\\u003e')
+    .replace(/&/g, '\\u0026');
+
+  return reportTemplate.replace(
+    'var REPORT_DATA = null; // __REPLACED_BY_FORMATTER__',
+    () => `var REPORT_DATA = ${safeJson};`,
+  );
 }
diff --git a/sdks/typescript/src/batch/index.ts b/sdks/typescript/src/batch/index.ts
index e8f5625..3f0ec7b 100644
--- a/sdks/typescript/src/batch/index.ts
+++ b/sdks/typescript/src/batch/index.ts
@@ -9,7 +9,7 @@ import {
   getAvailableEvaluators,
   getRequiredApiKeys,
 } from './evaluator.js';
-import { formatAsCSV, formatAsHTML } from './formatters.js';
+import { formatAsCSV, formatAsHTML, type ReportMeta } from './formatters.js';
 import { ProgressTracker } from './progress.js';
 import type { BatchInput } from './types.js';
 
@@ -255,6 +255,17 @@ async function main() {
       fs.mkdirSync(outputDir, { recursive: true });
     }
 
+    // Build report metadata used by the HTML formatter
+    const csvBasename = path.basename(csvPath, path.extname(csvPath));
+    const reportTimestamp = `${now.getFullYear()}${String(now.getMonth() + 1).padStart(2, '0')}${String(now.getDate()).padStart(2, '0')}T${String(now.getHours()).padStart(2, '0')}${String(now.getMinutes()).padStart(2, '0')}`;
+    const reportMeta: ReportMeta = {
+      csvPath: path.resolve(csvPath),
+      evaluatorIds,
+      reportId: `${csvBasename.replace(/[^a-zA-Z0-9]/g, '_')}_${reportTimestamp}`,
+      generatedAt: now,
+      totalInputRows: inputs.length,
+    };
+
     // Step 5: Confirm and run
     const totalTasks = inputs.length * evaluatorIds.length;
     const MAX_TASKS = 500;
@@ -350,7 +361,7 @@ async function main() {
           const htmlPath_partial = path.join(outputDir, 'results-partial.html');
 
           fs.writeFileSync(csvPath_partial, formatAsCSV(partialOutput));
-          fs.writeFileSync(htmlPath_partial, formatAsHTML(partialOutput));
+          fs.writeFileSync(htmlPath_partial, formatAsHTML(partialOutput, reportMeta));
 
           console.log(`✓ Saved ${partialResults.length} results to:`);
           console.log(`  ${outputDir}/`);
@@ -391,7 +402,7 @@ async function main() {
 
     try {
       fs.writeFileSync(csvPath_out, formatAsCSV(output));
-      fs.writeFileSync(htmlPath, formatAsHTML(output));
+      fs.writeFileSync(htmlPath, formatAsHTML(output, reportMeta));
 
       console.log('📄 Output files generated:');
       console.log(`  ${outputDir}/`);
diff --git a/sdks/typescript/src/batch/report-template.html b/sdks/typescript/src/batch/report-template.html
new file mode 100644
index 0000000..885a0a1
--- /dev/null
+++ b/sdks/typescript/src/batch/report-template.html
@@ -0,0 +1,914 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Evaluation Report</title>
+  <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
+  <style>
+    :root {
+      --bg:           #f1f5f9;
+      --surface:      #ffffff;
+      --border:       #e2e8f0;
+      --header-bg:    #0f172a;
+      --text:         #1e293b;
+      --text-muted:   #64748b;
+      --blue:         #3b82f6;
+      --blue-bg:      #dbeafe;
+      --green:        #059669;
+      --green-bg:     #d1fae5;
+      --amber:        #d97706;
+      --amber-bg:     #fef3c7;
+      --red:          #dc2626;
+      --red-bg:       #fee2e2;
+      --radius:       10px;
+    }
+
+    * { box-sizing: border-box; margin: 0; padding: 0; }
+
+    body {
+      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
+      background: var(--bg);
+      color: var(--text);
+      font-size: 14px;
+      line-height: 1.5;
+    }
+
+    /* ── Header ── */
+    .report-header {
+      background: var(--header-bg);
+      color: #fff;
+      padding: 28px 40px;
+    }
+    .report-header h1 {
+      font-size: 20px;
+      font-weight: 600;
+      margin-bottom: 5px;
+    }
+    .report-header .report-meta {
+      font-size: 12px;
+      color: #94a3b8;
+      letter-spacing: 0.01em;
+    }
+
+    /* ── Tab bar ── */
+    .tab-bar {
+      background: var(--surface);
+      border-bottom: 1px solid var(--border);
+      display: flex;
+      padding: 0 40px;
+      position: sticky;
+      top: 0;
+      z-index: 100;
+    }
+    .tab-btn {
+      padding: 13px 18px;
+      font-size: 13px;
+      font-weight: 500;
+      border: none;
+      background: none;
+      cursor: pointer;
+      color: var(--text-muted);
+      border-bottom: 3px solid transparent;
+      margin-bottom: -1px;
+      transition: color 0.15s, border-color 0.15s;
+    }
+    .tab-btn.active { color: var(--blue); border-bottom-color: var(--blue); }
+    .tab-btn:hover  { color: var(--text); }
+
+    /* ── Tab panels ── */
+    .tab-panel { display: none; padding: 32px 40px; max-width: 1200px; margin: 0 auto; }
+    .tab-panel.active { display: block; }
+
+    /* ── Cards ── */
+    .card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: var(--radius);
+      padding: 24px;
+      margin-bottom: 20px;
+    }
+    .card-title {
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.07em;
+      color: var(--text-muted);
+      margin-bottom: 18px;
+    }
+
+    /* ── Snapshot ── */
+    .snapshot-grid {
+      display: grid;
+      grid-template-columns: 1fr 1fr;
+      gap: 20px 32px;
+    }
+    .snapshot-item { display: flex; flex-direction: column; gap: 5px; }
+    .snapshot-label {
+      font-size: 11px;
+      font-weight: 600;
+      text-transform: uppercase;
+      letter-spacing: 0.06em;
+      color: var(--text-muted);
+    }
+    .snapshot-value { font-size: 15px; font-weight: 500; }
+    .snapshot-value.mono {
+      font-family: 'SFMono-Regular', 'Menlo', Consolas, monospace;
+      font-size: 12px;
+      color: var(--text-muted);
+      word-break: break-all;
+    }
+    .tags { display: flex; flex-wrap: wrap; gap: 6px; margin-top: 2px; }
+    .tag {
+      background: var(--blue-bg);
+      color: var(--blue);
+      padding: 3px 10px;
+      border-radius: 100px;
+      font-size: 12px;
+      font-weight: 500;
+    }
+
+    /* ── GLA stat cards ── */
+    .stat-row {
+      display: grid;
+      grid-template-columns: repeat(3, 1fr);
+      gap: 16px;
+      margin-bottom: 20px;
+    }
+    .stat-card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: var(--radius);
+      padding: 22px 24px;
+      border-left-width: 4px;
+      border-left-style: solid;
+    }
+    .stat-card .stat-label {
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.07em;
+      margin-bottom: 10px;
+    }
+    .stat-card .stat-value { font-size: 40px; font-weight: 700; line-height: 1; margin-bottom: 6px; }
+    .stat-card .stat-sub   { font-size: 13px; color: var(--text-muted); }
+    .stat-card.green { border-left-color: var(--green); }
+    .stat-card.green .stat-label { color: var(--green); }
+    .stat-card.green .stat-value { color: var(--green); }
+    .stat-card.amber { border-left-color: var(--amber); }
+    .stat-card.amber .stat-label { color: var(--amber); }
+    .stat-card.amber .stat-value { color: var(--amber); }
+    .stat-card.red { border-left-color: var(--red); }
+    .stat-card.red .stat-label { color: var(--red); }
+    .stat-card.red .stat-value { color: var(--red); }
+
+    /* ── Complexity dimension cards ── */
+    .complexity-row {
+      display: grid;
+      grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
+      gap: 16px;
+      margin-bottom: 20px;
+    }
+    .complexity-card {
+      background: var(--surface);
+      border: 1px solid var(--border);
+      border-radius: var(--radius);
+      padding: 22px;
+    }
+    .complexity-card .cx-name {
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.07em;
+      color: var(--text-muted);
+      margin-bottom: 12px;
+    }
+    .complexity-card .cx-avg { font-size: 44px; font-weight: 700; line-height: 1; color: var(--text); }
+    .complexity-card .cx-label { font-size: 13px; color: var(--text-muted); margin-top: 4px; margin-bottom: 16px; }
+    .dist-bar { display: flex; flex-direction: column; gap: 7px; }
+    .dist-item { display: flex; align-items: center; gap: 8px; font-size: 12px; }
+    .dist-item .dl { width: 110px; color: var(--text-muted); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
+    .dist-item .dt { flex: 1; background: #f1f5f9; border-radius: 4px; height: 7px; overflow: hidden; }
+    .dist-item .df { height: 100%; border-radius: 4px; background: var(--blue); }
+    .dist-item .dc { color: var(--text-muted); min-width: 28px; text-align: right; }
+
+    /* ── Insights ── */
+    .insights-body { display: flex; flex-direction: column; gap: 10px; }
+    .insight-item {
+      display: flex;
+      gap: 12px;
+      padding: 11px 14px;
+      background: #f8fafc;
+      border-radius: 8px;
+      border-left: 3px solid var(--blue);
+      font-size: 14px;
+      line-height: 1.55;
+    }
+    .insight-arrow { color: var(--blue); font-weight: 700; flex-shrink: 0; margin-top: 1px; }
+    .alpha-badge {
+      display: inline-block;
+      font-size: 10px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.06em;
+      background: var(--amber-bg);
+      color: var(--amber);
+      padding: 2px 7px;
+      border-radius: 100px;
+      margin-left: 8px;
+      vertical-align: middle;
+    }
+    .insights-note {
+      margin-top: 12px;
+      font-size: 12px;
+      color: var(--text-muted);
+      font-style: italic;
+    }
+
+    /* ── Chart ── */
+    .chart-container { position: relative; height: 280px; }
+
+    /* ── Heatmap table ── */
+    .heatmap-wrap { overflow-x: auto; }
+    .heatmap-table {
+      width: 100%;
+      border-collapse: collapse;
+      font-size: 13px;
+    }
+    .heatmap-table th {
+      padding: 10px 16px;
+      text-align: left;
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.06em;
+      color: var(--text-muted);
+      border-bottom: 2px solid var(--border);
+      white-space: nowrap;
+    }
+    .heatmap-table td {
+      padding: 10px 16px;
+      border-bottom: 1px solid var(--border);
+      text-align: center;
+    }
+    .heatmap-table td:first-child { text-align: left; font-weight: 600; }
+    .heatmap-table tr:last-child td { border-bottom: none; }
+    .hm-cell {
+      display: inline-block;
+      padding: 5px 12px;
+      border-radius: 6px;
+      font-size: 13px;
+      font-weight: 600;
+    }
+    .hm-1 { background: var(--green-bg); color: #065f46; }
+    .hm-2 { background: var(--amber-bg); color: #78350f; }
+    .hm-3 { background: #fed7aa;         color: #7c2d12; }
+    .hm-4 { background: var(--red-bg);   color: #7f1d1d; }
+    .hm-null { background: #f1f5f9; color: var(--text-muted); }
+
+    /* ── Full Results table ── */
+    .results-outer {
+      overflow-x: auto;
+      border: 1px solid var(--border);
+      border-radius: var(--radius);
+      background: var(--surface);
+    }
+    .results-table {
+      border-collapse: collapse;
+      font-size: 13px;
+      min-width: 100%;
+      white-space: nowrap;
+    }
+    .results-table th {
+      padding: 10px 16px;
+      text-align: left;
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.05em;
+      color: var(--text-muted);
+      background: #f8fafc;
+      border-bottom: 2px solid var(--border);
+      position: sticky;
+      top: 0;
+      z-index: 3;
+    }
+    /* Frozen column headers sit above both sticky top AND sticky left; `thead` raises specificity so the shared .frozen rule further down (z-index: 2) cannot override this */
+    .results-table thead th.frozen { z-index: 4; }
+    .results-table td {
+      padding: 10px 16px;
+      border-bottom: 1px solid var(--border);
+      vertical-align: top;
+      max-width: 280px;
+      white-space: normal;
+    }
+    .results-table tr:last-child td { border-bottom: none; }
+    .results-table tr:hover td { background: #f8fafc; }
+    .results-table tr:hover td.frozen { background: #f1f5f9; }
+    /* Frozen (sticky-left) cells */
+    .results-table th.frozen,
+    .results-table td.frozen {
+      position: sticky;
+      background: #f8fafc;
+      z-index: 2;
+    }
+    .results-table td.frozen { background: var(--surface); }
+    /* Visual separator after frozen columns */
+    .results-table th.frozen-last,
+    .results-table td.frozen-last {
+      border-right: 2px solid var(--border);
+    }
+    /* Column group separator */
+    .results-table th.group-start,
+    .results-table td.group-start {
+      border-left: 2px solid var(--border);
+    }
+    /* Reasoning cells — truncated with tooltip */
+    .reasoning-cell {
+      max-width: 280px;
+      white-space: nowrap;
+      overflow: hidden;
+      text-overflow: ellipsis;
+      color: var(--text-muted);
+      font-size: 12px;
+      cursor: help;
+    }
+    /* Status badges */
+    .badge {
+      display: inline-block;
+      padding: 3px 10px;
+      border-radius: 100px;
+      font-size: 11px;
+      font-weight: 700;
+      text-transform: uppercase;
+      letter-spacing: 0.04em;
+      white-space: nowrap;
+    }
+    .badge-on-band  { background: var(--green-bg); color: var(--green); }
+    .badge-adjacent { background: var(--amber-bg); color: var(--amber); }
+    .badge-off-target { background: var(--red-bg);  color: var(--red); }
+
+    /* ── Empty / no-data states ── */
+    .no-data {
+      padding: 40px;
+      text-align: center;
+      color: var(--text-muted);
+      font-size: 15px;
+    }
+  </style>
+</head>
+<body>
+
+  <header class="report-header" id="app-header"></header>
+
+  <nav class="tab-bar" id="tab-bar"></nav>
+
+  <div id="tab-summary" class="tab-panel active"></div>
+  <div id="tab-results" class="tab-panel"></div>
+
+  <script>
+    // ---------------------------------------------------------------------------
+    // MOCK DATA — used when the template is opened directly in a browser so
+    // designers can see a realistic preview without running the CLI.
+    // This constant is never referenced when real data has been injected.
+    // ---------------------------------------------------------------------------
+    const MOCK_REPORT_DATA = {
+      meta: {
+        reportId: 'sample_content_batch_20260301T1430',
+        generatedAt: 'Mar 1, 2026 2:30 PM',
+        csvPath: '/Users/designer/Documents/interventionhelper_content_batch_03-01.csv',
+        evaluatorIds: ['grade-level-appropriateness', 'vocabulary', 'sentence-structure'],
+        evaluatorNames: ['Grade Level Appropriateness', 'Vocabulary', 'Sentence Structure'],
+        totalRows: 300,
+        processedRows: 287,
+        erroredRows: 13,
+      },
+      gradeLevelStats: {
+        onBand: 172, adjacent: 85, offTarget: 30,
+        onBandPct: 60, adjacentPct: 30, offTargetPct: 10,
+        hasData: true,
+      },
+      complexityStats: [
+        {
+          evaluatorId: 'vocabulary', name: 'Vocabulary',
+          average: 2.4, label: 'Moderately Complex',
+          distribution: [45, 120, 95, 27],
+        },
+        {
+          evaluatorId: 'sentence-structure', name: 'Sentence Structure',
+          average: 1.9, label: 'Moderately Complex',
+          distribution: [88, 105, 72, 22],
+        },
+      ],
+      gradeBandDistribution: {
+        bands: ['K-1', '2-3', '4-5', '6-8', '9-10', '11-CCR'],
+        data: [
+          { onBand: 0,  adjacent: 0,  offTarget: 0,  total: 0  },
+          { onBand: 32, adjacent: 18, offTarget: 5,  total: 55 },
+          { onBand: 58, adjacent: 22, offTarget: 8,  total: 88 },
+          { onBand: 48, adjacent: 25, offTarget: 10, total: 83 },
+          { onBand: 22, adjacent: 14, offTarget: 5,  total: 41 },
+          { onBand: 12, adjacent: 6,  offTarget: 2,  total: 20 },
+        ],
+      },
+      complexityHeatmap: {
+        bands: ['K-1', '2-3', '4-5', '6-8', '9-10', '11-CCR'],
+        evaluators: ['Vocabulary', 'Sentence Structure'],
+        evaluatorIds: ['vocabulary', 'sentence-structure'],
+        values: [
+          [null, null],
+          [1.6,  1.4 ],
+          [2.1,  1.8 ],
+          [2.5,  2.2 ],
+          [2.9,  2.6 ],
+          [3.2,  2.8 ],
+        ],
+      },
+      insights: [
+        'Review texts marked as Off Target — they may need content revision or grade-level adjustment before distribution.',
+        'Texts evaluated as Adjacent may benefit from light scaffolding strategies such as vocabulary pre-teaching.',
+        'Higher grade bands tend to show greater text complexity. Consider whether complexity aligns with instructional goals.',
+      ],
+      fullResults: {
+        originalColumns: ['row_id', 'text', 'grade', 'source'],
+        hasGLA: true,
+        complexityEvaluators: [
+          { evaluatorId: 'vocabulary',         name: 'Vocabulary',         prefix: 'vocabulary'         },
+          { evaluatorId: 'sentence-structure', name: 'Sentence Structure', prefix: 'sentence_structure' },
+        ],
+        rows: [
+          {
+            row_id: '1', grade: '5', source: 'science_unit_3',
+            text: 'The water cycle describes how water evaporates from surfaces, rises into the atmosphere, cools and condenses into clouds, and falls back to the ground as precipitation.',
+            __gla_status: 'On Band', __gla_band: '4-5',
+            __gla_reasoning: 'Uses grade-appropriate science vocabulary with a clear explanatory structure suitable for grades 4–5.',
+            __vocabulary_score: 'moderately complex',
+            __vocabulary_reasoning: 'Contains domain-specific terms (evaporates, condenses, precipitation) that require pre-teaching for grade 5 students.',
+            __sentence_structure_score: 'Slightly Complex',
+            __sentence_structure_reasoning: 'Primarily compound sentences with clear connective structure appropriate for grade 5.',
+          },
+          {
+            row_id: '2', grade: '6', source: 'science_unit_1',
+            text: 'Photosynthesis is the process by which green plants use sunlight, water and carbon dioxide to produce food and oxygen.',
+            __gla_status: 'Adjacent', __gla_band: '4-5',
+            __gla_reasoning: 'Content is accessible but slightly below typical grade 6 complexity expectations.',
+            __vocabulary_score: 'moderately complex',
+            __vocabulary_reasoning: 'Key scientific terms are present but relatively straightforward for grade 6 readers.',
+            __sentence_structure_score: 'Slightly Complex',
+            __sentence_structure_reasoning: 'Single main clause with a relative clause; well within grade 6 reading ability.',
+          },
+          {
+            row_id: '3', grade: '8', source: 'biology_unit_2',
+            text: 'The mitochondria, often described as the powerhouse of the cell, are organelles found in the cytoplasm of eukaryotic cells, where they generate most of the adenosine triphosphate used for cellular energy.',
+            __gla_status: 'Off Target', __gla_band: '11-CCR',
+            __gla_reasoning: 'Text uses advanced biochemical terminology (adenosine triphosphate, eukaryotic) more appropriate for upper secondary or college-level readers.',
+            __vocabulary_score: 'exceedingly complex',
+            __vocabulary_reasoning: 'High density of Tier 3 domain-specific words significantly exceeds typical grade 8 vocabulary expectations.',
+            __sentence_structure_score: 'Very Complex',
+            __sentence_structure_reasoning: 'Long, embedded clauses with multiple modifying phrases create significant syntactic complexity for grade 8.',
+          },
+          {
+            row_id: '4', grade: '3', source: 'science_unit_3',
+            text: 'Rain falls from clouds when tiny water droplets join together and become heavy enough to fall to the ground.',
+            __gla_status: 'On Band', __gla_band: '2-3',
+            __gla_reasoning: 'Simple vocabulary and sentence structure are appropriate for grades 2–3.',
+            __vocabulary_score: 'slightly complex',
+            __vocabulary_reasoning: 'Common everyday vocabulary with no domain-specific terms requiring pre-teaching.',
+            __sentence_structure_score: 'Slightly Complex',
+            __sentence_structure_reasoning: 'Short simple sentences with basic connective structure.',
+          },
+          {
+            row_id: '5', grade: '9', source: 'biology_unit_1',
+            text: 'Ecosystems are communities of organisms that interact with each other and their physical environment, shaped by both biotic and abiotic factors that influence population dynamics over time.',
+            __gla_status: 'On Band', __gla_band: '9-10',
+            __gla_reasoning: 'Appropriate complexity and terminology for a grade 9–10 biology curriculum.',
+            __vocabulary_score: 'very complex',
+            __vocabulary_reasoning: 'Multiple Tier 3 terms (biotic, abiotic, population dynamics) require strong background knowledge.',
+            __sentence_structure_score: 'Moderately Complex',
+            __sentence_structure_reasoning: 'Compound-complex sentence with a relative clause; manageable for grade 9 readers.',
+          },
+          {
+            row_id: '6', grade: '4', source: 'social_studies_unit_2',
+            text: 'Ancient Egyptians built pyramids as tombs for their pharaohs and used a picture-based writing system called hieroglyphics.',
+            __gla_status: 'On Band', __gla_band: '4-5',
+            __gla_reasoning: 'Vocabulary and sentence length are well-matched to grade 4–5 social studies content.',
+            __vocabulary_score: 'moderately complex',
+            __vocabulary_reasoning: 'Domain-specific proper nouns (pharaohs, hieroglyphics) may need brief glossing.',
+            __sentence_structure_score: 'Slightly Complex',
+            __sentence_structure_reasoning: 'Two coordinated independent clauses; clear and accessible structure.',
+          },
+          {
+            row_id: '7', grade: '11', source: 'lit_unit_4',
+            text: 'Shakespeare\'s use of dramatic irony in Othello functions as a mechanism of tragic inevitability, positioning the audience as unwilling witnesses to the protagonist\'s epistemological collapse.',
+            __gla_status: 'On Band', __gla_band: '11-CCR',
+            __gla_reasoning: 'Sophisticated literary analysis vocabulary and complex syntax are well-suited to grades 11–CCR.',
+            __vocabulary_score: 'exceedingly complex',
+            __vocabulary_reasoning: 'Tier 3 literary and philosophical vocabulary (epistemological, dramatic irony, tragic inevitability) demands high reading proficiency.',
+            __sentence_structure_score: 'Exceedingly Complex',
+            __sentence_structure_reasoning: 'Noun phrase and participial phrase stacking creates a dense, highly embedded syntactic structure.',
+          },
+          {
+            row_id: '8', grade: '7', source: 'history_unit_1',
+            text: 'The Industrial Revolution transformed European societies by shifting labor from farms to factories, driving rapid urban growth and fundamentally changing how goods were produced and traded.',
+            __gla_status: 'Adjacent', __gla_band: '9-10',
+            __gla_reasoning: 'Vocabulary and conceptual density exceed typical grade 7 expectations; better suited for grades 9–10.',
+            __vocabulary_score: 'very complex',
+            __vocabulary_reasoning: 'Abstract economic and historical vocabulary (urban growth, fundamentally) adds significant reading demand.',
+            __sentence_structure_score: 'Moderately Complex',
+            __sentence_structure_reasoning: 'Participial phrases and coordinated verb phrases add structural complexity but remain readable.',
+          },
+        ],
+      },
+    };
+
+    // ---------------------------------------------------------------------------
+    // DATA — this line is replaced by the formatter at report generation time.
+    // When opening the template directly in a browser, MOCK_REPORT_DATA is used.
+    // ---------------------------------------------------------------------------
+    var REPORT_DATA = null; // __REPLACED_BY_FORMATTER__
+    REPORT_DATA = REPORT_DATA || MOCK_REPORT_DATA;
+
+    // ---------------------------------------------------------------------------
+    // Utilities
+    // ---------------------------------------------------------------------------
+
+    // HTML-escapes & < > " ' so `str` is safe as element text or inside a quoted
+    // attribute value. null/undefined become ''; other values are stringified.
+    function esc(str) {
+      const ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
+      return String(str ?? '')
+        .replace(/[&<>"']/g, ch => ENTITIES[ch]);
+    }
+
+    // Maps an average complexity score to its heatmap colour class; null/undefined map to 'hm-null'.
+    function hmClass(val) {
+      if (val == null) return 'hm-null';
+      // Exclusive upper bounds of buckets hm-1..hm-3; a value below none of them is hm-4.
+      const bucket = [1.5, 2.5, 3.5].findIndex(limit => val < limit);
+      return bucket === -1 ? 'hm-4' : `hm-${bucket + 1}`;
+    }
+
+    // Renders a GLA status as a pill badge; unrecognised statuses get only the base `badge` class.
+    // The lookup table has a null prototype so inherited keys (e.g. 'constructor') never match.
+    function statusBadge(status) {
+      const BADGE_CLASSES = Object.assign(Object.create(null), {
+        'On Band': 'badge-on-band', 'Adjacent': 'badge-adjacent', 'Off Target': 'badge-off-target',
+      });
+      return `<span class="badge ${BADGE_CLASSES[status] || ''}">${esc(status)}</span>`;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Tab switching
+    // ---------------------------------------------------------------------------
+
+    // Shows `tab` ('summary' or 'results') by syncing the `active` class on the tab buttons and both panels.
+    function switchTab(tab) {
+      document.querySelectorAll('.tab-btn').forEach(b => b.classList.toggle('active', b.dataset.tab === tab));
+      ['summary', 'results'].forEach(id => document.getElementById(`tab-${id}`).classList.toggle('active', id === tab));
+      if (tab === 'summary' && !window._chartDrawn) drawChart();
+      // Frozen-column offsets are read from offsetWidth, which is 0 if the panel was hidden (presumably display:none; CSS not shown here) at first render, so re-measure now.
+      if (tab === 'results') applyFrozenOffsets('results-table', REPORT_DATA.fullResults.originalColumns.length);
+    }
+
+    // ---------------------------------------------------------------------------
+    // Header
+    // ---------------------------------------------------------------------------
+
+    function renderHeader() { // Writes the report heading into #app-header and sets the page title.
+      const headerMarkup = `
+        <h1>Evaluation Report</h1>
+        <div class="report-meta">
+          Generated: ${esc(REPORT_DATA.meta.generatedAt)} &bull; Report ID: ${esc(REPORT_DATA.meta.reportId)}
+        </div>
+      `;
+      document.getElementById('app-header').innerHTML = headerMarkup;
+      document.title = `Report: ${REPORT_DATA.meta.reportId}`;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Tab bar
+    // ---------------------------------------------------------------------------
+
+    function renderTabs() { // Injects the two tab buttons into #tab-bar; each calls switchTab() with its own data-tab value.
+      document.getElementById('tab-bar').innerHTML = `
+        <button class="tab-btn active" data-tab="summary" onclick="switchTab('summary')">Summary</button>
+        <button class="tab-btn" data-tab="results" onclick="switchTab('results')">Full Results</button>
+      `;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Summary tab
+    // ---------------------------------------------------------------------------
+
+    function renderSummary() { // Builds every Summary-tab card from REPORT_DATA and writes the markup into #tab-summary.
+      const { meta, gradeLevelStats: gls, complexityStats, insights,
+              complexityHeatmap } = REPORT_DATA;
+
+      // ── Snapshot: evaluator tags, processed/errored row counts, source file path ──
+      const snapshotHtml = `
+        <div class="card">
+          <div class="card-title">Snapshot</div>
+          <div class="snapshot-grid">
+            <div class="snapshot-item">
+              <span class="snapshot-label">Evaluators</span>
+              <div class="tags">
+                ${meta.evaluatorNames.map(n => `<span class="tag">${esc(n)}</span>`).join('')}
+              </div>
+            </div>
+            <div class="snapshot-item">
+              <span class="snapshot-label">Rows Processed</span>
+              <span class="snapshot-value">${meta.processedRows} of ${meta.totalRows}</span>
+            </div>
+            <div class="snapshot-item">
+              <span class="snapshot-label">Rows Errored</span>
+              <span class="snapshot-value">${meta.erroredRows}</span>
+            </div>
+            <div class="snapshot-item">
+              <span class="snapshot-label">Source File</span>
+              <span class="snapshot-value mono" title="${esc(meta.csvPath)}">${esc(meta.csvPath)}</span>
+            </div>
+          </div>
+        </div>
+      `;
+
+      // ── GLA stat cards: On Band / Adjacent / Off Target; omitted when gls.hasData is false ──
+      const glsHtml = gls.hasData ? `
+        <div class="stat-row">
+          <div class="stat-card green">
+            <div class="stat-label">On Band</div>
+            <div class="stat-value">${gls.onBandPct}%</div>
+            <div class="stat-sub">${gls.onBand} texts match the target grade band</div>
+          </div>
+          <div class="stat-card amber">
+            <div class="stat-label">Adjacent</div>
+            <div class="stat-value">${gls.adjacentPct}%</div>
+            <div class="stat-sub">${gls.adjacent} texts within one band of target</div>
+          </div>
+          <div class="stat-card red">
+            <div class="stat-label">Off Target</div>
+            <div class="stat-value">${gls.offTargetPct}%</div>
+            <div class="stat-sub">${gls.offTarget} texts need review</div>
+          </div>
+        </div>
+      ` : '';
+
+      // ── Complexity cards: one per evaluator; bar widths are relative to that evaluator's largest bucket ──
+      const COMPLEXITY_LABELS = ['Slightly Complex', 'Moderately Complex', 'Very Complex', 'Exceedingly Complex'];
+      const cxHtml = complexityStats.length > 0 ? `
+        <div class="complexity-row">
+          ${complexityStats.map(cs => {
+            const maxDist = Math.max(...cs.distribution, 1);
+            const bars = COMPLEXITY_LABELS.map((lbl, i) => `
+              <div class="dist-item">
+                <div class="dl" title="${lbl}">${lbl}</div>
+                <div class="dt">
+                  <div class="df" style="width:${Math.round((cs.distribution[i] / maxDist) * 100)}%"></div>
+                </div>
+                <div class="dc">${cs.distribution[i]}</div>
+              </div>
+            `).join('');
+            return `
+              <div class="complexity-card">
+                <div class="cx-name">${esc(cs.name)}</div>
+                <div class="cx-avg">${cs.average > 0 ? cs.average.toFixed(1) : '—'}</div>
+                <div class="cx-label">${esc(cs.label)}</div>
+                <div class="dist-bar">${bars}</div>
+              </div>
+            `;
+          }).join('')}
+        </div>
+      ` : '';
+
+      // ── Insights (Alpha): always rendered; each insight string is HTML-escaped ──
+      const insightsHtml = `
+        <div class="card">
+          <div class="card-title">
+            Insights <span class="alpha-badge">Alpha</span>
+          </div>
+          <div class="insights-body">
+            ${insights.map(i => `
+              <div class="insight-item">
+                <span class="insight-arrow">→</span>
+                <span>${esc(i)}</span>
+              </div>
+            `).join('')}
+          </div>
+          <div class="insights-note">
+            These insights are automatically generated and may not reflect the full context of your data.
+          </div>
+        </div>
+      `;
+
+      // ── Distribution chart: only the <canvas id="dist-chart"> placeholder; drawChart() paints it ──
+      const distHtml = gls.hasData ? `
+        <div class="card">
+          <div class="card-title">Grade Level Distribution by Band</div>
+          <div class="chart-container">
+            <canvas id="dist-chart"></canvas>
+          </div>
+        </div>
+      ` : '';
+
+      // ── Heatmap: band rows × evaluator columns; null/undefined cells render as '—' with class hm-null ──
+      const heatmapHtml = complexityStats.length > 0 && complexityHeatmap.evaluators.length > 0 ? `
+        <div class="card">
+          <div class="card-title">Text Complexity by Grade Band</div>
+          <div class="heatmap-wrap">
+            <table class="heatmap-table">
+              <thead>
+                <tr>
+                  <th>Grade Band</th>
+                  ${complexityHeatmap.evaluators.map(e => `<th>${esc(e)}</th>`).join('')}
+                </tr>
+              </thead>
+              <tbody>
+                ${complexityHeatmap.bands.map((band, bi) => `
+                  <tr>
+                    <td>${esc(band)}</td>
+                    ${complexityHeatmap.values[bi].map(val => {
+                      const cls = hmClass(val);
+                      const label = val != null ? val.toFixed(1) : '—';
+                      return `<td><span class="hm-cell ${cls}">${label}</span></td>`;
+                    }).join('')}
+                  </tr>
+                `).join('')}
+              </tbody>
+            </table>
+          </div>
+        </div>
+      ` : '';
+
+      document.getElementById('tab-summary').innerHTML =
+        snapshotHtml + glsHtml + cxHtml + insightsHtml + distHtml + heatmapHtml;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Distribution bar chart (Chart.js)
+    // ---------------------------------------------------------------------------
+
+    window._chartDrawn = false;
+
+    // Renders the grade-band distribution bar chart (Chart.js) on the #dist-chart canvas.
+    // Each bar is one status's share of a band's rows; does nothing when the canvas is not
+    // in the DOM.
+    function drawChart() {
+      const dist = REPORT_DATA.gradeBandDistribution;
+      const canvas = document.getElementById('dist-chart');
+      if (!canvas) return;
+
+      // Bands without any classified row are left off the chart.
+      const hasRows = band => band.total > 0;
+      const labels = dist.bands.filter((_, i) => hasRows(dist.data[i]));
+      const counts = dist.data.filter(band => hasRows(band));
+
+      // One dataset per status; values are whole-number percentages of the band total.
+      const pctSeries = (label, key, backgroundColor) => ({
+        label,
+        data: counts.map(band =>
+          band.total > 0 ? Math.round((band[key] / band.total) * 100) : 0
+        ),
+        backgroundColor,
+        borderRadius: 4,
+      });
+
+      const config = {
+        type: 'bar',
+        data: {
+          labels,
+          datasets: [
+            pctSeries('On Band', 'onBand', '#10b981'),
+            pctSeries('Adjacent', 'adjacent', '#f59e0b'),
+            pctSeries('Off Target', 'offTarget', '#ef4444'),
+          ],
+        },
+        options: {
+          responsive: true,
+          maintainAspectRatio: false,
+          plugins: {
+            legend: { position: 'top' },
+            tooltip: { callbacks: { label: item => `${item.dataset.label}: ${item.raw}%` } },
+          },
+          scales: {
+            x: { grid: { display: false } },
+            y: {
+              max: 100,
+              ticks: { callback: value => value + '%' },
+              grid: { color: '#f1f5f9' },
+            },
+          },
+        },
+      };
+
+      new Chart(canvas, config);
+      // Flag checked by switchTab() so the chart is built only once.
+      window._chartDrawn = true;
+    }
+
+    // ---------------------------------------------------------------------------
+    // Full Results tab
+    // ---------------------------------------------------------------------------
+
+    function renderResults() { // Renders the Full Results table into #tab-results: frozen original columns, then GLA and per-evaluator columns.
+      const { fullResults } = REPORT_DATA;
+      const { originalColumns, hasGLA, complexityEvaluators, rows } = fullResults;
+
+      // Sticky `left` offsets can only be measured once the table is in the DOM,
+      // so every original column first gets a fixed min-width here; the real
+      // offsets are applied by applyFrozenOffsets() at the end of this function.
+      const COL_MIN_WIDTH = 160; // px — min-width given to every frozen (original) column
+      const frozenCount = originalColumns.length;
+
+      // Header cells: one frozen <th> per original column (the last one also gets `frozen-last`)
+      const thOriginal = originalColumns.map((col, i) => {
+        const isLast = i === frozenCount - 1;
+        return `<th class="frozen${isLast ? ' frozen-last' : ''}" data-frozen="${i}"
+                    style="min-width:${COL_MIN_WIDTH}px">${esc(col)}</th>`;
+      }).join('');
+
+      const thGLA = hasGLA ? `
+        <th class="group-start" style="min-width:120px">Grade Level Status</th>
+        <th style="min-width:110px">GLA Grade Band</th>
+        <th style="min-width:260px">GLA Reasoning</th>
+      ` : '';
+
+      const thCX = complexityEvaluators.map(e => `
+        <th class="group-start" style="min-width:150px">${esc(e.name)} Score</th>
+        <th style="min-width:260px">${esc(e.name)} Reasoning</th>
+      `).join('');
+
+      // Body rows: same column order as the header; evaluator values are read from the row's `__`-prefixed keys
+      const bodyRows = rows.map(row => {
+        const tdOriginal = originalColumns.map((col, i) => {
+          const isLast = i === frozenCount - 1;
+          return `<td class="frozen${isLast ? ' frozen-last' : ''}" data-frozen="${i}"
+                      style="min-width:${COL_MIN_WIDTH}px">${esc(row[col])}</td>`;
+        }).join('');
+
+        const glaStatus = row['__gla_status'] || '';
+        const tdGLA = hasGLA ? `
+          <td class="group-start">${statusBadge(glaStatus)}</td>
+          <td>${esc(row['__gla_band'])}</td>
+          <td class="reasoning-cell" title="${esc(row['__gla_reasoning'])}">${esc(row['__gla_reasoning'])}</td>
+        ` : '';
+
+        const tdCX = complexityEvaluators.map(e => {
+          const prefix = `__${e.prefix}`;
+          return `
+            <td class="group-start">${esc(row[prefix + '_score'])}</td>
+            <td class="reasoning-cell" title="${esc(row[prefix + '_reasoning'])}">${esc(row[prefix + '_reasoning'])}</td>
+          `;
+        }).join('');
+
+        return `<tr>${tdOriginal}${tdGLA}${tdCX}</tr>`;
+      }).join('');
+
+      document.getElementById('tab-results').innerHTML = `
+        <div class="results-outer">
+          <table class="results-table" id="results-table">
+            <thead><tr>${thOriginal}${thGLA}${thCX}</tr></thead>
+            <tbody>${bodyRows}</tbody>
+          </table>
+        </div>
+      `;
+
+      // Apply real sticky left offsets now that the table is in the DOM (they are read from header cell widths)
+      applyFrozenOffsets('results-table', frozenCount);
+    }
+
+    /**
+     * Positions the frozen columns of table `tableId` side by side: each frozen
+     * cell's `left` becomes the combined rendered width of the frozen header cells
+     * before it. Only the first `frozenCount` frozen cells of a row are positioned.
+     */
+    function applyFrozenOffsets(tableId, frozenCount) {
+      const table = document.getElementById(tableId);
+      if (!table) return;
+      // leftEdges[k] = summed offsetWidth of frozen header cells 0..k-1
+      const leftEdges = [];
+      let runningWidth = 0;
+      for (const headerCell of table.querySelectorAll('thead th.frozen')) {
+        leftEdges.push(runningWidth);
+        runningWidth += headerCell.offsetWidth;
+      }
+      for (const row of table.querySelectorAll('tr')) {
+        let k = 0;
+        for (const cell of row.querySelectorAll('td.frozen, th.frozen')) {
+          if (k < frozenCount) cell.style.left = `${leftEdges[k] ?? 0}px`;
+          k += 1;
+        }
+      }
+    }
+
+    // ---------------------------------------------------------------------------
+    // Bootstrap
+    // ---------------------------------------------------------------------------
+
+    renderHeader();
+    renderTabs();
+    renderSummary();
+    renderResults();
+    // Draw the chart 80 ms after the initial render; switchTab('summary') draws it on demand if this has not run yet
+    setTimeout(drawChart, 80);
+  </script>
+</body>
+</html>
diff --git a/sdks/typescript/src/types/html.d.ts b/sdks/typescript/src/types/html.d.ts
new file mode 100644
index 0000000..448f7d1
--- /dev/null
+++ b/sdks/typescript/src/types/html.d.ts
@@ -0,0 +1,4 @@
+declare module '*.html' { // Ambient module: lets TypeScript import any *.html file as a default-exported string.
+  const content: string;
+  export default content;
+}
diff --git a/sdks/typescript/tests/unit/batch/formatters.test.ts b/sdks/typescript/tests/unit/batch/formatters.test.ts
index 3ee84db..b3fcaf6 100644
--- a/sdks/typescript/tests/unit/batch/formatters.test.ts
+++ b/sdks/typescript/tests/unit/batch/formatters.test.ts
@@ -1,252 +1,358 @@
 import { describe, it, expect } from 'vitest';
-import { formatAsCSV, formatAsJSON, formatAsHTML } from '../../../src/batch/formatters.js';
+import { formatAsCSV, formatAsHTML, type ReportMeta } from '../../../src/batch/formatters.js';
 import type { BatchOutput, BatchResult } from '../../../src/batch/types.js';
 
-describe('Batch Formatters', () => {
-  const sampleResults: BatchResult[] = [
-    {
-      rowIndex: 1,
-      text: 'The cat sat on the mat.',
-      grade: '3',
-      evaluatorId: 'vocabulary',
-      status: 'success',
-      score: 'slightly complex',
-      reasoning: 'Simple vocabulary',
-      processingTimeMs: 1250,
-      originalRow: { row_id: '1', text: 'The cat sat on the mat.', grade: '3', source: 'test' },
-    },
-    {
-      rowIndex: 1,
-      text: 'The cat sat on the mat.',
-      grade: '3',
-      evaluatorId: 'sentence-structure',
-      status: 'success',
-      score: 'Moderately Complex',
-      reasoning: 'Simple sentence structure',
-      processingTimeMs: 1100,
-      originalRow: { row_id: '1', text: 'The cat sat on the mat.', grade: '3', source: 'test' },
-    },
-    {
-      rowIndex: 2,
-      text: 'The quick brown fox jumps over the lazy dog.',
-      grade: '4',
-      evaluatorId: 'vocabulary',
-      status: 'error',
-      error: 'API timeout',
-      processingTimeMs: 5000,
-      originalRow: { row_id: '2', text: 'The quick brown fox jumps over the lazy dog.', grade: '4', source: 'test' },
-    },
-  ];
+// ---- Test fixtures ----
+
+function makeResult(overrides: Partial<BatchResult>): BatchResult {
+  const defaults: BatchResult = { // baseline fixture: one successful `vocabulary` result for row 1
+    rowIndex: 1,
+    text: 'Sample text.',
+    grade: '5',
+    evaluatorId: 'vocabulary',
+    status: 'success',
+    score: 'slightly complex',
+    reasoning: 'ok',
+    processingTimeMs: 100,
+    originalRow: { text: 'Sample text.', grade: '5' },
+  };
+  return { ...defaults, ...overrides }; // caller-supplied fields replace the defaults
+}
 
-  const sampleOutput: BatchOutput = {
-    results: sampleResults,
+function makeOutput(results: BatchResult[]): BatchOutput { // wraps `results` in a BatchOutput with a summary computed from them
+  return {
+    results,
     summary: {
-      totalTasks: 3,
-      successful: 2,
-      failed: 1,
-      durationMs: 7500,
-      resultsPerEvaluator: {
-        vocabulary: { successful: 1, failed: 1 },
-        'sentence-structure': { successful: 1, failed: 0 },
-      },
+      totalTasks: results.length,
+      successful: results.filter(r => r.status === 'success').length,
+      failed: results.filter(r => r.status === 'error').length,
+      durationMs: 1000, // fixed placeholder value
+      resultsPerEvaluator: {}, // always empty here; not derived from `results`
     },
   };
+}
+
+function makeMeta(overrides?: Partial<ReportMeta>): ReportMeta {
+  const defaults: ReportMeta = { // baseline report metadata: one input row, `vocabulary` only
+    csvPath: '/data/input.csv',
+    evaluatorIds: ['vocabulary'],
+    reportId: 'test_20260301T0000',
+    generatedAt: new Date('2026-03-01T00:00:00Z'),
+    totalInputRows: 1,
+  };
+  return { ...defaults, ...overrides }; // spreading an omitted `overrides` adds nothing
+}
+
+/**
+ * Extracts and parses the JSON that follows `var REPORT_DATA = ` in the HTML produced by formatAsHTML,
+ * so tests can assert on computed values. Throws a descriptive Error when the marker is absent.
+ */
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+function extractReportData(html: string): any {
+  const marker = 'var REPORT_DATA = ';
+  const markerAt = html.indexOf(marker);
+  if (markerAt === -1) throw new Error('REPORT_DATA marker not found in formatAsHTML output');
+  const [line] = html.slice(markerAt + marker.length).split(/\r?\n/, 1); // rest of that line (LF, CRLF, or end of input)
+  return JSON.parse(line.trimEnd().replace(/;$/, '')); // drop the statement's trailing semicolon, if any
+}
+
+// ============================================================
+// formatAsCSV
+// ============================================================
+
+describe('formatAsCSV', () => {
+  it('returns empty string for empty results', () => {
+    expect(formatAsCSV(makeOutput([]))).toBe('');
+  });
 
-  describe('formatAsCSV', () => {
-    it('should format results as CSV with columns per evaluator', () => {
-      const csv = formatAsCSV(sampleOutput);
+  it('produces one data row per input row, not per evaluator task', () => {
+    // Row 1 has two evaluators → should collapse into a single CSV row
+    const output = makeOutput([
+      makeResult({ rowIndex: 1, evaluatorId: 'vocabulary',         score: 'slightly complex' }),
+      makeResult({ rowIndex: 1, evaluatorId: 'sentence-structure', score: 'Moderately Complex' }),
+    ]);
 
-      // Should include original columns
-      expect(csv).toContain('row_id');
-      expect(csv).toContain('source');
+    const lines = formatAsCSV(output).split('\n');
+    expect(lines).toHaveLength(2); // 1 header + 1 data row
+  });
 
-      // Should have evaluator-specific columns (not "evaluator" column)
-      expect(csv).toContain('vocabulary_score');
-      expect(csv).toContain('vocabulary_reasoning');
-      expect(csv).toContain('vocabulary_status');
-      expect(csv).toContain('sentence_structure_score');
-      expect(csv).toContain('sentence_structure_reasoning');
-      expect(csv).toContain('sentence_structure_status');
+  it('places evaluator columns in alphabetical order after original columns', () => {
+    const output = makeOutput([
+      makeResult({ evaluatorId: 'vocabulary',         originalRow: { id: '1', text: 'txt', grade: '5' } }),
+      makeResult({ evaluatorId: 'sentence-structure', originalRow: { id: '1', text: 'txt', grade: '5' } }),
+    ]);
 
-      // Should have one row per input row (not per evaluator)
-      expect(csv.split('\n')).toHaveLength(3); // Header + 2 data rows
-    });
+    const header = formatAsCSV(output).split('\n')[0];
+    const cols = header.split(',');
 
-    it('should escape CSV fields with quotes', () => {
-      const resultsWithCommas: BatchResult[] = [
-        {
-          rowIndex: 1,
-          text: 'Text with, comma',
-          grade: '3',
-          evaluatorId: 'vocabulary',
-          status: 'success',
-          score: 'slightly complex',
-          reasoning: 'Reasoning with, comma',
-          processingTimeMs: 1000,
-          originalRow: { text: 'Text with, comma', grade: '3' },
-        },
-      ];
-
-      const output: BatchOutput = {
-        results: resultsWithCommas,
-        summary: {
-          totalTasks: 1,
-          successful: 1,
-          failed: 0,
-          durationMs: 1000,
-          resultsPerEvaluator: { vocabulary: { successful: 1, failed: 0 } },
-        },
-      };
-
-      const csv = formatAsCSV(output);
-      expect(csv).toContain('"Text with, comma"');
-      expect(csv).toContain('"Reasoning with, comma"');
-    });
+    expect(cols[0]).toBe('id'); // original columns come first
+    // Evaluators sort alphabetically; check presence first, because a missing column's indexOf of -1 would pass toBeLessThan vacuously
+    expect(cols.indexOf('sentence_structure_score')).toBeGreaterThanOrEqual(0);
+    expect(cols.indexOf('sentence_structure_score')).toBeLessThan(cols.indexOf('vocabulary_score'));
+  });
 
-    it('should handle errors in evaluator columns', () => {
-      const csv = formatAsCSV(sampleOutput);
+  it('leaves score empty and puts error message in reasoning for failed evaluations', () => {
+    const output = makeOutput([
+      makeResult({ status: 'error', error: 'API timeout', score: undefined }),
+    ]);
 
-      // Row 2 has vocabulary error - should have empty score, error as reasoning, status=error
-      expect(csv).toContain('API timeout'); // Error message in reasoning column
-      expect(csv).toContain('error'); // Status column
-    });
+    const csv = formatAsCSV(output);
+    const dataRow = csv.split('\n')[1];
+    const cols = dataRow.split(',');
+    const header = csv.split('\n')[0].split(',');
 
-    it('should preserve original columns in order', () => {
-      const csv = formatAsCSV(sampleOutput);
-      const lines = csv.split('\n');
+    const scoreIdx = header.indexOf('vocabulary_score');
+    const reasoningIdx = header.indexOf('vocabulary_reasoning');
+    const statusIdx = header.indexOf('vocabulary_status');
 
-      // First line should be headers with original columns first
-      const headers = lines[0];
-      expect(headers).toContain('row_id');
-      expect(headers).toContain('text');
-      expect(headers).toContain('grade');
-      expect(headers).toContain('source');
+    expect(cols[scoreIdx]).toBe('');           // score is blank for errors
+    expect(cols[reasoningIdx]).toBe('API timeout');
+    expect(cols[statusIdx]).toBe('error');
+  });
 
-      // Should have evaluator columns (not single "evaluator" column)
-      expect(headers).toContain('vocabulary_score');
+  it('outputs not_run when an evaluator produced no result for a row', () => {
+    // Row 1: vocabulary ran; sentence-structure did not
+    const output = makeOutput([
+      makeResult({ rowIndex: 1, evaluatorId: 'vocabulary', originalRow: { text: 'x', grade: '5' } }),
+    ]);
+    // Manually add sentence-structure to the results so the column exists but not for row 1
+    output.results.push(makeResult({
+      rowIndex: 2, evaluatorId: 'sentence-structure',
+      originalRow: { text: 'y', grade: '5' },
+    }));
+
+    const csv = formatAsCSV(output);
+    const [header, row1] = csv.split('\n');
+    const cols = header.split(',');
+    const ssStatusIdx = cols.indexOf('sentence_structure_status');
+
+    expect(row1.split(',')[ssStatusIdx]).toBe('not_run');
+  });
 
-      // Data rows should have original data first - now one row per input row
-      expect(lines[1].startsWith('1,')).toBe(true); // Row 1
-      expect(lines[2].startsWith('2,')).toBe(true); // Row 2
-    });
+  it('wraps fields containing commas, quotes, or newlines in double-quotes', () => {
+    const output = makeOutput([
+      makeResult({
+        score: 'slightly complex',
+        reasoning: 'Has "quotes" and, comma',
+        originalRow: { text: 'Line1\nLine2', grade: '5' },
+      }),
+    ]);
+
+    const csv = formatAsCSV(output);
+    expect(csv).toContain('"Line1\nLine2"');
+    expect(csv).toContain('"Has ""quotes"" and, comma"');
   });
+});
 
-  describe('formatAsJSON', () => {
-    it('should format results as valid JSON', () => {
-      const json = formatAsJSON(sampleOutput);
+// ============================================================
+// formatAsHTML — computed report data
+// ============================================================
+
+describe('formatAsHTML', () => {
+  describe('snapshot counts', () => {
+    it('counts a row as errored if any of its evaluator results failed', () => {
+      // Row 1: vocabulary ok, sentence-structure errored → should be "errored"
+      // Row 2: both ok → should be "processed"
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, evaluatorId: 'vocabulary',         status: 'success' }),
+        makeResult({ rowIndex: 1, evaluatorId: 'sentence-structure', status: 'error', error: 'timeout' }),
+        makeResult({ rowIndex: 2, evaluatorId: 'vocabulary',         status: 'success' }),
+        makeResult({ rowIndex: 2, evaluatorId: 'sentence-structure', status: 'success' }),
+      ]);
+
+      const { meta } = extractReportData(formatAsHTML(output, makeMeta({ totalInputRows: 2 })));
+      expect(meta.processedRows).toBe(1);
+      expect(meta.erroredRows).toBe(1);
+    });
+  });
 
-      expect(() => JSON.parse(json)).not.toThrow();
+  describe('GLA status classification', () => {
+    // Single-result output: the GLA evaluator scoring `glaBand` for a row whose input grade is `inputGrade`.
+    function glaOutput(inputGrade: string, glaBand: string) {
+      const glaResult = makeResult({
+        grade: inputGrade, evaluatorId: 'grade-level-appropriateness', score: glaBand,
+      });
+      return makeOutput([glaResult]);
+    }
+
+    it('classifies on-band when input grade falls within the GLA band', () => {
+      const { gradeLevelStats } = extractReportData(
+        formatAsHTML(glaOutput('3', '2-3'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+      );
+      expect(gradeLevelStats.onBand).toBe(1);
+      expect(gradeLevelStats.adjacent).toBe(0);
+      expect(gradeLevelStats.offTarget).toBe(0);
     });
 
-    it('should include results and summary', () => {
-      const json = formatAsJSON(sampleOutput);
-      const parsed = JSON.parse(json);
+    it('classifies adjacent when input grade is one band away from the GLA result', () => {
+      // Grade 4 → band index 2 (4-5); GLA "2-3" → band index 1; diff = 1
+      const { gradeLevelStats } = extractReportData(
+        formatAsHTML(glaOutput('4', '2-3'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+      );
+      expect(gradeLevelStats.onBand).toBe(0);
+      expect(gradeLevelStats.adjacent).toBe(1);
+      expect(gradeLevelStats.offTarget).toBe(0);
+    });
 
-      expect(parsed).toHaveProperty('results');
-      expect(parsed).toHaveProperty('summary');
-      expect(parsed.results).toHaveLength(3);
+    it('classifies off-target when input grade is two or more bands away', () => {
+      // Grade 8 → band index 3 (6-8); GLA "2-3" → band index 1; diff = 2
+      const { gradeLevelStats } = extractReportData(
+        formatAsHTML(glaOutput('8', '2-3'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+      );
+      expect(gradeLevelStats.onBand).toBe(0);
+      expect(gradeLevelStats.adjacent).toBe(0);
+      expect(gradeLevelStats.offTarget).toBe(1);
     });
 
-    it('should preserve all result fields', () => {
-      const json = formatAsJSON(sampleOutput);
-      const parsed = JSON.parse(json);
-
-      const firstResult = parsed.results[0];
-      expect(firstResult).toHaveProperty('rowIndex');
-      expect(firstResult).toHaveProperty('text');
-      expect(firstResult).toHaveProperty('grade');
-      expect(firstResult).toHaveProperty('evaluatorId');
-      expect(firstResult).toHaveProperty('status');
-      expect(firstResult).toHaveProperty('processingTimeMs');
+    it('maps grade K and grade 1 to the same K-1 band (both on-band with K-1 GLA result)', () => {
+      for (const grade of ['K', '1']) {
+        const { gradeLevelStats } = extractReportData(
+          formatAsHTML(glaOutput(grade, 'K-1'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+        );
+        expect(gradeLevelStats.onBand).toBe(1);
+      }
     });
 
-    it('should include summary statistics', () => {
-      const json = formatAsJSON(sampleOutput);
-      const parsed = JSON.parse(json);
+    it('maps grade 11, 12, and CCR to the same 11-CCR band', () => {
+      for (const grade of ['11', '12', 'CCR']) {
+        const { gradeLevelStats } = extractReportData(
+          formatAsHTML(glaOutput(grade, '11-CCR'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+        );
+        expect(gradeLevelStats.onBand).toBe(1);
+      }
+    });
 
-      expect(parsed.summary.totalTasks).toBe(3);
-      expect(parsed.summary.successful).toBe(2);
-      expect(parsed.summary.failed).toBe(1);
-      expect(parsed.summary.durationMs).toBe(7500);
+    it('treats an unrecognised grade as off-target (tests the -1 guard, not coincidental diff arithmetic)', () => {
+      // Grade '99' → gradeToBandIndex returns -1. GLA 'K-1' is index 0, so without
+      // the "inputIdx === -1" guard the diff would be |(-1) - 0| = 1 → 'adjacent'.
+      // The guard must fire for this to be 'off-target'.
+      const { gradeLevelStats } = extractReportData(
+        formatAsHTML(glaOutput('99', 'K-1'), makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+      );
+      expect(gradeLevelStats.offTarget).toBe(1);
     });
   });
 
-  describe('formatAsHTML', () => {
-    it('should generate valid HTML', () => {
-      const html = formatAsHTML(sampleOutput);
-
-      expect(html).toContain('<!DOCTYPE html>');
-      expect(html).toContain('<html');
-      expect(html).toContain('</html>');
+  describe('complexity stats', () => {
+    it('normalises score strings case-insensitively (Title Case and lowercase both map to the same numeric value)', () => {
+      // vocabulary returns lowercase; sentence-structure returns Title Case
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, evaluatorId: 'vocabulary',         score: 'slightly complex' }),
+        makeResult({ rowIndex: 1, evaluatorId: 'sentence-structure', score: 'Slightly Complex' }),
+      ]);
+
+      const { complexityStats } = extractReportData(
+        formatAsHTML(output, makeMeta({ evaluatorIds: ['vocabulary', 'sentence-structure'] }))
+      );
+
+      // Both evaluators must appear — verifies GLA is excluded and neither evaluator was silently dropped
+      expect(complexityStats).toHaveLength(2);
+      for (const stat of complexityStats) {
+        expect(stat.average).toBe(1.0);
+        expect(stat.label).toBe('Slightly Complex');
+        expect(stat.distribution[0]).toBe(1); // one score of 1
+      }
     });
 
-    it('should include AG Grid script', () => {
-      const html = formatAsHTML(sampleOutput);
+    it('excludes GLA from complexity stats even when it runs alongside complexity evaluators', () => {
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, evaluatorId: 'grade-level-appropriateness', score: '4-5' }),
+        makeResult({ rowIndex: 1, evaluatorId: 'vocabulary', score: 'slightly complex' }),
+      ]);
+
+      const { complexityStats } = extractReportData(
+        formatAsHTML(output, makeMeta({ evaluatorIds: ['grade-level-appropriateness', 'vocabulary'] }))
+      );
 
-      expect(html).toContain('ag-grid-community');
+      expect(complexityStats).toHaveLength(1);
+      expect(complexityStats[0].evaluatorId).toBe('vocabulary');
     });
 
-    it('should include summary statistics', () => {
-      const html = formatAsHTML(sampleOutput);
+    it('computes average and distribution correctly across multiple rows', () => {
+      // scores: 1, 2, 3 → avg 2.0
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, evaluatorId: 'vocabulary', score: 'slightly complex' }),
+        makeResult({ rowIndex: 2, evaluatorId: 'vocabulary', score: 'moderately complex' }),
+        makeResult({ rowIndex: 3, evaluatorId: 'vocabulary', score: 'very complex' }),
+      ]);
 
-      expect(html).toContain('3'); // Total tasks
-      expect(html).toContain('2'); // Successful
-      expect(html).toContain('1'); // Failed
+      const { complexityStats } = extractReportData(formatAsHTML(output, makeMeta({ totalInputRows: 3 })));
+      const vocab = complexityStats[0];
+
+      expect(vocab.average).toBe(2.0);
+      expect(vocab.label).toBe('Moderately Complex');
+      expect(vocab.distribution).toEqual([1, 1, 1, 0]); // one each of scores 1, 2, 3
     });
 
-    it('should include grid data as JSON', () => {
-      const html = formatAsHTML(sampleOutput);
+    it('excludes error results from complexity averages', () => {
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, evaluatorId: 'vocabulary', status: 'success', score: 'very complex' }),
+        makeResult({ rowIndex: 2, evaluatorId: 'vocabulary', status: 'error', error: 'timeout' }),
+      ]);
 
-      expect(html).toContain('const rowData');
-      expect(html).toContain('vocabulary_status');
-      expect(html).toContain('sentence_structure_status');
+      const { complexityStats } = extractReportData(formatAsHTML(output, makeMeta({ totalInputRows: 2 })));
+      expect(complexityStats[0].average).toBe(3.0); // only the successful score counts
+      expect(complexityStats[0].distribution).toEqual([0, 0, 1, 0]);
     });
+  });
 
-    it('should include HTML-like content in JSON data', () => {
-      const resultsWithHTML: BatchResult[] = [
-        {
-          rowIndex: 1,
-          text: 'Text with <script>alert("xss")</script>',
-          grade: '3',
-          evaluatorId: 'vocabulary',
-          status: 'success',
-          score: 'slightly complex',
-          reasoning: 'Reasoning with <b>bold</b>',
-          processingTimeMs: 1000,
-          originalRow: { text: 'Text with <script>alert("xss")</script>', grade: '3' },
-        },
-      ];
-
-      const output: BatchOutput = {
-        results: resultsWithHTML,
-        summary: {
-          totalTasks: 1,
-          successful: 1,
-          failed: 0,
-          durationMs: 1000,
-          resultsPerEvaluator: { vocabulary: { successful: 1, failed: 0 } },
-        },
-      };
-
-      const html = formatAsHTML(output);
-
-      // JSON.stringify automatically escapes HTML, so it's safe
-      // The content will be in the JSON data but escaped
-      expect(html).toContain('const rowData');
-      expect(html).toContain('vocabulary');
+  describe('grade band distribution', () => {
+    it('groups by the INPUT grade band, not the GLA result band', () => {
+      // Grade 3 → "2-3" bucket (index 1). GLA says "9-10" (off-target, diff=3).
+      const output = makeOutput([makeResult({
+        grade: '3',
+        evaluatorId: 'grade-level-appropriateness',
+        score: '9-10',
+      })]);
+
+      const { gradeBandDistribution } = extractReportData(
+        formatAsHTML(output, makeMeta({ evaluatorIds: ['grade-level-appropriateness'] }))
+      );
+
+      const band23 = gradeBandDistribution.data[1]; // index 1 = "2-3" (input grade)
+      const band910 = gradeBandDistribution.data[4]; // index 4 = "9-10" (GLA result)
+
+      expect(band23.total).toBe(1);    // row belongs to the "2-3" input bucket
+      expect(band23.offTarget).toBe(1);
+      expect(band910.total).toBe(0);   // NOT in the GLA result's bucket
+    });
+  });
+
+  describe('complexity heatmap', () => {
+    it('produces null for grade bands that have no data', () => {
+      // Only grade 5 rows → only "4-5" band (index 2) has data; others are null
+      const output = makeOutput([
+        makeResult({ grade: '5', evaluatorId: 'vocabulary', score: 'moderately complex' }),
+      ]);
+
+      const { complexityHeatmap } = extractReportData(formatAsHTML(output, makeMeta()));
+      const k1Values = complexityHeatmap.values[0]; // K-1 band
+      expect(k1Values[0]).toBeNull();
     });
 
-    it('should include column definitions with evaluator columns', () => {
-      const html = formatAsHTML(sampleOutput);
+    it('computes the correct per-cell average', () => {
+      // Two grade-5 rows: scores 1 and 3 → average 2.0
+      const output = makeOutput([
+        makeResult({ rowIndex: 1, grade: '5', evaluatorId: 'vocabulary', score: 'slightly complex' }),
+        makeResult({ rowIndex: 2, grade: '5', evaluatorId: 'vocabulary', score: 'very complex' }),
+      ]);
+
+      const { complexityHeatmap } = extractReportData(formatAsHTML(output, makeMeta({ totalInputRows: 2 })));
+      const band45Values = complexityHeatmap.values[2]; // "4-5" is index 2
+      expect(band45Values[0]).toBe(2.0);
+    });
+  });
 
-      expect(html).toContain('columnDefs');
-      expect(html).toContain('field: \'row\'');
-      expect(html).toContain('field: \'text\'');
+  describe('XSS safety', () => {
+    it('Unicode-escapes < > & so injected data cannot break out of the script tag', () => {
+      const output = makeOutput([makeResult({
+        text: '<script>alert("xss")</script>',
+        originalRow: { text: '<script>alert("xss")</script>', grade: '5' },
+      })]);
 
-      // Should have evaluator-specific columns (not single "status" column)
-      expect(html).toContain('vocabulary_status');
-      expect(html).toContain('vocabulary_score');
-      expect(html).toContain('sentence_structure_status');
+      const html = formatAsHTML(output, makeMeta());
+      expect(html).not.toContain('<script>alert');
+      expect(html).toContain('\\u003cscript\\u003e');
     });
   });
 });
diff --git a/sdks/typescript/tsup.config.ts b/sdks/typescript/tsup.config.ts
index de81c72..50d2910 100644
--- a/sdks/typescript/tsup.config.ts
+++ b/sdks/typescript/tsup.config.ts
@@ -11,6 +11,7 @@ export default defineConfig({
   minify: false,
   external: ['ai', '@ai-sdk/openai', '@ai-sdk/anthropic', '@ai-sdk/google'],
   loader: {
-    '.txt': 'text', // Inline prompt .txt files as strings at build time
+    '.txt': 'text',  // Inline prompt .txt files as strings at build time
+    '.html': 'text', // Inline HTML report templates as strings at build time
   },
 });
diff --git a/sdks/typescript/vitest.config.ts b/sdks/typescript/vitest.config.ts
index 06f43e3..b2fbd01 100644
--- a/sdks/typescript/vitest.config.ts
+++ b/sdks/typescript/vitest.config.ts
@@ -19,8 +19,23 @@ function txtPlugin(): Plugin {
   };
 }
 
+// Plugin for this Vitest config: lets an import of a `.html` file yield the file's text as its default export.
+function htmlPlugin(): Plugin {
+  const isHtml = (path: string): boolean => path.endsWith('.html');
+  return {
+    name: 'html-loader',
+    enforce: 'pre',
+    // Resolve `.html` specifiers against the importing file's directory; everything else is left unresolved here.
+    resolveId: (source, importer) =>
+      isHtml(source) && importer ? resolve(dirname(importer), source) : undefined,
+    // Emit the file's UTF-8 text as a string-literal default export.
+    load: (id) =>
+      isHtml(id) ? `export default ${JSON.stringify(readFileSync(id, 'utf-8'))};` : undefined,
+  };
+}
+
 export default defineConfig(({ mode }) => ({
-  plugins: [txtPlugin()],
+  plugins: [txtPlugin(), htmlPlugin()],
   test: {
     globals: true,
     environment: 'node',