diff --git a/.agents/skills/refactor/SKILL.md b/.agents/skills/refactor/SKILL.md
deleted file mode 100644
index ba8313b..0000000
--- a/.agents/skills/refactor/SKILL.md
+++ /dev/null
@@ -1,645 +0,0 @@
----
-name: refactor
-description: 'Surgical code refactoring to improve maintainability without changing behavior. Covers extracting functions, renaming variables, breaking down god functions, improving type safety, eliminating code smells, and applying design patterns. Less drastic than repo-rebuilder; use for gradual improvements.'
-license: MIT
----
-
-# Refactor
-
-## Overview
-
-Improve code structure and readability without changing external behavior. Refactoring is gradual evolution, not revolution. Use this for improving existing code, not rewriting from scratch.
-
-## When to Use
-
-Use this skill when:
-
-- Code is hard to understand or maintain
-- Functions/classes are too large
-- Code smells need addressing
-- Adding features is difficult due to code structure
-- User asks "clean up this code", "refactor this", "improve this"
-
----
-
-## Refactoring Principles
-
-### The Golden Rules
-
-1. **Behavior is preserved** - Refactoring doesn't change what the code does, only how
-2. **Small steps** - Make tiny changes, test after each
-3. **Version control is your friend** - Commit before and after each safe state
-4. **Tests are essential** - Without tests, you're not refactoring, you're editing
-5. **One thing at a time** - Don't mix refactoring with feature changes
-
-### When NOT to Refactor
-
-```
-- Code that works and won't change again (if it ain't broke...)
-- Critical production code without tests (add tests first)
-- When you're under a tight deadline
-- "Just because" - need a clear purpose
-```
-
----
-
-## Common Code Smells & Fixes
-
-### 1. Long Method/Function
-
-```diff
-# BAD: 200-line function that does everything
-- async function processOrder(orderId) {
--   // 50 lines: fetch order
--   // 30 lines: validate order
--   // 40 lines: calculate pricing
--   // 30 lines: update inventory
--   // 20 lines: create shipment
--   // 30 lines: send notifications
-- }
-
-# GOOD: Broken into focused functions
-+ async function processOrder(orderId) {
-+   const order = await fetchOrder(orderId);
-+   validateOrder(order);
-+   const pricing = calculatePricing(order);
-+   await updateInventory(order);
-+   const shipment = await createShipment(order);
-+   await sendNotifications(order, pricing, shipment);
-+   return { order, pricing, shipment };
-+ }
-```
-
-### 2. Duplicated Code
-
-```diff
-# BAD: Same logic in multiple places
-- function calculateUserDiscount(user) {
--   if (user.membership === 'gold') return user.total * 0.2;
--   if (user.membership === 'silver') return user.total * 0.1;
--   return 0;
-- }
--
-- function calculateOrderDiscount(order) {
--   if (order.user.membership === 'gold') return order.total * 0.2;
--   if (order.user.membership === 'silver') return order.total * 0.1;
--   return 0;
-- }
-
-# GOOD: Extract common logic
-+ function getMembershipDiscountRate(membership) {
-+   const rates = { gold: 0.2, silver: 0.1 };
-+   return rates[membership] || 0;
-+ }
-+
-+ function calculateUserDiscount(user) {
-+   return user.total * getMembershipDiscountRate(user.membership);
-+ }
-+
-+ function calculateOrderDiscount(order) {
-+   return order.total * getMembershipDiscountRate(order.user.membership);
-+ }
-```
-
-### 3. Large Class/Module
-
-```diff
-# BAD: God object that knows too much
-- class UserManager {
--   createUser() { /* ... */ }
--   updateUser() { /* ... */ }
--   deleteUser() { /* ... */ }
--   sendEmail() { /* ... */ }
--   generateReport() { /* ... */ }
--   handlePayment() { /* ... */ }
--   validateAddress() { /* ... */ }
--   // 50 more methods...
-- }
-
-# GOOD: Single responsibility per class
-+ class UserService {
-+   create(data) { /* ... */ }
-+   update(id, data) { /* ... */ }
-+   delete(id) { /* ... */ }
-+ }
-+
-+ class EmailService {
-+   send(to, subject, body) { /* ... */ }
-+ }
-+
-+ class ReportService {
-+   generate(type, params) { /* ... */ }
-+ }
-+
-+ class PaymentService {
-+   process(amount, method) { /* ... */ }
-+ }
-```
-
-### 4. Long Parameter List
-
-```diff
-# BAD: Too many parameters
-- function createUser(email, password, name, age, address, city, country, phone) {
--   /* ... */
-- }
-
-# GOOD: Group related parameters
-+ interface UserData {
-+   email: string;
-+   password: string;
-+   name: string;
-+   age?: number;
-+   address?: Address;
-+   phone?: string;
-+ }
-+
-+ function createUser(data: UserData) {
-+   /* ... */
-+ }
-
-# EVEN BETTER: Use builder pattern for complex construction
-+ const user = UserBuilder
-+   .email('test@example.com')
-+   .password('secure123')
-+   .name('Test User')
-+   .address(address)
-+   .build();
-```
-
-### 5. Feature Envy
-
-```diff
-# BAD: Method that uses another object's data more than its own
-- class Order {
--   calculateDiscount(user) {
--     if (user.membershipLevel === 'gold') {
-+       return this.total * 0.2;
-+     }
-+     if (user.accountAge > 365) {
-+       return this.total * 0.1;
-+     }
-+     return 0;
-+   }
-+ }
-
-# GOOD: Move logic to the object that owns the data
-+ class User {
-+   getDiscountRate(orderTotal) {
-+     if (this.membershipLevel === 'gold') return 0.2;
-+     if (this.accountAge > 365) return 0.1;
-+     return 0;
-+   }
-+ }
-+
-+ class Order {
-+   calculateDiscount(user) {
-+     return this.total * user.getDiscountRate(this.total);
-+   }
-+ }
-```
-
-### 6. Primitive Obsession
-
-```diff
-# BAD: Using primitives for domain concepts
-- function sendEmail(to, subject, body) { /* ... */ }
-- sendEmail('user@example.com', 'Hello', '...');
-
-- function createPhone(country, number) {
--   return `${country}-${number}`;
-- }
-
-# GOOD: Use domain types
-+ class Email {
-+   private constructor(public readonly value: string) {
-+     if (!Email.isValid(value)) throw new Error('Invalid email');
-+   }
-+   static create(value: string) { return new Email(value); }
-+   static isValid(email: string) { return /^[^\s@]+@[^\s@]+\.[^\s@]+$/.test(email); }
-+ }
-+
-+ class PhoneNumber {
-+   constructor(
-+     public readonly country: string,
-+     public readonly number: string
-+   ) {
-+     if (!PhoneNumber.isValid(country, number)) throw new Error('Invalid phone');
-+   }
-+   toString() { return `${this.country}-${this.number}`; }
-+   static isValid(country: string, number: string) { /* ... */ }
-+ }
-+
-+ // Usage
-+ const email = Email.create('user@example.com');
-+ const phone = new PhoneNumber('1', '555-1234');
-```
-
-### 7. Magic Numbers/Strings
-
-```diff
-# BAD: Unexplained values
-- if (user.status === 2) { /* ... */ }
-- const discount = total * 0.15;
-- setTimeout(callback, 86400000);
-
-# GOOD: Named constants
-+ const UserStatus = {
-+   ACTIVE: 1,
-+   INACTIVE: 2,
-+   SUSPENDED: 3
-+ } as const;
-+
-+ const DISCOUNT_RATES = {
-+   STANDARD: 0.1,
-+   PREMIUM: 0.15,
-+   VIP: 0.2
-+ } as const;
-+
-+ const ONE_DAY_MS = 24 * 60 * 60 * 1000;
-+
-+ if (user.status === UserStatus.INACTIVE) { /* ... */ }
-+ const discount = total * DISCOUNT_RATES.PREMIUM;
-+ setTimeout(callback, ONE_DAY_MS);
-```
-
-### 8. Nested Conditionals
-
-```diff
-# BAD: Arrow code
-- function process(order) {
--   if (order) {
--     if (order.user) {
--       if (order.user.isActive) {
--         if (order.total > 0) {
--           return processOrder(order);
-+         } else {
-+           return { error: 'Invalid total' };
-+         }
-+       } else {
-+         return { error: 'User inactive' };
-+       }
-+     } else {
-+       return { error: 'No user' };
-+     }
-+   } else {
-+     return { error: 'No order' };
-+   }
-+ }
-
-# GOOD: Guard clauses / early returns
-+ function process(order) {
-+   if (!order) return { error: 'No order' };
-+   if (!order.user) return { error: 'No user' };
-+   if (!order.user.isActive) return { error: 'User inactive' };
-+   if (order.total <= 0) return { error: 'Invalid total' };
-+   return processOrder(order);
-+ }
-
-# EVEN BETTER: Using Result type
-+ function process(order): Result<ProcessedOrder, Error> {
-+   return Result.combine([
-+     validateOrderExists(order),
-+     validateUserExists(order),
-+     validateUserActive(order.user),
-+     validateOrderTotal(order)
-+   ]).flatMap(() => processOrder(order));
-+ }
-```
-
-### 9. Dead Code
-
-```diff
-# BAD: Unused code lingers
-- function oldImplementation() { /* ... */ }
-- const DEPRECATED_VALUE = 5;
-- import { unusedThing } from './somewhere';
-- // Commented out code
-- // function oldCode() { /* ... */ }
-
-# GOOD: Remove it
-+ // Delete unused functions, imports, and commented code
-+ // If you need it again, git history has it
-```
-
-### 10. Inappropriate Intimacy
-
-```diff
-# BAD: One class reaches deep into another
-- class OrderProcessor {
--   process(order) {
--     order.user.profile.address.street;  // Too intimate
--     order.repository.connection.config;  // Breaking encapsulation
-+   }
-+ }
-
-# GOOD: Ask, don't tell
-+ class OrderProcessor {
-+   process(order) {
-+     order.getShippingAddress();  // Order knows how to get it
-+     order.save();  // Order knows how to save itself
-+   }
-+ }
-```
-
----
-
-## Extract Method Refactoring
-
-### Before and After
-
-```diff
-# Before: One long function
-- function printReport(users) {
--   console.log('USER REPORT');
--   console.log('============');
--   console.log('');
--   console.log(`Total users: ${users.length}`);
--   console.log('');
--   console.log('ACTIVE USERS');
--   console.log('------------');
--   const active = users.filter(u => u.isActive);
--   active.forEach(u => {
--     console.log(`- ${u.name} (${u.email})`);
--   });
--   console.log('');
--   console.log(`Active: ${active.length}`);
--   console.log('');
--   console.log('INACTIVE USERS');
--   console.log('--------------');
--   const inactive = users.filter(u => !u.isActive);
--   inactive.forEach(u => {
--     console.log(`- ${u.name} (${u.email})`);
--   });
--   console.log('');
--   console.log(`Inactive: ${inactive.length}`);
-- }
-
-# After: Extracted methods
-+ function printReport(users) {
-+   printHeader('USER REPORT');
-+   console.log(`Total users: ${users.length}\n`);
-+   printUserSection('ACTIVE USERS', users.filter(u => u.isActive));
-+   printUserSection('INACTIVE USERS', users.filter(u => !u.isActive));
-+ }
-+
-+ function printHeader(title) {
-+   const line = '='.repeat(title.length);
-+   console.log(title);
-+   console.log(line);
-+   console.log('');
-+ }
-+
-+ function printUserSection(title, users) {
-+   console.log(title);
-+   console.log('-'.repeat(title.length));
-+   users.forEach(u => console.log(`- ${u.name} (${u.email})`));
-+   console.log('');
-+   console.log(`${title.split(' ')[0]}: ${users.length}`);
-+   console.log('');
-+ }
-```
-
----
-
-## Introducing Type Safety
-
-### From Untyped to Typed
-
-```diff
-# Before: No types
-- function calculateDiscount(user, total, membership, date) {
--   if (membership === 'gold' && date.getDay() === 5) {
--     return total * 0.25;
--   }
--   if (membership === 'gold') return total * 0.2;
--   return total * 0.1;
-- }
-
-# After: Full type safety
-+ type Membership = 'bronze' | 'silver' | 'gold';
-+
-+ interface User {
-+   id: string;
-+   name: string;
-+   membership: Membership;
-+ }
-+
-+ interface DiscountResult {
-+   original: number;
-+   discount: number;
-+   final: number;
-+   rate: number;
-+ }
-+
-+ function calculateDiscount(
-+   user: User,
-+   total: number,
-+   date: Date = new Date()
-+ ): DiscountResult {
-+   if (total < 0) throw new Error('Total cannot be negative');
-+
-+   let rate = 0.1; // Default bronze
-+
-+   if (user.membership === 'gold' && date.getDay() === 5) {
-+     rate = 0.25; // Friday bonus for gold
-+   } else if (user.membership === 'gold') {
-+     rate = 0.2;
-+   } else if (user.membership === 'silver') {
-+     rate = 0.15;
-+   }
-+
-+   const discount = total * rate;
-+
-+   return {
-+     original: total,
-+     discount,
-+     final: total - discount,
-+     rate
-+   };
-+ }
-```
-
----
-
-## Design Patterns for Refactoring
-
-### Strategy Pattern
-
-```diff
-# Before: Conditional logic
-- function calculateShipping(order, method) {
--   if (method === 'standard') {
--     return order.total > 50 ? 0 : 5.99;
--   } else if (method === 'express') {
--     return order.total > 100 ? 9.99 : 14.99;
-+   } else if (method === 'overnight') {
-+     return 29.99;
-+   }
-+ }
-
-# After: Strategy pattern
-+ interface ShippingStrategy {
-+   calculate(order: Order): number;
-+ }
-+
-+ class StandardShipping implements ShippingStrategy {
-+   calculate(order: Order) {
-+     return order.total > 50 ? 0 : 5.99;
-+   }
-+ }
-+
-+ class ExpressShipping implements ShippingStrategy {
-+   calculate(order: Order) {
-+     return order.total > 100 ? 9.99 : 14.99;
-+   }
-+ }
-+
-+ class OvernightShipping implements ShippingStrategy {
-+   calculate(order: Order) {
-+     return 29.99;
-+   }
-+ }
-+
-+ function calculateShipping(order: Order, strategy: ShippingStrategy) {
-+   return strategy.calculate(order);
-+ }
-```
-
-### Chain of Responsibility
-
-```diff
-# Before: Nested validation
-- function validate(user) {
--   const errors = [];
--   if (!user.email) errors.push('Email required');
-+   else if (!isValidEmail(user.email)) errors.push('Invalid email');
-+   if (!user.name) errors.push('Name required');
-+   if (user.age < 18) errors.push('Must be 18+');
-+   if (user.country === 'blocked') errors.push('Country not supported');
-+   return errors;
-+ }
-
-# After: Chain of responsibility
-+ abstract class Validator {
-+   abstract validate(user: User): string | null;
-+   setNext(validator: Validator): Validator {
-+     this.next = validator;
-+     return validator;
-+   }
-+   validate(user: User): string | null {
-+     const error = this.doValidate(user);
-+     if (error) return error;
-+     return this.next?.validate(user) ?? null;
-+   }
-+ }
-+
-+ class EmailRequiredValidator extends Validator {
-+   doValidate(user: User) {
-+     return !user.email ? 'Email required' : null;
-+   }
-+ }
-+
-+ class EmailFormatValidator extends Validator {
-+   doValidate(user: User) {
-+     return user.email && !isValidEmail(user.email) ? 'Invalid email' : null;
-+   }
-+ }
-+
-+ // Build the chain
-+ const validator = new EmailRequiredValidator()
-+   .setNext(new EmailFormatValidator())
-+   .setNext(new NameRequiredValidator())
-+   .setNext(new AgeValidator())
-+   .setNext(new CountryValidator());
-```
-
----
-
-## Refactoring Steps
-
-### Safe Refactoring Process
-
-```
-1. PREPARE
-   - Ensure tests exist (write them if missing)
-   - Commit current state
-   - Create feature branch
-
-2. IDENTIFY
-   - Find the code smell to address
-   - Understand what the code does
-   - Plan the refactoring
-
-3. REFACTOR (small steps)
-   - Make one small change
-   - Run tests
-   - Commit if tests pass
-   - Repeat
-
-4. VERIFY
-   - All tests pass
-   - Manual testing if needed
-   - Performance unchanged or improved
-
-5. CLEAN UP
-   - Update comments
-   - Update documentation
-   - Final commit
-```
-
----
-
-## Refactoring Checklist
-
-### Code Quality
-
-- [ ] Functions are small (< 50 lines)
-- [ ] Functions do one thing
-- [ ] No duplicated code
-- [ ] Descriptive names (variables, functions, classes)
-- [ ] No magic numbers/strings
-- [ ] Dead code removed
-
-### Structure
-
-- [ ] Related code is together
-- [ ] Clear module boundaries
-- [ ] Dependencies flow in one direction
-- [ ] No circular dependencies
-
-### Type Safety
-
-- [ ] Types defined for all public APIs
-- [ ] No `any` types without justification
-- [ ] Nullable types explicitly marked
-
-### Testing
-
-- [ ] Refactored code is tested
-- [ ] Tests cover edge cases
-- [ ] All tests pass
-
----
-
-## Common Refactoring Operations
-
-| Operation                                     | Description                           |
-| --------------------------------------------- | ------------------------------------- |
-| Extract Method                                | Turn code fragment into method        |
-| Extract Class                                 | Move behavior to new class            |
-| Extract Interface                             | Create interface from implementation  |
-| Inline Method                                 | Move method body back to caller       |
-| Inline Class                                  | Move class behavior to caller         |
-| Pull Up Method                                | Move method to superclass             |
-| Push Down Method                              | Move method to subclass               |
-| Rename Method/Variable                        | Improve clarity                       |
-| Introduce Parameter Object                    | Group related parameters              |
-| Replace Conditional with Polymorphism         | Use polymorphism instead of switch/if |
-| Replace Magic Number with Constant            | Named constants                       |
-| Decompose Conditional                         | Break complex conditions              |
-| Consolidate Conditional                       | Combine duplicate conditions          |
-| Replace Nested Conditional with Guard Clauses | Early returns                         |
-| Introduce Null Object                         | Eliminate null checks                 |
-| Replace Type Code with Class/Enum             | Strong typing                         |
-| Replace Inheritance with Delegation           | Composition over inheritance          |
diff --git a/.claude/skills/refactor b/.claude/skills/refactor
deleted file mode 120000
index dc7a9b2..0000000
--- a/.claude/skills/refactor
+++ /dev/null
@@ -1 +0,0 @@
-../../.agents/skills/refactor
\ No newline at end of file
diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..b9f54c3
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,6 @@
+*
+!Dockerfile
+!package.json
+!tsconfig.json
+!src/
+!src/**
diff --git a/.gcloudignore b/.gcloudignore
new file mode 100644
index 0000000..7311d1f
--- /dev/null
+++ b/.gcloudignore
@@ -0,0 +1,9 @@
+# Allowlist: only upload what the Dockerfile consumes.
+# Everything else (test fixtures, binaries, terraform, docs, .git, etc.)
+# is excluded to keep the Cloud Build context small.
+*
+!Dockerfile
+!package.json
+!tsconfig.json
+!src/
+!src/**
diff --git a/.gitignore b/.gitignore
index c5815b0..9420a0b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,9 +23,18 @@ npm-debug.log*
 
 # Environment
 .env
-.env.local
+.env.*
 .envrc
 
+# Cloud credentials (defensive; never commit service-account keys or similar)
+*credentials*.json
+*service-account*.json
+*-sa-key*.json
+gcp-key*.json
+
+# TypeScript incremental build cache
+*.tsbuildinfo
+
 # Test coverage
 coverage/
 
@@ -42,3 +51,6 @@ deploy/terraform.tfstate*
 skills-lock.json
 
 .private/
+
+# Playwright MCP artifact directory
+.playwright-mcp/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4f26b8a..b88b4c3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,25 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/).
 
+## [1.2.3] - 2026-04-18
+
+### Added
+- `anthropic` and `anthropic-vertex` providers now expose `claude-opus-4-7` as the default flagship model. `claude-opus-4-6` remains selectable via `PDF_ANALYZER_MODEL`.
+- Cloud Run deployment supports all five providers (`google-vertex`, `anthropic-vertex`, `google`, `anthropic`, `openai`) via a single `PDF_ANALYZER_PROVIDER` knob; direct-API providers read their key from Secret Manager at runtime.
+
+### Fixed
+- `analyzePdf` no longer crashes when a `gs://` source falls back to chunked processing. The chunking branch now handles the `bytes` source kind via a new exhaustive `resolveSourceBytes` helper.
+- MCP Streamable HTTP: the `/mcp` route handles any method (GET/POST/DELETE) instead of POST-only. GET was previously returning 404 and causing clients to misreport "SDK auth failed".
+
+### Changed
+- Cloud Run deploys are now private by default (`--no-allow-unauthenticated`). Connect via `gcloud run services proxy` locally.
+- Cloud Build context slimmed from ~20 MiB to ~120 KiB via allowlist `.gcloudignore` / `.dockerignore`.
+- Deploy scripts (`deploy/gcloud.sh`, `deploy/main.tf`) and templates generalized for any provider + auth mode; see `deploy/README.md` for the matrix.
+- HTTP transport tests now drive the real production request handler via a new exported `createRequestHandler`; the old tests ran against an inline copy that could diverge from production.
+
+### Security
+- `.gitignore` hardened with defensive patterns for common credential, env, and build-cache leaks (`*credentials*.json`, `*service-account*.json`, `.env.*`, `*.tsbuildinfo`, etc.).
+
 ## [1.2.2] - 2026-04-08
 
 ### Added
diff --git a/CLAUDE.md b/CLAUDE.md
index b3f8c3c..5d028f3 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -23,7 +23,7 @@ git push -u origin <branch-name>  # Push and create PR
 Models per provider (do not change without discussion). Users choose during `--setup`:
 
 - **Google Gemini**: `gemini-3-flash-preview` (fast) / `gemini-3.1-pro-preview` (flagship)
-- **Anthropic Claude**: `claude-sonnet-4-6` (fast) / `claude-opus-4-6` (flagship)
+- **Anthropic Claude**: `claude-sonnet-4-6` (fast) / `claude-opus-4-7` (flagship) / `claude-opus-4-6` (previous flagship, still selectable)
 - **OpenAI**: `gpt-5.4-mini` (fast) / `gpt-5.4` (flagship)
 
 Thinking/reasoning is set to minimum for all models (document analysis doesn't benefit from extended thinking).
@@ -85,6 +85,26 @@ npm run type-check && npm run lint && npm test
 
 Always use `test/fixtures/1-pager.pdf` for MCP tool testing. It is small and cheap on LLM API calls. Never use `test/fixtures/oversized-doc.pdf` or other large PDFs unless the user gives explicit approval.
 
+## Deploying to Cloud Run
+
+The deploy scripts (`deploy/gcloud.sh` and `deploy/main.tf`) support every provider and both auth modes; the provider, and with it the auth mode, is selected by `PDF_ANALYZER_PROVIDER` in `deploy/env` (gcloud) or `provider_id` in `terraform.tfvars`:
+
+- `google-vertex`, `anthropic-vertex` → ADC via attached service account, no API key required
+- `google`, `anthropic`, `openai` → API key pulled from a Secret Manager secret named in `API_KEY_SECRET_NAME` / `api_key_secret_name`
+
+See `deploy/README.md` for the full matrix, required IAM roles per provider, and the one-time `gcloud secrets create` command for the direct-API providers. The service is always deployed with `--no-allow-unauthenticated` (private).
+
+### Running the remote MCP locally
+
+Because the service requires authenticated invocation, MCP clients connect through a local proxy that mints fresh identity tokens per request:
+
+```bash
+gcloud run services proxy <service-name> \
+  --project=<project-id> --region=<region> --port=8080
+```
+
+Point `.mcp.json`'s HTTP MCP entry at `http://localhost:8080/mcp`. When the proxy stops, the MCP disconnects until you start it again. No secrets live in `.mcp.json` — auth is handled per-request by the proxy against your ADC.
+
 ## Release Process
 
 Branch protection requires releases to go through a PR:
diff --git a/deploy/README.md b/deploy/README.md
index 8c4bd70..769a4e0 100644
--- a/deploy/README.md
+++ b/deploy/README.md
@@ -1,62 +1,73 @@
 # Deploying PDF Analyzer to Cloud Run
 
-This guide walks you through deploying the PDF Analyzer MCP server to Google Cloud Run. After deployment, any MCP client can connect to it over HTTP.
+This guide walks you through deploying the PDF Analyzer MCP server to Google Cloud Run. After deployment, any MCP client can connect to it over authenticated HTTP.
 
 ## What gets created
 
 | Resource | Purpose |
 |----------|---------|
 | **Cloud Run service** | Runs the MCP server, listens on `/mcp` |
-| **Service account** | Identity for the service (Vertex AI + GCS read access). Optional if you bring your own. |
+| **Service account** | Identity for the service. Roles depend on provider choice (see below). Optional if you bring your own. |
 | **Artifact Registry repo** | Stores the container image |
 
+The service is deployed **private** (`--no-allow-unauthenticated`). Callers must authenticate with a Google identity token — use `gcloud run services proxy` locally, or add your own `run.invoker` IAM bindings for specific identities.
+
+## Provider and auth matrix
+
+Pick one `PDF_ANALYZER_PROVIDER` value; the deploy scripts handle the rest.
+
+| Provider | Auth | Required config | Roles granted to Cloud Run SA |
+|---|---|---|---|
+| `google-vertex` | ADC | `VERTEX_LOCATION` | `aiplatform.user`, `storage.objectViewer` |
+| `anthropic-vertex` | ADC | `VERTEX_LOCATION` | `aiplatform.user`, `storage.objectViewer` |
+| `google` | API key | `API_KEY_SECRET_NAME` | `secretmanager.secretAccessor`, `storage.objectViewer` |
+| `anthropic` | API key | `API_KEY_SECRET_NAME` | `secretmanager.secretAccessor`, `storage.objectViewer` |
+| `openai` | API key | `API_KEY_SECRET_NAME` | `secretmanager.secretAccessor`, `storage.objectViewer` |
+
+For direct-API providers, create the Secret Manager secret once before deploying:
+
+```bash
+echo -n 'YOUR_API_KEY' | gcloud secrets create my-pdf-analyzer-key \
+  --project="$PROJECT_ID" --data-file=-
+```
+
+The secret name goes into `API_KEY_SECRET_NAME` (gcloud path) or `api_key_secret_name` (Terraform path). The deploy script grants the Cloud Run service account `secretAccessor` on that secret and injects the secret's value into the container as `PDF_ANALYZER_API_KEY` at runtime via `--set-secrets`.
+
 ## Prerequisites
 
 - A GCP project with billing enabled
 - `gcloud` CLI installed and authenticated (`gcloud auth login`)
-- These APIs will be enabled automatically by the deploy script:
-  - Vertex AI (`aiplatform.googleapis.com`)
-  - Artifact Registry (`artifactregistry.googleapis.com`)
-  - Cloud Build (`cloudbuild.googleapis.com`)
-  - Cloud Run (`run.googleapis.com`)
-  - Cloud Storage (`storage.googleapis.com`)
 
-## Finding your GCP values
-
-| Variable | Where to find it |
-|----------|-----------------|
-| `PROJECT_ID` | GCP Console top-left project dropdown, or **IAM & Admin > Settings**. Use the **ID** (e.g., `my-project-123`), not the display name. |
-| `REGION` | Your choice. [Cloud Run regions list](https://cloud.google.com/run/docs/locations). `us-central1` (Iowa) is a common low-cost default. |
-| `VERTEX_LOCATION` | Use `global` for preview models (`gemini-3-flash-preview`, `gemini-3.1-pro-preview`). Use a region like `us-central1` for GA models. |
-| `AR_REPOSITORY` | Your choice of name, or find existing ones at **Artifact Registry > Repositories**. The script creates it if it doesn't exist. |
-| `SERVICE_NAME` | Your choice of name, or find existing ones at **Cloud Run > Services**. |
-
-Only `PROJECT_ID` requires a lookup. The rest are either your choice or have sensible defaults.
+APIs are enabled automatically: `artifactregistry.googleapis.com`, `cloudbuild.googleapis.com`, `run.googleapis.com`, `storage.googleapis.com`, plus either `aiplatform.googleapis.com` (Vertex providers) or `secretmanager.googleapis.com` (direct-API providers).
 
 ## Option A: Deploy with gcloud CLI
 
 1. Copy the config template and fill in your values:
 
-```bash
-cp deploy/env.example deploy/env
-# Edit deploy/env with your project ID and region
-```
+   ```bash
+   cp deploy/env.example deploy/env
+   # Edit deploy/env: set PROJECT_ID, REGION, PDF_ANALYZER_PROVIDER,
+   # and (if using a direct-API provider) API_KEY_SECRET_NAME
+   ```
 
 2. Run the deploy script:
 
-```bash
-./deploy/gcloud.sh
-```
+   ```bash
+   ./deploy/gcloud.sh
+   ```
 
-The script enables APIs, creates resources (service account, Artifact Registry repo), builds the container image via Cloud Build, deploys to Cloud Run, and verifies the health endpoint.
+   The script enables APIs, creates resources (service account, Artifact Registry repo), builds the container image via Cloud Build, deploys to Cloud Run (private), and verifies `/health` with an ADC identity token.
 
 ### Configuration (`deploy/env`)
 
 | Variable | Required | Default | Description |
-|----------|----------|---------|-------------|
+|---|---|---|---|
 | `PROJECT_ID` | Yes | | Your GCP project ID |
 | `REGION` | Yes | | GCP region (e.g., `us-central1`) |
-| `VERTEX_LOCATION` | No | `global` | Vertex AI endpoint location (see note below) |
+| `PDF_ANALYZER_PROVIDER` | Yes | | `google`, `google-vertex`, `anthropic`, `anthropic-vertex`, or `openai` |
+| `VERTEX_LOCATION` | Vertex only | `global` | Vertex AI endpoint location |
+| `API_KEY_SECRET_NAME` | API-key only | | Name of the Secret Manager secret holding the provider API key |
+| `PDF_ANALYZER_MODEL` | No | provider default | Pin a specific model |
 | `AR_REPOSITORY` | No | `pdf-analyzer` | Artifact Registry repository name |
 | `SERVICE_NAME` | No | `pdf-analyzer` | Cloud Run service name |
 | `CREATE_SA` | No | `true` | Set `false` to use an existing service account |
@@ -65,179 +76,118 @@ The script enables APIs, creates resources (service account, Artifact Registry r
 
 ## Option B: Deploy with Terraform
 
-1. Copy the config template and fill in your values:
+1. Copy the tfvars template:
 
-```bash
-cd deploy
-cp terraform.tfvars.example terraform.tfvars
-# Edit terraform.tfvars
-```
+   ```bash
+   cd deploy
+   cp terraform.tfvars.example terraform.tfvars
+   # Edit: set project_id, provider_id, and (if direct-API) api_key_secret_name
+   ```
 
 2. Initialize and build the container image (Terraform does not run Cloud Build):
 
-```bash
-terraform init
+   ```bash
+   terraform init
 
-gcloud builds submit \
-  --tag <region>-docker.pkg.dev/<project-id>/pdf-analyzer/pdf-analyzer:latest \
-  --project=<project-id> ..
-```
+   gcloud builds submit \
+     --tag <region>-docker.pkg.dev/<project-id>/pdf-analyzer/pdf-analyzer:latest \
+     --project=<project-id> ..
+   ```
 
 3. Apply:
 
-```bash
-terraform apply
-```
+   ```bash
+   terraform apply
+   ```
 
 ### Terraform variables (`deploy/terraform.tfvars`)
 
 | Variable | Required | Default | Description |
-|----------|----------|---------|-------------|
-| `project_id` | Yes | | Your GCP project ID |
+|---|---|---|---|
+| `project_id` | Yes | | GCP project ID |
+| `provider_id` | Yes | | Provider ID (see matrix above) |
 | `region` | No | `us-central1` | GCP region |
-| `vertex_location` | No | `global` | Vertex AI endpoint location |
-| `ar_repository` | No | `pdf-analyzer` | Artifact Registry repository name |
-| `service_account_email` | No | (creates new) | Email of an existing service account to use |
-| `image` | No | auto-generated | Container image URI |
+| `vertex_location` | Vertex only | `global` | Vertex AI endpoint |
+| `api_key_secret_name` | API-key only | | Secret Manager secret name |
+| `model_id` | No | provider default | Pin a specific model |
+| `ar_repository` | No | `pdf-analyzer` | Artifact Registry repository |
+| `service_account_email` | No | (creates new) | Use an existing service account |
+| `image` | No | auto | Container image URI |
 
 ### Terraform outputs
 
 | Output | Description |
-|--------|-------------|
+|---|---|
 | `service_url` | Cloud Run service URL |
-| `mcp_endpoint` | Full MCP endpoint URL for client config |
+| `mcp_endpoint` | MCP endpoint URL (requires authenticated invocation) |
 | `service_account` | Service account email |
 
-## Service account and IAM
-
-The Cloud Run service needs a service account with two IAM roles:
-
-| Role | Why |
-|------|-----|
-| `roles/aiplatform.user` | Call Vertex AI Gemini models |
-| `roles/storage.objectViewer` | Read PDFs from GCS buckets (when using gs:// URLs) |
-
-**Option 1: Let the script create one** (default). Both the gcloud script and Terraform config create a `pdf-analyzer` service account and grant these roles automatically.
-
-**Option 2: Bring your own.** If you already have a service account with the right permissions:
-
-- **gcloud**: Set `CREATE_SA=false` and `SA_EMAIL=your-sa@project.iam.gserviceaccount.com` in `deploy/env`
-- **Terraform**: Set `service_account_email = "your-sa@project.iam.gserviceaccount.com"` in `deploy/terraform.tfvars`
-
-## Environment variables
-
-The Cloud Run service is configured with these env vars:
-
-| Variable | Value | Purpose |
-|----------|-------|---------|
-| `PDF_ANALYZER_PROVIDER` | `google-vertex` | Use Gemini via Vertex AI (ADC auth, no API key needed) |
-| `VERTEX_PROJECT` | your project ID | Which GCP project to call Vertex AI in |
-| `VERTEX_LOCATION` | `global` | Vertex AI endpoint (see note below) |
-| `PORT` | `8080` (set in Dockerfile) | Triggers HTTP mode instead of stdio |
-
-### PDF sources
-
-The deployed server accepts these PDF sources in `analyze_pdf`:
-
-- **Public web URLs**: `https://example.com/doc.pdf`
-- **GCS URIs**: `gs://my-bucket/doc.pdf` (requires `roles/storage.objectViewer` on the bucket)
-
-Local file paths only work with the stdio transport (local development).
-
-### Using Anthropic Claude via Vertex AI
-
-To use Claude models instead of Gemini, change the provider:
-
-```
-PDF_ANALYZER_PROVIDER=anthropic-vertex
-```
-
-This uses Anthropic Claude models routed through Vertex AI. Same service account, same ADC auth. No Anthropic API key needed.
+## Connecting an MCP client
 
-### Vertex AI location: why "global"?
-
-Preview models like `gemini-3-flash-preview` and `gemini-3.1-pro-preview` are only available on the [global endpoint](https://docs.cloud.google.com/vertex-ai/generative-ai/docs/learn/locations). If you switch to GA models (e.g., `gemini-2.5-flash`), you can use a regional location like `us-central1` instead.
-
-## Request timeout and memory
-
-The deploy scripts configure Cloud Run with a **15-minute request timeout** and **4 GiB memory**. This is needed because large PDFs (100+ pages) are sent inline to Vertex AI and may require chunking into multiple sequential API calls. The PDF bytes, base64 encoding, and V8 heap overhead can exceed 1 GiB for large documents.
-
-To adjust after deployment:
+Because the service is private, MCP clients can't hit the Cloud Run URL directly — identity tokens expire hourly and most clients can't mint them. The supported pattern is a local authenticated proxy:
 
 ```bash
-gcloud run services update pdf-analyzer \
-  --timeout=900 \
-  --memory=4Gi \
-  --project=<project-id> --region=<region>
+gcloud run services proxy pdf-analyzer \
+  --project=<project-id> --region=<region> --port=8080
 ```
 
-The maximum Cloud Run timeout is 3600 seconds (60 minutes). If you're analyzing very large documents and hitting timeouts, increase it. Memory can be bumped to `8Gi` if needed for exceptionally large PDFs.
-
-## Connecting MCP clients
-
-After deployment, add the HTTP MCP server to your client config.
-
-### Claude Code
+Point your MCP client at `http://localhost:8080/mcp`. The proxy forwards each request to Cloud Run with a fresh identity token from your ADC.
 
-```bash
-claude mcp add pdf-analyzer --transport http https://<your-service-url>/mcp
-```
-
-### JSON config (Claude Code, VS Code, etc.)
+Claude Code `.mcp.json` example:
 
 ```json
 {
   "mcpServers": {
     "pdf-analyzer": {
-      "type": "url",
-      "url": "https://<your-service-url>/mcp"
+      "type": "http",
+      "url": "http://localhost:8080/mcp"
     }
   }
 }
 ```
 
+## PDF sources accepted by the deployed server
+
+- **Public web URLs**: `https://example.com/doc.pdf`
+- **GCS URIs**: `gs://my-bucket/doc.pdf` (requires `roles/storage.objectViewer` on the bucket)
+
+Local file paths only work with the stdio transport (local development).
+
 ## Verifying the deployment
 
-Health check:
+Health check (needs a Google identity token because the service is private):
 
 ```bash
-curl https://<your-service-url>/health
+TOKEN="$(gcloud auth print-identity-token)"
+curl -H "Authorization: Bearer $TOKEN" https://<service-url>/health
 # Expected: ok
 ```
 
-MCP initialize:
+MCP initialize via the local proxy:
 
 ```bash
-curl -X POST https://<your-service-url>/mcp \
+gcloud run services proxy pdf-analyzer \
+  --project=<project-id> --region=<region> --port=8080 &
+sleep 2  # give the proxy a moment to start listening before calling it
+curl -X POST http://localhost:8080/mcp \
   -H "Content-Type: application/json" \
   -H "Accept: application/json, text/event-stream" \
   -d '{"jsonrpc":"2.0","id":1,"method":"initialize","params":{"protocolVersion":"2025-03-26","capabilities":{},"clientInfo":{"name":"test","version":"1.0"}}}'
 ```
 
-Full E2E test suite (from repo root):
+## Updating
 
-```bash
-CLOUD_RUN_URL=https://<your-service-url> npx tsx test/test-e2e-cloud-run.ts
-```
+Re-running `./deploy/gcloud.sh` (or `terraform apply`) rebuilds and rolls out a new revision with the current config.
 
-## Updating
+## Request timeout and memory
 
-To deploy a new version after code changes:
+The deploy scripts configure Cloud Run with a **15-minute request timeout** and **4 GiB memory**. Large PDFs (100+ pages) are sent inline and may be chunked into multiple sequential model calls; PDF bytes + base64 + V8 heap overhead can exceed 1 GiB for large documents.
 
-```bash
-# Rebuild and push
-gcloud builds submit \
-  --tag <region>-docker.pkg.dev/<project-id>/pdf-analyzer/pdf-analyzer:latest \
-  --project=<project-id>
-
-# Deploy new revision
-gcloud run deploy pdf-analyzer \
-  --image <region>-docker.pkg.dev/<project-id>/pdf-analyzer/pdf-analyzer:latest \
-  --project=<project-id> --region=<region> --quiet
-```
+Cloud Run's max timeout is 3600 seconds. Memory can be bumped to `8Gi` if needed.
 
 ## Cost considerations
 
-- **Cloud Run**: Pay per request. Scales to zero when idle (no cost when not in use).
-- **Vertex AI**: Pay per token. Gemini 3 Flash is significantly cheaper than Gemini 3.1 Pro.
-- **Artifact Registry**: Minimal cost for container image storage.
+- **Cloud Run**: pay per request, scales to zero when idle.
+- **Model provider**: Vertex usage is billed to your GCP project; direct-API usage is billed separately to your account with the provider. Token costs depend on the model.
+- **Artifact Registry**: minimal cost for container image storage.
+- **Secret Manager** (direct-API path): a few cents per 10k accesses.
diff --git a/deploy/env.example b/deploy/env.example
index 875b2e5..fbff71f 100644
--- a/deploy/env.example
+++ b/deploy/env.example
@@ -3,30 +3,89 @@
 #
 #   cp deploy/env.example deploy/env
 
-# Required: your GCP project ID
+# ---------------------------------------------------------------------------
+# Required: GCP project + region
+# ---------------------------------------------------------------------------
+
+# Your GCP project ID.
 PROJECT_ID="my-project-123"
 
-# Required: GCP region for Cloud Run, Artifact Registry, and GCS
+# GCP region for Cloud Run, Artifact Registry, and the Cloud Build job.
 REGION="us-central1"
 
-# Optional: Vertex AI endpoint location.
-# Use "global" for preview models (gemini-3-flash-preview, gemini-3.1-pro-preview).
-# Use a region like "us-central1" for GA models (gemini-2.5-flash).
-# VERTEX_LOCATION=global
+# ---------------------------------------------------------------------------
+# Required: which LLM provider the service should use
+# ---------------------------------------------------------------------------
+#
+# Pick one. Two auth modes are supported automatically based on the choice:
+#
+#   Vertex providers (ADC auth, no API key):
+#     google-vertex       Gemini via Vertex AI
+#     anthropic-vertex    Anthropic Claude via Vertex AI
+#
+#   Direct API providers (API key from Secret Manager):
+#     google              Gemini Developer API
+#     anthropic           Anthropic direct API
+#     openai              OpenAI API
+#
+PDF_ANALYZER_PROVIDER="google-vertex"
 
-# Optional: Artifact Registry repository name (default: "pdf-analyzer").
-# Set this if you want to use an existing repository.
-# AR_REPOSITORY=my-docker-repo
+# ---------------------------------------------------------------------------
+# Vertex providers only (ignored for direct API providers)
+# ---------------------------------------------------------------------------
 
-# Optional: Cloud Run service name (default: "pdf-analyzer").
-# SERVICE_NAME=pdf-analyzer
+# Vertex AI endpoint location. "global" is recommended where supported.
+# Use a specific region (e.g. "us-central1") when a model isn't on global.
+VERTEX_LOCATION="global"
 
-# Optional: use an existing service account instead of creating one.
-# Set CREATE_SA=false and SA_EMAIL to the full email address.
-# The service account must have roles/aiplatform.user and roles/storage.objectViewer.
-# CREATE_SA=false
-# SA_EMAIL=my-sa@my-project-123.iam.gserviceaccount.com
+# ---------------------------------------------------------------------------
+# Direct API providers only (ignored for Vertex providers)
+# ---------------------------------------------------------------------------
+
+# Name of the Secret Manager secret in PROJECT_ID that holds the provider's
+# API key. The deploy script grants the Cloud Run service account access
+# and injects the key into the container as PDF_ANALYZER_API_KEY.
+#
+# Create the secret once before deploying:
+#
+#   echo -n 'YOUR_API_KEY' | gcloud secrets create my-pdf-analyzer-key \
+#     --project="$PROJECT_ID" --data-file=-
+#
+# API_KEY_SECRET_NAME="my-pdf-analyzer-key"
 
-# Optional: service account name for auto-created SA (default: "pdf-analyzer").
+# ---------------------------------------------------------------------------
+# Optional: pin a specific model (uses provider default if unset)
+# ---------------------------------------------------------------------------
+#
+# Examples:
+#   google-vertex    / google   → "gemini-3-flash-preview" | "gemini-3.1-pro-preview"
+#   anthropic-vertex / anthropic → "claude-sonnet-4-6" | "claude-opus-4-6" | "claude-opus-4-7"
+#   openai                       → "gpt-5.4-mini" | "gpt-5.4"
+#
+# PDF_ANALYZER_MODEL="gemini-3.1-pro-preview"
+
+# ---------------------------------------------------------------------------
+# Optional: infrastructure naming
+# ---------------------------------------------------------------------------
+
+# Artifact Registry repository name (default: "pdf-analyzer").
+# AR_REPOSITORY="pdf-analyzer"
+
+# Cloud Run service name (default: "pdf-analyzer").
+# SERVICE_NAME="pdf-analyzer"
+
+# Service account name for auto-created SA (default: "pdf-analyzer").
 # Only used when CREATE_SA=true (the default).
-# SA_NAME=pdf-analyzer
+# SA_NAME="pdf-analyzer"
+
+# ---------------------------------------------------------------------------
+# Optional: bring your own service account
+# ---------------------------------------------------------------------------
+# Set CREATE_SA=false and SA_EMAIL to the full email address.
+# Required roles on the SA:
+#   - roles/storage.objectViewer (all providers, for gs:// PDF sources)
+#   - roles/aiplatform.user (Vertex providers only)
+#   - roles/secretmanager.secretAccessor on API_KEY_SECRET_NAME (direct-API providers only)
+#
+# CREATE_SA=false
+# SA_EMAIL="my-sa@my-project-123.iam.gserviceaccount.com"
diff --git a/deploy/gcloud.sh b/deploy/gcloud.sh
index 4e85805..44cb19f 100755
--- a/deploy/gcloud.sh
+++ b/deploy/gcloud.sh
@@ -1,16 +1,29 @@
 #!/usr/bin/env bash
 #
-# Deploy pdf-analyzer to Cloud Run using gcloud CLI.
+# Deploy pdf-analyzer to Cloud Run.
 #
-# Configuration is read from deploy/env (see deploy/env.example).
-# The script creates resources that don't exist yet and skips ones that do.
+# Configuration is read from deploy/env (see deploy/env.example). The script
+# supports all PDF_ANALYZER_PROVIDER values:
+#
+#   Vertex providers (ADC auth, no API key):
+#     - google-vertex       Gemini via Vertex AI
+#     - anthropic-vertex    Claude via Vertex AI
+#
+#   Direct API providers (API key from Secret Manager):
+#     - google              Gemini Developer API
+#     - anthropic           Anthropic direct API
+#     - openai              OpenAI API
 #
 # Prerequisites:
 #   - gcloud CLI installed and authenticated (gcloud auth login)
 #   - A GCP project with billing enabled
-#   - Copy deploy/env.example to deploy/env and fill in your values
+#   - For direct-API providers: a Secret Manager secret containing the API key.
+#     Create it once with:
+#       echo -n 'YOUR_KEY' | gcloud secrets create <name> \
+#         --project=<project> --data-file=-
 #
 # Usage:
+#   cp deploy/env.example deploy/env   # edit with your values
 #   ./deploy/gcloud.sh
 
 set -euo pipefail
@@ -27,9 +40,29 @@ fi
 # shellcheck source=/dev/null
 source "${ENV_FILE}"
 
-# Validate required variables
+# Required config
 : "${PROJECT_ID:?PROJECT_ID is required in deploy/env}"
 : "${REGION:?REGION is required in deploy/env}"
+: "${PDF_ANALYZER_PROVIDER:?PDF_ANALYZER_PROVIDER is required in deploy/env (see deploy/env.example for valid values)}"
+
+# Classify provider: vertex providers use ADC, api-key providers pull from Secret Manager.
+case "${PDF_ANALYZER_PROVIDER}" in
+  google-vertex|anthropic-vertex)
+    PROVIDER_AUTH="vertex"
+    ;;
+  google|anthropic|openai)
+    PROVIDER_AUTH="apikey"
+    ;;
+  *)
+    echo "Error: unknown PDF_ANALYZER_PROVIDER='${PDF_ANALYZER_PROVIDER}'."
+    echo "Valid values: google, google-vertex, anthropic, anthropic-vertex, openai"
+    exit 1
+    ;;
+esac
+
+if [[ "${PROVIDER_AUTH}" == "apikey" ]]; then
+  : "${API_KEY_SECRET_NAME:?API_KEY_SECRET_NAME is required for provider ${PDF_ANALYZER_PROVIDER}. Create a Secret Manager secret with your API key and set API_KEY_SECRET_NAME in deploy/env.}"
+fi
 
 # Defaults
 SERVICE_NAME="${SERVICE_NAME:-pdf-analyzer}"
@@ -38,12 +71,20 @@ AR_REPOSITORY="${AR_REPOSITORY:-${SERVICE_NAME}}"
 IMAGE="${REGION}-docker.pkg.dev/${PROJECT_ID}/${AR_REPOSITORY}/${SERVICE_NAME}:latest"
 CREATE_SA="${CREATE_SA:-true}"
 SA_NAME="${SA_NAME:-pdf-analyzer}"
-# If SA_EMAIL is set in env, use it directly (existing service account).
-# Otherwise, derive from SA_NAME.
+# If SA_EMAIL is set, use it directly. Otherwise, derive from SA_NAME.
 SA_EMAIL="${SA_EMAIL:-${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com}"
 
 echo "==> Project:    ${PROJECT_ID}"
 echo "==> Region:     ${REGION}"
+echo "==> Provider:   ${PDF_ANALYZER_PROVIDER} (${PROVIDER_AUTH})"
+if [[ "${PROVIDER_AUTH}" == "vertex" ]]; then
+  echo "==> Location:   ${VERTEX_LOCATION}"
+else
+  echo "==> Secret:     ${API_KEY_SECRET_NAME}"
+fi
+if [[ -n "${PDF_ANALYZER_MODEL:-}" ]]; then
+  echo "==> Model:      ${PDF_ANALYZER_MODEL}"
+fi
 echo "==> SA:         ${SA_EMAIL}"
 echo "==> Repository: ${AR_REPOSITORY}"
 echo "==> Image:      ${IMAGE}"
@@ -51,13 +92,18 @@ echo ""
 
 # ---- Enable required APIs ----
 echo "==> Enabling APIs..."
-gcloud services enable \
-  aiplatform.googleapis.com \
-  artifactregistry.googleapis.com \
-  cloudbuild.googleapis.com \
-  run.googleapis.com \
-  storage.googleapis.com \
-  --project="${PROJECT_ID}" --quiet
+APIS=(
+  artifactregistry.googleapis.com
+  cloudbuild.googleapis.com
+  run.googleapis.com
+  storage.googleapis.com
+)
+if [[ "${PROVIDER_AUTH}" == "vertex" ]]; then
+  APIS+=(aiplatform.googleapis.com)
+else
+  APIS+=(secretmanager.googleapis.com)
+fi
+gcloud services enable "${APIS[@]}" --project="${PROJECT_ID}" --quiet
 
 # ---- Artifact Registry ----
 echo "==> Creating Artifact Registry repository (if needed)..."
@@ -75,14 +121,44 @@ if [[ "${CREATE_SA}" == "true" ]]; then
       --project="${PROJECT_ID}" --display-name="PDF Analyzer MCP Server"
 
   echo "==> Granting IAM roles..."
-  for ROLE in roles/aiplatform.user roles/storage.objectViewer; do
+  # All providers need GCS read for gs:// PDF sources.
+  ROLES=(roles/storage.objectViewer)
+  if [[ "${PROVIDER_AUTH}" == "vertex" ]]; then
+    ROLES+=(roles/aiplatform.user)
+  fi
+  for ROLE in "${ROLES[@]}"; do
     gcloud projects add-iam-policy-binding "${PROJECT_ID}" \
       --member="serviceAccount:${SA_EMAIL}" \
       --role="${ROLE}" --condition=None --quiet > /dev/null
   done
 else
   echo "==> Using existing service account: ${SA_EMAIL}"
-  echo "    Make sure it has roles/aiplatform.user and roles/storage.objectViewer"
+  echo "    Required roles: roles/storage.objectViewer"
+  if [[ "${PROVIDER_AUTH}" == "vertex" ]]; then
+    echo "                    roles/aiplatform.user"
+  else
+    echo "                    roles/secretmanager.secretAccessor on ${API_KEY_SECRET_NAME}"
+  fi
+fi
+
+# ---- Secret Manager access (api-key providers only) ----
+if [[ "${PROVIDER_AUTH}" == "apikey" ]]; then
+  echo "==> Verifying secret ${API_KEY_SECRET_NAME} exists..."
+  if ! gcloud secrets describe "${API_KEY_SECRET_NAME}" --project="${PROJECT_ID}" &>/dev/null; then
+    echo ""
+    echo "Error: Secret '${API_KEY_SECRET_NAME}' not found in project ${PROJECT_ID}."
+    echo "Create it once with your provider API key:"
+    echo ""
+    echo "  echo -n 'YOUR_API_KEY' | gcloud secrets create ${API_KEY_SECRET_NAME} \\"
+    echo "    --project=${PROJECT_ID} --data-file=-"
+    echo ""
+    exit 1
+  fi
+  echo "==> Granting secretAccessor on ${API_KEY_SECRET_NAME}..."
+  gcloud secrets add-iam-policy-binding "${API_KEY_SECRET_NAME}" \
+    --project="${PROJECT_ID}" \
+    --member="serviceAccount:${SA_EMAIL}" \
+    --role=roles/secretmanager.secretAccessor --quiet > /dev/null
 fi
 
 # ---- Build container image ----
@@ -93,17 +169,34 @@ gcloud builds submit \
 
 # ---- Deploy to Cloud Run ----
 echo "==> Deploying to Cloud Run..."
-gcloud run deploy "${SERVICE_NAME}" \
-  --image "${IMAGE}" \
-  --project="${PROJECT_ID}" \
-  --platform managed \
-  --region "${REGION}" \
-  --set-env-vars "PDF_ANALYZER_PROVIDER=google-vertex,VERTEX_PROJECT=${PROJECT_ID},VERTEX_LOCATION=${VERTEX_LOCATION}" \
-  --service-account "${SA_EMAIL}" \
-  --timeout=900 \
-  --memory=4Gi \
-  --allow-unauthenticated \
+
+# Env vars to pass to the service.
+ENV_VARS="PDF_ANALYZER_PROVIDER=${PDF_ANALYZER_PROVIDER}"
+if [[ "${PROVIDER_AUTH}" == "vertex" ]]; then
+  ENV_VARS="${ENV_VARS},VERTEX_PROJECT=${PROJECT_ID},VERTEX_LOCATION=${VERTEX_LOCATION}"
+fi
+if [[ -n "${PDF_ANALYZER_MODEL:-}" ]]; then
+  ENV_VARS="${ENV_VARS},PDF_ANALYZER_MODEL=${PDF_ANALYZER_MODEL}"
+fi
+
+DEPLOY_ARGS=(
+  "${SERVICE_NAME}"
+  --image "${IMAGE}"
+  --project="${PROJECT_ID}"
+  --platform=managed
+  --region="${REGION}"
+  --service-account="${SA_EMAIL}"
+  --set-env-vars="${ENV_VARS}"
+  --timeout=900
+  --memory=4Gi
+  --no-allow-unauthenticated
   --quiet
+)
+if [[ "${PROVIDER_AUTH}" == "apikey" ]]; then
+  DEPLOY_ARGS+=(--set-secrets="PDF_ANALYZER_API_KEY=${API_KEY_SECRET_NAME}:latest")
+fi
+
+gcloud run deploy "${DEPLOY_ARGS[@]}"
 
 # ---- Verify ----
 SERVICE_URL=$(gcloud run services describe "${SERVICE_NAME}" \
@@ -111,23 +204,29 @@ SERVICE_URL=$(gcloud run services describe "${SERVICE_NAME}" \
 
 echo ""
 echo "==> Verifying health..."
-curl -sf "${SERVICE_URL}/health" && echo " OK"
+# The service is deployed with --no-allow-unauthenticated, so the probe must carry an identity token.
+ID_TOKEN="$(gcloud auth print-identity-token 2>/dev/null || true)"
+if [[ -z "${ID_TOKEN}" ]]; then
+  echo " (skipped: could not mint identity token; run \`gcloud auth login\` first)"
+elif curl -sf -H "Authorization: Bearer ${ID_TOKEN}" "${SERVICE_URL}/health"; then
+  echo " OK"
+else
+  # Best-effort probe: surface the failure instead of silently continuing, but do not abort the script.
+  echo " FAILED: ${SERVICE_URL}/health did not return success (check that your account has roles/run.invoker on ${SERVICE_NAME})" >&2
+fi
 
 echo ""
 echo "==================================="
 echo "Deployment complete!"
 echo ""
-echo "Service URL: ${SERVICE_URL}"
+echo "Service URL:  ${SERVICE_URL}"
 echo "MCP endpoint: ${SERVICE_URL}/mcp"
 echo ""
-echo "Add to your MCP client config:"
+echo "The service is private (--no-allow-unauthenticated)."
+echo "To call it from an MCP client, run the gcloud proxy locally:"
+echo ""
+echo "  gcloud run services proxy ${SERVICE_NAME} \\"
+echo "    --project=${PROJECT_ID} --region=${REGION} --port=8080"
 echo ""
-echo "  {"
-echo "    \"mcpServers\": {"
-echo "      \"pdf-analyzer\": {"
-echo "        \"type\": \"url\","
-echo "        \"url\": \"${SERVICE_URL}/mcp\""
-echo "      }"
-echo "    }"
-echo "  }"
+echo "Then point your MCP client at http://localhost:8080/mcp"
 echo "==================================="
diff --git a/deploy/main.tf b/deploy/main.tf
index 9d001f2..3648ea8 100644
--- a/deploy/main.tf
+++ b/deploy/main.tf
@@ -11,6 +11,10 @@
 #   - gcloud CLI authenticated (for building the container image)
 #   - Container image must be built before applying:
 #       gcloud builds submit --tag <region>-docker.pkg.dev/<project>/pdf-analyzer/pdf-analyzer:latest ..
+#   - For direct-API providers (google, anthropic, openai): a Secret Manager
+#     secret containing the API key must already exist. Create once with:
+#       echo -n 'YOUR_KEY' | gcloud secrets create <name> \
+#         --project=<project> --data-file=-
 
 terraform {
   required_version = ">= 1.5"
@@ -32,25 +36,47 @@ variable "project_id" {
 }
 
 variable "region" {
-  description = "GCP region for Cloud Run, Artifact Registry, and GCS"
+  description = "GCP region for Cloud Run, Artifact Registry, and Cloud Build"
   type        = string
   default     = "us-central1"
 }
 
+variable "provider_id" {
+  description = "PDF_ANALYZER_PROVIDER. One of: google, google-vertex, anthropic, anthropic-vertex, openai."
+  type        = string
+
+  validation {
+    condition     = contains(["google", "google-vertex", "anthropic", "anthropic-vertex", "openai"], var.provider_id)
+    error_message = "provider_id must be one of: google, google-vertex, anthropic, anthropic-vertex, openai."
+  }
+}
+
+variable "model_id" {
+  description = "Optional: pin a specific model. Leave empty to use the provider's default."
+  type        = string
+  default     = ""
+}
+
 variable "vertex_location" {
-  description = "Vertex AI endpoint location. Use 'global' for preview models."
+  description = "Vertex AI endpoint location (used when provider_id is a *-vertex variant)."
   type        = string
   default     = "global"
 }
 
+variable "api_key_secret_name" {
+  description = "Secret Manager secret name holding the provider API key (required when provider_id is google, anthropic, or openai; ignored otherwise)."
+  type        = string
+  default     = ""
+}
+
 variable "ar_repository" {
-  description = "Artifact Registry repository name. Set to use an existing repo."
+  description = "Artifact Registry repository name."
   type        = string
   default     = "pdf-analyzer"
 }
 
 variable "image" {
-  description = "Container image URI. Build it first with gcloud builds submit. Leave empty to auto-generate from region/project/repository."
+  description = "Container image URI. Build it first with gcloud builds submit. Leave empty to auto-generate."
   type        = string
   default     = ""
 }
@@ -62,11 +88,26 @@ variable "service_account_email" {
 }
 
 locals {
-  service_name   = "pdf-analyzer"
-  sa_name        = "pdf-analyzer"
-  image          = var.image != "" ? var.image : "${var.region}-docker.pkg.dev/${var.project_id}/${var.ar_repository}/${local.service_name}:latest"
-  create_sa      = var.service_account_email == ""
-  sa_email       = local.create_sa ? google_service_account.pdf_analyzer[0].email : var.service_account_email
+  service_name = "pdf-analyzer"
+  sa_name      = "pdf-analyzer"
+  image        = var.image != "" ? var.image : "${var.region}-docker.pkg.dev/${var.project_id}/${var.ar_repository}/${local.service_name}:latest"
+  create_sa    = var.service_account_email == ""
+  sa_email     = local.create_sa ? google_service_account.pdf_analyzer[0].email : var.service_account_email
+
+  uses_vertex = contains(["google-vertex", "anthropic-vertex"], var.provider_id)
+  uses_apikey = !local.uses_vertex
+
+  # Base env vars always set on the service.
+  base_env = concat(
+    [{ name = "PDF_ANALYZER_PROVIDER", value = var.provider_id }],
+    local.uses_vertex ? [
+      { name = "VERTEX_PROJECT", value = var.project_id },
+      { name = "VERTEX_LOCATION", value = var.vertex_location },
+    ] : [],
+    var.model_id != "" ? [
+      { name = "PDF_ANALYZER_MODEL", value = var.model_id },
+    ] : [],
+  )
 }
 
 provider "google" {
@@ -74,18 +115,34 @@ provider "google" {
   region  = var.region
 }
 
+# --------------------------------------------------------------------------
+# Config validation: api-key providers must supply a secret name.
+# A `check` block only emits a warning; a precondition fails the plan.
+# --------------------------------------------------------------------------
+
+resource "terraform_data" "api_key_secret_provided" {
+  lifecycle {
+    precondition {
+      condition     = !local.uses_apikey || var.api_key_secret_name != ""
+      error_message = "api_key_secret_name is required when provider_id is google, anthropic, or openai."
+    }
+  }
+}
+
 # --------------------------------------------------------------------------
 # APIs
 # --------------------------------------------------------------------------
 
 resource "google_project_service" "apis" {
-  for_each = toset([
-    "aiplatform.googleapis.com",
-    "artifactregistry.googleapis.com",
-    "cloudbuild.googleapis.com",
-    "run.googleapis.com",
-    "storage.googleapis.com",
-  ])
+  for_each = toset(concat(
+    [
+      "artifactregistry.googleapis.com",
+      "cloudbuild.googleapis.com",
+      "run.googleapis.com",
+      "storage.googleapis.com",
+    ],
+    local.uses_vertex ? ["aiplatform.googleapis.com"] : ["secretmanager.googleapis.com"],
+  ))
   service            = each.value
   disable_on_destroy = false
 }
@@ -113,20 +167,31 @@ resource "google_service_account" "pdf_analyzer" {
   depends_on   = [google_project_service.apis]
 }
 
-resource "google_project_iam_member" "vertex_ai_user" {
+# All providers read PDFs from GCS (gs:// URIs).
+resource "google_project_iam_member" "storage_object_viewer" {
   count   = local.create_sa ? 1 : 0
   project = var.project_id
-  role    = "roles/aiplatform.user"
+  role    = "roles/storage.objectViewer"
   member  = "serviceAccount:${local.sa_email}"
 }
 
-resource "google_project_iam_member" "storage_object_viewer" {
-  count   = local.create_sa ? 1 : 0
+# Vertex providers need aiplatform.user.
+resource "google_project_iam_member" "vertex_ai_user" {
+  count   = local.create_sa && local.uses_vertex ? 1 : 0
   project = var.project_id
-  role    = "roles/storage.objectViewer"
+  role    = "roles/aiplatform.user"
   member  = "serviceAccount:${local.sa_email}"
 }
 
+# API-key providers need read access to the specific Secret Manager secret.
+resource "google_secret_manager_secret_iam_member" "api_key_accessor" {
+  count     = local.uses_apikey && var.api_key_secret_name != "" ? 1 : 0
+  project   = var.project_id
+  secret_id = var.api_key_secret_name
+  role      = "roles/secretmanager.secretAccessor"
+  member    = "serviceAccount:${local.sa_email}"
+}
+
 # --------------------------------------------------------------------------
 # Cloud Run
 # --------------------------------------------------------------------------
@@ -148,18 +213,28 @@ resource "google_cloud_run_v2_service" "pdf_analyzer" {
 
       image = local.image
 
-      env {
-        name  = "PDF_ANALYZER_PROVIDER"
-        value = "google-vertex"
-      }
-      env {
-        name  = "VERTEX_PROJECT"
-        value = var.project_id
+      dynamic "env" {
+        for_each = local.base_env
+        content {
+          name  = env.value.name
+          value = env.value.value
+        }
       }
-      env {
-        name  = "VERTEX_LOCATION"
-        value = var.vertex_location
+
+      # API key for direct-API providers, sourced from Secret Manager at runtime.
+      dynamic "env" {
+        for_each = local.uses_apikey && var.api_key_secret_name != "" ? [1] : []
+        content {
+          name = "PDF_ANALYZER_API_KEY"
+          value_source {
+            secret_key_ref {
+              secret  = var.api_key_secret_name
+              version = "latest"
+            }
+          }
+        }
       }
+
       ports {
         container_port = 8080
       }
@@ -171,14 +246,11 @@ resource "google_cloud_run_v2_service" "pdf_analyzer" {
   ]
 }
 
-# Allow unauthenticated access (public MCP endpoint)
-resource "google_cloud_run_v2_service_iam_member" "public" {
-  project  = var.project_id
-  location = var.region
-  name     = google_cloud_run_v2_service.pdf_analyzer.name
-  role     = "roles/run.invoker"
-  member   = "allUsers"
-}
+# NOTE: This configuration does NOT grant public (allUsers) access to the
+# service. Callers must authenticate with a Google identity token. For local
+# development, use `gcloud run services proxy` to forward authenticated
+# requests to http://localhost:<port>. Add your own run.invoker IAM bindings
+# here if you have specific identities that should invoke the service.
 
 # --------------------------------------------------------------------------
 # Outputs
@@ -190,7 +262,7 @@ output "service_url" {
 }
 
 output "mcp_endpoint" {
-  description = "MCP endpoint URL for client config"
+  description = "MCP endpoint URL (requires authenticated invocation)"
   value       = "${google_cloud_run_v2_service.pdf_analyzer.uri}/mcp"
 }
 
diff --git a/deploy/terraform.tfvars.example b/deploy/terraform.tfvars.example
index 7f6c381..460d124 100644
--- a/deploy/terraform.tfvars.example
+++ b/deploy/terraform.tfvars.example
@@ -3,23 +3,60 @@
 #
 #   cp deploy/terraform.tfvars.example deploy/terraform.tfvars
 
-# Required: your GCP project ID
+# -----------------------------------------------------------------------------
+# Required
+# -----------------------------------------------------------------------------
+
+# Your GCP project ID.
 project_id = "my-project-123"
 
-# Optional: GCP region (default: "us-central1")
+# Provider. Pick one:
+#   google-vertex     Gemini via Vertex AI (ADC auth, no API key)
+#   anthropic-vertex  Anthropic Claude via Vertex AI (ADC auth, no API key)
+#   google            Gemini Developer API (Secret Manager key)
+#   anthropic         Anthropic direct API (Secret Manager key)
+#   openai            OpenAI API (Secret Manager key)
+provider_id = "google-vertex"
+
+# -----------------------------------------------------------------------------
+# Required only for direct-API providers (google, anthropic, openai)
+# -----------------------------------------------------------------------------
+# Name of a Secret Manager secret (in project_id) holding the API key.
+# Create it once before `terraform apply`:
+#
+#   echo -n 'YOUR_KEY' | gcloud secrets create my-pdf-analyzer-key \
+#     --project="$PROJECT" --data-file=-
+#
+# api_key_secret_name = "my-pdf-analyzer-key"
+
+# -----------------------------------------------------------------------------
+# Optional
+# -----------------------------------------------------------------------------
+
+# GCP region for Cloud Run and Artifact Registry (default: "us-central1").
 # region = "us-central1"
 
-# Optional: Vertex AI endpoint location (default: "global").
-# Use "global" for preview models (gemini-3-flash-preview, gemini-3.1-pro-preview).
-# Use a region like "us-central1" for GA models (gemini-2.5-flash).
+# Vertex AI endpoint location (ignored for non-Vertex providers).
+# "global" is recommended where supported; use a region like "us-central1"
+# or "us-east5" if a model isn't on the global endpoint.
 # vertex_location = "global"
 
-# Optional: Artifact Registry repository name (default: "pdf-analyzer").
-# ar_repository = "pdf-analyzer"
+# Pin a specific model. Leave empty to use the provider's default.
+# Examples:
+#   google-vertex    / google    → "gemini-3.1-pro-preview"
+#   anthropic-vertex / anthropic → "claude-opus-4-7"
+#   openai                       → "gpt-5.4"
+# model_id = ""
 
-# Optional: use an existing service account instead of creating one.
-# The service account must have roles/aiplatform.user and roles/storage.objectViewer.
-# service_account_email = "my-sa@my-project-123.iam.gserviceaccount.com"
+# Artifact Registry repository name (default: "pdf-analyzer").
+# ar_repository = "pdf-analyzer"
 
-# Optional: container image URI. Leave empty to auto-generate.
+# Container image URI. Leave empty to auto-generate from region/project/repository.
 # image = "us-central1-docker.pkg.dev/my-project/pdf-analyzer/pdf-analyzer:latest"
+
+# Use an existing service account instead of creating one.
+# Required roles on the SA:
+#   - roles/storage.objectViewer (all providers)
+#   - roles/aiplatform.user (Vertex providers only)
+#   - roles/secretmanager.secretAccessor on api_key_secret_name (direct-API providers only)
+# service_account_email = "my-sa@my-project-123.iam.gserviceaccount.com"
diff --git a/package.json b/package.json
index 0c644ff..f1ab4d9 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@intelligentelectron/pdf-analyzer",
-  "version": "1.2.2",
+  "version": "1.2.3",
   "description": "MCP server for analyzing PDF documents using AI (Google Gemini, Anthropic Claude, OpenAI)",
   "type": "module",
   "main": "dist/index.js",
diff --git a/src/providers/anthropic-vertex.test.ts b/src/providers/anthropic-vertex.test.ts
index 2c1d731..4a31b93 100644
--- a/src/providers/anthropic-vertex.test.ts
+++ b/src/providers/anthropic-vertex.test.ts
@@ -15,8 +15,8 @@ describe("anthropicVertexProvider", () => {
     expect(anthropicVertexProvider.apiKeyUrl).toBe("");
   });
 
-  it("defaults to claude-opus-4-6", () => {
-    expect(anthropicVertexProvider.defaultModel).toBe("claude-opus-4-6");
+  it("defaults to claude-opus-4-7", () => {
+    expect(anthropicVertexProvider.defaultModel).toBe("claude-opus-4-7");
   });
 });
 
diff --git a/src/providers/anthropic-vertex.ts b/src/providers/anthropic-vertex.ts
index c343a33..9a5a466 100644
--- a/src/providers/anthropic-vertex.ts
+++ b/src/providers/anthropic-vertex.ts
@@ -15,10 +15,11 @@ const MODELS: ModelOption[] = [
     displayName: "Claude Sonnet 4.6",
     hint: "Fast and cost-effective",
   },
-  { id: "claude-opus-4-6", displayName: "Claude Opus 4.6", hint: "Best and most expensive" },
+  { id: "claude-opus-4-6", displayName: "Claude Opus 4.6", hint: "Previous flagship" },
+  { id: "claude-opus-4-7", displayName: "Claude Opus 4.7", hint: "Best and most expensive" },
 ];
 
-const DEFAULT_MODEL = "claude-opus-4-6";
+const DEFAULT_MODEL = "claude-opus-4-7";
 
 function getProject(): string {
   const p = process.env.VERTEX_PROJECT;
diff --git a/src/providers/anthropic.ts b/src/providers/anthropic.ts
index caa4bb2..b092e6b 100644
--- a/src/providers/anthropic.ts
+++ b/src/providers/anthropic.ts
@@ -15,10 +15,11 @@ const MODELS: ModelOption[] = [
     displayName: "Claude Sonnet 4.6",
     hint: "Fast and cost-effective",
   },
-  { id: "claude-opus-4-6", displayName: "Claude Opus 4.6", hint: "Best and most expensive" },
+  { id: "claude-opus-4-6", displayName: "Claude Opus 4.6", hint: "Previous flagship" },
+  { id: "claude-opus-4-7", displayName: "Claude Opus 4.7", hint: "Best and most expensive" },
 ];
 
-const DEFAULT_MODEL = "claude-opus-4-6";
+const DEFAULT_MODEL = "claude-opus-4-7";
 
 /**
  * Prepare a PDF source for Anthropic by reading bytes inline.
diff --git a/src/service.test.ts b/src/service.test.ts
index 48885b7..dd6555a 100644
--- a/src/service.test.ts
+++ b/src/service.test.ts
@@ -1,5 +1,12 @@
 import { describe, it, expect } from "vitest";
-import { isGeminiFileUri, isUrl, validateLocalPath, classifySource } from "./service.js";
+import { readFileSync } from "node:fs";
+import {
+  isGeminiFileUri,
+  isUrl,
+  validateLocalPath,
+  classifySource,
+  resolveSourceBytes,
+} from "./service.js";
 import { AnalyzePdfInputSchema } from "./types.js";
 
 describe("isGeminiFileUri", () => {
@@ -161,3 +168,23 @@ describe("classifySource", () => {
     expect(result).toEqual({ kind: "path", path: "/tmp/doc.pdf" });
   });
 });
+
+describe("resolveSourceBytes", () => {
+  // Regression: gs:// sources get converted to { kind: "bytes" } before the
+  // chunking fallback. Before the fix, the fallback cast to { kind: "path" }
+  // and crashed with "Cannot read properties of undefined (reading 'trim')".
+  it("returns the same bytes for kind: bytes", async () => {
+    const bytes = new Uint8Array([0x25, 0x50, 0x44, 0x46]); // "%PDF"
+    const out = await resolveSourceBytes({ kind: "bytes", bytes });
+    expect(out).toBe(bytes); // identity check: same array instance, not a copy
+  });
+
+  it("reads file contents for kind: path", async () => {
+    const path = process.cwd() + "/test/fixtures/1-pager.pdf"; // assumes cwd contains test/fixtures
+    const expected = readFileSync(path);
+    const out = await resolveSourceBytes({ kind: "path", path });
+    expect(out.byteLength).toBe(expected.byteLength); // length plus the two magic bytes below
+    expect(out[0]).toBe(0x25); // "%"
+    expect(out[1]).toBe(0x50); // "P"
+  });
+});
diff --git a/src/service.ts b/src/service.ts
index 67903f3..aca884c 100644
--- a/src/service.ts
+++ b/src/service.ts
@@ -286,6 +286,24 @@ async function downloadFromGcs(gcsUri: string): Promise<Uint8Array> {
   return new Uint8Array(buffer);
 }
 
+/**
+ * Resolve a non-cached PdfSource ("url", "bytes", or "path") to raw bytes for
+ * chunking. The switch is exhaustive over those kinds, so adding a new source
+ * kind without handling it here is caught at compile time.
+ */
+export async function resolveSourceBytes(
+  source: Exclude<PdfSource, { kind: "cachedUri" }>
+): Promise<Uint8Array> {
+  switch (source.kind) {
+    case "url":
+      return new Uint8Array(await fetchPdfFromUrl(source.url)); // normalized to Uint8Array
+    case "bytes":
+      return source.bytes; // caller's own array, returned without copying
+    case "path":
+      return new Uint8Array(readPdfBytes(source.path)); // normalized to Uint8Array
+  }
+}
+
 /**
  * Classify a PDF source string into a typed PdfSource union.
  */
@@ -351,13 +369,10 @@ export async function analyzePdf(
     );
   }
 
-  // Token limit exceeded, read bytes, split into chunks, and process via work queue
-  // At this point source is "path" or "url" (cachedUri was handled above)
-  const pdfBytes =
-    source.kind === "url"
-      ? await fetchPdfFromUrl(source.url)
-      : readPdfBytes((source as { kind: "path"; path: string }).path);
-  const initialChunk = await pdfBytesToChunk(new Uint8Array(pdfBytes));
+  // Token limit exceeded: read bytes, split into chunks, and process via the work queue.
+  // At this point source is "path", "url", or "bytes" (cachedUri was handled above).
+  const pdfBytes = await resolveSourceBytes(source);
+  const initialChunk = await pdfBytesToChunk(pdfBytes);
   return processChunkQueue(provider, apiKey, modelId, [initialChunk], queries, pdf_source);
 }
 
diff --git a/src/transports/http.test.ts b/src/transports/http.test.ts
index e4afee2..2625c43 100644
--- a/src/transports/http.test.ts
+++ b/src/transports/http.test.ts
@@ -2,47 +2,17 @@ import { describe, it, expect, afterEach } from "vitest";
 import { createServer as createHttpServer } from "node:http";
 import type { Server } from "node:http";
 import { createServer } from "../server.js";
+import { createRequestHandler } from "./http.js";
 
 /**
- * Helper: start the HTTP server on a random port and return the base URL + server handle.
+ * Start an HTTP server that uses the exact production request handler.
+ * Using createRequestHandler (and not a hand-rolled copy) ensures any change
+ * in production routing is reflected in these tests.
  */
 function startTestServer(): Promise<{ baseUrl: string; server: Server }> {
   return new Promise((resolve) => {
-    const httpServer = createHttpServer();
-    // Reuse startHttpServer's logic by calling it with port 0 (random)
-    // Instead, we replicate the approach: start our own to get a handle
-    httpServer.close(); // close the dummy
-
-    // We need the actual server handle. Use a workaround: start on port 0.
-    // startHttpServer doesn't return the server, so we test at integration level.
-    // Use a direct HTTP server with the same handler pattern.
-    const { StreamableHTTPServerTransport } =
-      require("@modelcontextprotocol/sdk/server/streamableHttp.js") as typeof import("@modelcontextprotocol/sdk/server/streamableHttp.js");
-
-    const server = createHttpServer(async (req, res) => {
-      if (req.method === "POST" && req.url === "/mcp") {
-        const transport = new StreamableHTTPServerTransport({
-          sessionIdGenerator: undefined,
-        });
-        const mcpServer = createServer("http");
-        res.on("close", () => {
-          transport.close();
-          mcpServer.close();
-        });
-        await mcpServer.connect(transport);
-        await transport.handleRequest(req, res);
-        return;
-      }
-
-      if (req.method === "GET" && req.url === "/health") {
-        res.writeHead(200, { "Content-Type": "text/plain" });
-        res.end("ok");
-        return;
-      }
-
-      res.writeHead(404);
-      res.end();
-    });
+    const handler = createRequestHandler(() => createServer("http"));
+    const server = createHttpServer(handler);
 
     server.listen(0, () => {
       const addr = server.address();
@@ -111,4 +81,27 @@ describe("HTTP transport", () => {
     expect(body.jsonrpc).toBe("2.0");
     expect(body.result.serverInfo.name).toBe("pdf-analyzer");
   });
+
+  // Regression: SDK clients probe GET /mcp for SSE streaming during session
+  // setup. Before the fix, the handler only matched POST /mcp and returned
+  // 404 for GET /mcp, which clients interpreted as "SDK auth failed: HTTP 404".
+  // The handler must route any method on /mcp to the SDK transport.
+  it("GET /mcp is routed to the SDK transport, not 404", async () => {
+    const { baseUrl, server } = await startTestServer();
+    testServer = server;
+
+    const res = await fetch(`${baseUrl}/mcp`, { method: "GET" });
+    // The SDK responds with 405 (method-not-allowed for stateless mode) or
+    // 400 (bad request); the key assertion is that our router does NOT drop
+    // the request to the 404 branch.
+    expect(res.status).not.toBe(404);
+  });
+
+  it("DELETE /mcp is routed to the SDK transport, not 404", async () => {
+    const { baseUrl, server } = await startTestServer();
+    testServer = server;
+
+    const res = await fetch(`${baseUrl}/mcp`, { method: "DELETE" });
+    expect(res.status).not.toBe(404);
+  });
 });
diff --git a/src/transports/http.ts b/src/transports/http.ts
index 5f3e2d5..bc69cbf 100644
--- a/src/transports/http.ts
+++ b/src/transports/http.ts
@@ -2,9 +2,9 @@
  * Streamable HTTP transport for cloud deployments.
  *
  * Creates a stateless HTTP server that handles:
- * - POST /mcp       MCP protocol (Streamable HTTP)
- * - POST /analyze   Direct REST endpoint (no MCP overhead)
- * - GET  /health    Health check
+ * - POST/GET/DELETE /mcp  MCP protocol (Streamable HTTP; SDK decides per-method behavior)
+ * - POST /analyze         Direct REST endpoint (no MCP overhead)
+ * - GET  /health          Health check
  */
 
 import { createServer as createHttpServer } from "node:http";
@@ -54,11 +54,15 @@ async function handleAnalyze(req: IncomingMessage, res: ServerResponse): Promise
 }
 
 /**
- * Start a stateless HTTP server for MCP over Streamable HTTP.
+ * Build the request handler used by both startHttpServer and the test suite.
+ * Exported so tests can drive the real production routing logic instead of
+ * replicating it (which is how the pre-fix bug slipped past our tests).
  */
-export function startHttpServer(createMcpServer: () => McpServer, port: number): void {
-  const httpServer = createHttpServer(async (req, res) => {
-    if (req.method === "POST" && req.url === "/mcp") {
+export function createRequestHandler(
+  createMcpServer: () => McpServer
+): (req: IncomingMessage, res: ServerResponse) => Promise<void> {
+  return async (req, res) => {
+    if (req.url === "/mcp") {
       const transport = new StreamableHTTPServerTransport({
         sessionIdGenerator: undefined,
       });
@@ -85,7 +89,14 @@ export function startHttpServer(createMcpServer: () => McpServer, port: number):
 
     res.writeHead(404);
     res.end();
-  });
+  };
+}
+
+/**
+ * Start a stateless HTTP server for MCP over Streamable HTTP.
+ */
+export function startHttpServer(createMcpServer: () => McpServer, port: number): void {
+  const httpServer = createHttpServer(createRequestHandler(createMcpServer));
 
   httpServer.listen(port, () => {
     console.log(`MCP server listening on port ${port}`);