From 489de0a084386e16cb850ddb9f7638226e97a6d1 Mon Sep 17 00:00:00 2001 From: Frank Steiler Date: Sat, 23 May 2026 17:42:17 +0200 Subject: [PATCH] fix(auto-itemize): set strict=true on Anthropic json_schema response format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anthropic's OpenAI-compat layer rejected our schema with: response_format.json_schema.strict: Input should be True Per the diagnostic captured by the new error logging (PR #1558), Anthropic requires `strict: true` on the json_schema (where OpenAI treats it as optional). With strict mode, OpenAI's structured-outputs spec — which Anthropic mirrors — also imposes: 1. additionalProperties: false on every object node 2. EVERY property must be listed in `required` (optional fields use union-typed nulls: type: ['number', 'null']) Our `validateExtractedLines` already tolerates null on the optional fields, so the LLM emitting quantity: null instead of omitting the key is fine — the validator coerces null → undefined for the runtime ExtractedLine. Test updated to assert all three strict-mode invariants. 
Co-Authored-By: Claude backend-developer (Haiku 4.5) --- .../budgetExtraction/providerProfiles.test.ts | 46 +++++++++++++++++-- .../budgetExtraction/providerProfiles.ts | 24 +++++++++- 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/server/src/services/budgetExtraction/providerProfiles.test.ts b/server/src/services/budgetExtraction/providerProfiles.test.ts index eb234a68a..a15125425 100644 --- a/server/src/services/budgetExtraction/providerProfiles.test.ts +++ b/server/src/services/budgetExtraction/providerProfiles.test.ts @@ -87,18 +87,54 @@ describe('buildRequestBody', () => { expect(body.response_format).toEqual({ type: 'json_object' }); }); - it('anthropic → response_format: json_schema with full ExtractedLine schema', () => { + it('anthropic → response_format: json_schema with strict mode + full ExtractedLine schema', () => { const body = buildRequestBody({ ...common, provider: 'anthropic' }); assertBaseFields(body); const rf = body.response_format as { type: string; - json_schema: { name: string; schema: { properties: { lines: unknown } } }; + json_schema: { + name: string; + strict: boolean; + schema: { + type: string; + required: string[]; + additionalProperties: boolean; + properties: { + lines: { + type: string; + items: { + type: string; + required: string[]; + additionalProperties: boolean; + properties: Record<string, unknown>; + }; + }; + }; + }; + }; }; expect(rf.type).toBe('json_schema'); expect(rf.json_schema.name).toBe('extracted_lines'); - // Schema must allow our optional fields with type unions; we don't snapshot the full - // shape (brittle) but verify the top-level lines array is declared. - expect(rf.json_schema.schema.properties.lines).toBeDefined(); + // Anthropic requires strict: true (the original strict: false produced a 400). + expect(rf.json_schema.strict).toBe(true); + // strict mode requires additionalProperties: false on every object node. 
+ expect(rf.json_schema.schema.additionalProperties).toBe(false); + expect(rf.json_schema.schema.properties.lines.items.additionalProperties).toBe(false); + // strict mode requires EVERY property to be listed in `required` (optional + // fields use union-typed nulls). + expect(rf.json_schema.schema.properties.lines.items.required).toEqual( + expect.arrayContaining([ + 'description', + 'quantity', + 'unit', + 'unitPrice', + 'totalAmount', + 'includesVat', + 'vatRate', + 'vendorName', + 'confidence', + ]), + ); }); it('generic → no response_format hint', () => { diff --git a/server/src/services/budgetExtraction/providerProfiles.ts b/server/src/services/budgetExtraction/providerProfiles.ts index c933002b0..9334aa9ca 100644 --- a/server/src/services/budgetExtraction/providerProfiles.ts +++ b/server/src/services/budgetExtraction/providerProfiles.ts @@ -33,9 +33,17 @@ export const LLM_PROVIDERS: readonly LlmProvider[] = [ * Anthropic's OpenAI-compat layer requires this when `response_format.type` * is `'json_schema'`. */ +// Anthropic's OpenAI-compat layer requires `strict: true` (Input must literally +// be `true`, not `false`). OpenAI's structured-outputs spec — which Anthropic +// mirrors — imposes additional rules when `strict: true`: +// 1. `additionalProperties: false` must be set on every object schema +// 2. EVERY property must be listed in `required` (optional fields use +// union-typed nulls: `type: ['number', 'null']`) +// Our `validateExtractedLines` already tolerates null for the optional fields, +// so the LLM emitting `quantity: null` instead of omitting it is fine. 
const EXTRACTED_LINES_SCHEMA = { name: 'extracted_lines', - strict: false, + strict: true, schema: { type: 'object', properties: { @@ -54,11 +62,23 @@ const EXTRACTED_LINES_SCHEMA = { vendorName: { type: ['string', 'null'] }, confidence: { type: 'number' }, }, - required: ['description', 'totalAmount', 'confidence'], + required: [ + 'description', + 'quantity', + 'unit', + 'unitPrice', + 'totalAmount', + 'includesVat', + 'vatRate', + 'vendorName', + 'confidence', + ], + additionalProperties: false, }, }, }, required: ['lines'], + additionalProperties: false, }, } as const;