Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 41 additions & 5 deletions server/src/services/budgetExtraction/providerProfiles.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,18 +87,54 @@ describe('buildRequestBody', () => {
expect(body.response_format).toEqual({ type: 'json_object' });
});

it('anthropic → response_format: json_schema with full ExtractedLine schema', () => {
it('anthropic → response_format: json_schema with strict mode + full ExtractedLine schema', () => {
const body = buildRequestBody({ ...common, provider: 'anthropic' });
assertBaseFields(body);
const rf = body.response_format as {
type: string;
json_schema: { name: string; schema: { properties: { lines: unknown } } };
json_schema: {
name: string;
strict: boolean;
schema: {
type: string;
required: string[];
additionalProperties: boolean;
properties: {
lines: {
type: string;
items: {
type: string;
required: string[];
additionalProperties: boolean;
properties: Record<string, unknown>;
};
};
};
};
};
};
expect(rf.type).toBe('json_schema');
expect(rf.json_schema.name).toBe('extracted_lines');
// Schema must allow our optional fields with type unions; we don't snapshot the full
// shape (brittle) but verify the top-level lines array is declared.
expect(rf.json_schema.schema.properties.lines).toBeDefined();
// Anthropic requires strict: true (the original strict: false produced a 400).
expect(rf.json_schema.strict).toBe(true);
// strict mode requires additionalProperties: false on every object node.
expect(rf.json_schema.schema.additionalProperties).toBe(false);
expect(rf.json_schema.schema.properties.lines.items.additionalProperties).toBe(false);
// strict mode requires EVERY property to be listed in `required` (optional
// fields use union-typed nulls).
expect(rf.json_schema.schema.properties.lines.items.required).toEqual(
expect.arrayContaining([
'description',
'quantity',
'unit',
'unitPrice',
'totalAmount',
'includesVat',
'vatRate',
'vendorName',
'confidence',
]),
);
});

it('generic → no response_format hint', () => {
Expand Down
24 changes: 22 additions & 2 deletions server/src/services/budgetExtraction/providerProfiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,17 @@ export const LLM_PROVIDERS: readonly LlmProvider[] = [
* Anthropic's OpenAI-compat layer requires this when `response_format.type`
* is `'json_schema'`.
*/
// Anthropic's OpenAI-compat layer requires `strict: true` (the value must be
// the literal `true`; `false` is rejected). OpenAI's structured-outputs spec —
// which Anthropic mirrors — imposes additional rules when `strict: true`:
// 1. `additionalProperties: false` must be set on every object schema.
// 2. EVERY property must be listed in `required` (optional fields use
// union-typed nulls, e.g. `type: ['number', 'null']`).
// Our `validateExtractedLines` already tolerates null for the optional fields,
// so the LLM emitting `quantity: null` instead of omitting the key is fine.
const EXTRACTED_LINES_SCHEMA = {
name: 'extracted_lines',
strict: false,
strict: true,
schema: {
type: 'object',
properties: {
Expand All @@ -54,11 +62,23 @@ const EXTRACTED_LINES_SCHEMA = {
vendorName: { type: ['string', 'null'] },
confidence: { type: 'number' },
},
required: ['description', 'totalAmount', 'confidence'],
required: [
'description',
'quantity',
'unit',
'unitPrice',
'totalAmount',
'includesVat',
'vatRate',
'vendorName',
'confidence',
],
additionalProperties: false,
},
},
},
required: ['lines'],
additionalProperties: false,
},
} as const;

Expand Down