From 4601e7bb92bd2b8f0882847082ce5787beca4b2f Mon Sep 17 00:00:00 2001
From: Jhin Lee <leehack@gmail.com>
Date: Sat, 4 Jul 2026 12:23:47 -0400
Subject: [PATCH 1/3] Add structured JSON helper APIs

---
 CHANGELOG.md                                  |    6 +
 README.md                                     |   61 +-
 lib/llamadart.dart                            |    1 +
 lib/src/core/engine/engine.dart               |   40 +
 .../models/inference/structured_output.dart   | 1105 +++++++++++++++++
 test/unit/core/engine/engine_test.dart        |   32 +
 .../inference/structured_output_test.dart     |  256 ++++
 website/docs/changelog/recent-releases.md     |    6 +
 .../docs/guides/chat-template-and-parsing.md  |    5 +
 .../docs/guides/generation-and-streaming.md   |   62 +
 10 files changed, 1569 insertions(+), 5 deletions(-)
 create mode 100644 lib/src/core/models/inference/structured_output.dart
 create mode 100644 test/unit/core/models/inference/structured_output_test.dart
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8aaf0fda..220124ca 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,9 @@
+## Unreleased
+
+* Added `LlamaStructuredOutput` and `LlamaEngine.createStructuredJson(...)`
+  helpers for strict JSON-object / JSON-schema generation with final-output
+  validation and typed decoding.
+
 ## 0.8.12
 
 * Updated the default LiteRT-LM native runtime pin to
diff --git a/README.md b/README.md
index c334b36f..392bd64b 100644
--- a/README.md
+++ b/README.md
@@ -960,7 +960,58 @@ void main() async {
 }
 ```
 
-### 2. Advanced Usage (ChatSession)
+### 2. Structured JSON Output
+
+Use `LlamaStructuredOutput` for strict JSON generation with final validation and
+typed decoding on grammar-capable backends.
+
+```dart
+class Contact {
+  Contact({required this.name, required this.email});
+
+  final String name;
+  final String email;
+
+  static Contact fromJson(Map<String, dynamic> json) {
+    return Contact(
+      name: json['name'] as String,
+      email: json['email'] as String,
+    );
+  }
+}
+
+final output = LlamaStructuredOutput<Contact>.jsonSchema(
+  schema: const {
+    'type': 'object',
+    'properties': {
+      'name': {'type': 'string'},
+      'email': {'type': 'string'},
+    },
+    'required': ['name', 'email'],
+    'additionalProperties': false,
+  },
+  decoder: Contact.fromJson,
+);
+
+final contact = await engine.createStructuredJson(
+  const [
+    LlamaChatMessage.fromText(
+      role: LlamaChatRole.user,
+      text: 'Extract a contact from: Ada Lovelace <ada@example.com>',
+    ),
+  ],
+  output: output,
+  params: const GenerationParams(temp: 0, maxTokens: 96),
+);
+```
+
+Streaming callers can pass `responseFormat: output.responseFormat` to
+`engine.create(...)`, buffer the content deltas while rendering them, and call
+`output.parse(text)` once the stream completes. Supported schemas cover the
+JSON-schema-to-GBNF subset: primitive types, objects, arrays, `enum`/`const`,
+local `$ref`, `anyOf`, `oneOf`, `allOf`, string length, and array item-count bounds.
+
+### 3. Advanced Usage (ChatSession)
 
 Use `ChatSession` for most chat applications. It automatically manages conversation history, system prompts, and handles context window limits.
 
@@ -989,7 +1040,7 @@ void main() async {
 }
 ```
 
-### 3. Tool Calling
+### 4. Tool Calling
   
 `llamadart` supports intelligent tool calling where the model can use external functions to help it answer questions.
   
@@ -1026,7 +1077,7 @@ Notes:
 - Some handlers use lazy grammar activation (triggered when a tool-call prefix appears) to match llama.cpp behavior.
 - If you implement a custom handler grammar, prefer Dart raw strings (`r'''...'''`) for GBNF blocks to avoid escaping bugs.
 
-### 3.5 Template Routing (Strict llama.cpp parity)
+### 5. Template Routing (Strict llama.cpp parity)
 
 Template/render/parse routing is intentionally strict to match llama.cpp:
 
@@ -1053,7 +1104,7 @@ final result = await engine.chatTemplate(
 print(result.prompt);
 ```
 
-### 3.6 Logging Control
+### 6. Logging Control
 
 Use separate log levels for Dart and native output when debugging:
 
@@ -1072,7 +1123,7 @@ await engine.setNativeLogLevel(LlamaLogLevel.warn);
 await engine.setLogLevel(LlamaLogLevel.none);
 ```
 
-### 4. Multimodal Usage (Vision/Audio)
+### 7. Multimodal Usage (Vision/Audio)
 
 `llamadart` supports multimodal models (vision and audio) using `LlamaChatMessage.withContent`.
 
diff --git a/lib/llamadart.dart b/lib/llamadart.dart
index ddf55404..36e83be0 100644
--- a/lib/llamadart.dart
+++ b/lib/llamadart.dart
@@ -59,6 +59,7 @@ export 'src/backends/backend.dart'
 // Models - Inference
 export 'src/core/models/inference/model_params.dart';
 export 'src/core/models/inference/generation_params.dart';
+export 'src/core/models/inference/structured_output.dart';
 export 'src/core/models/inference/tool_choice.dart';
 
 // Models - Sources, resolution, and downloads
diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart
index 8bf49ea0..3e7f0308 100644
--- a/lib/src/core/engine/engine.dart
+++ b/lib/src/core/engine/engine.dart
@@ -18,6 +18,7 @@ import '../llama_logger.dart';
 
 import '../models/inference/model_params.dart';
 import '../models/inference/generation_params.dart';
+import '../models/inference/structured_output.dart';
 import '../models/inference/tool_choice.dart';
 import '../models/model_load_options.dart';
 import '../models/model_resolver.dart';
@@ -401,6 +402,8 @@ class LlamaEngine {
   /// grammar-constrained decoding on compatible backends. Supported shapes are:
   /// - `{'type': 'json_object'}`
   /// - `{'type': 'json_schema', 'json_schema': {'schema': <JSON schema>}}`
+  /// Use [LlamaStructuredOutput.responseFormat] or [createStructuredJson] for a
+  /// typed helper that also validates and decodes the final JSON output.
   ///
   /// Backends without grammar-constrained decoding, including LiteRT-LM native
   /// and web today, throw [LlamaUnsupportedException] for strict
@@ -512,6 +515,41 @@ class LlamaEngine {
     );
   }
 
+  /// Generates strict structured JSON and decodes the final output.
+  ///
+  /// This helper applies [output.responseFormat] to [create], collects streamed
+  /// content deltas, validates the completed JSON value, and returns the typed
+  /// value produced by [output]'s decoder. To render tokens live, call [create]
+  /// directly, buffer the content deltas as they arrive, and pass the finished
+  /// text to [LlamaStructuredOutput.parse].
+  Future<T> createStructuredJson<T>(
+    List<LlamaChatMessage> messages, {
+    required LlamaStructuredOutput<T> output,
+    GenerationParams? params,
+    List<ToolDefinition>? tools,
+    ToolChoice? toolChoice,
+    bool parallelToolCalls = false,
+    bool enableThinking = true,
+    String? sourceLangCode,
+    String? targetLangCode,
+    Map<String, dynamic>? chatTemplateKwargs,
+    DateTime? templateNow,
+  }) {
+    return create(
+      messages,
+      params: params,
+      tools: tools,
+      toolChoice: toolChoice,
+      parallelToolCalls: parallelToolCalls,
+      enableThinking: enableThinking,
+      responseFormat: output.responseFormat,
+      sourceLangCode: sourceLangCode,
+      targetLangCode: targetLangCode,
+      chatTemplateKwargs: chatTemplateKwargs,
+      templateNow: templateNow,
+    ).parseStructuredJson(output);
+  }
+
   /// Formats a list of [messages] into a prompt string using the model's template.
   ///
   /// This is useful for preparing messages before calling [generate] directly,
@@ -522,6 +560,8 @@ class LlamaEngine {
   /// Supported shapes are:
   /// - `{'type': 'json_object'}`
   /// - `{'type': 'json_schema', 'json_schema': {'schema': <JSON schema>}}`
+  /// Use [LlamaStructuredOutput.responseFormat] to avoid hand-writing these
+  /// maps in application code.
   ///
   /// [jsonSchema] is a legacy shortcut for
   /// `responseFormat: {'type': 'json_schema', 'json_schema': {'schema': ...}}`.
diff --git a/lib/src/core/models/inference/structured_output.dart b/lib/src/core/models/inference/structured_output.dart
new file mode 100644
index 00000000..e9259e36
--- /dev/null
+++ b/lib/src/core/models/inference/structured_output.dart
@@ -0,0 +1,1105 @@
+import 'dart:convert';
+
+import '../../exceptions.dart';
+import '../../grammar/json_schema_converter.dart';
+import '../chat/completion_chunk.dart';
+
+/// Converts a decoded JSON object into an application-specific value.
+typedef LlamaJsonObjectDecoder<T> = T Function(Map<String, dynamic> json);
+
+/// Converts any decoded JSON value into an application-specific value.
+typedef LlamaJsonValueDecoder<T> = T Function(Object? value);
+
+/// Describes a strict structured JSON response and final-output decoder.
+///
+/// Use [jsonObject] when any JSON object is acceptable. Use [jsonSchema] for
+/// the common object-shaped JSON Schema case, or [jsonValueSchema] when the
+/// schema returns a primitive or array. All schema helpers validate that the
+/// schema can be converted to the GBNF subset used by `llamadart` before a
+/// generation request is sent.
+///
+/// Example:
+/// ```dart
+/// final output = LlamaStructuredOutput<Contact>.jsonSchema(
+///   schema: {
+///     'type': 'object',
+///     'properties': {
+///       'name': {'type': 'string'},
+///       'email': {'type': 'string'},
+///     },
+///     'required': ['name', 'email'],
+///     'additionalProperties': false,
+///   },
+///   decoder: Contact.fromJson,
+/// );
+///
+/// final contact = await engine.createStructuredJson(
+///   messages,
+///   output: output,
+/// );
+/// ```
+class LlamaStructuredOutput<T> {
+  LlamaStructuredOutput._({
+    required Map<String, dynamic> responseFormat,
+    required LlamaJsonValueDecoder<T> decoder,
+    Map<String, dynamic>? schema,
+  }) : _responseFormat = _copyJsonMap(responseFormat, 'responseFormat'),
+       _schema = schema == null ? null : _copyJsonMap(schema, 'schema'),
+       _decoder = decoder;
+
+  /// Creates a strict `json_object` response format.
+  ///
+  /// The final model output must decode to a JSON object. [decoder] receives
+  /// that object after parsing succeeds.
+  factory LlamaStructuredOutput.jsonObject({
+    required LlamaJsonObjectDecoder<T> decoder,
+  }) {
+    return LlamaStructuredOutput._(
+      responseFormat: const {'type': 'json_object'},
+      decoder: (value) => decoder(_expectJsonObject(value)),
+    );
+  }
+
+  /// Creates a strict object-shaped `json_schema` response format.
+  ///
+  /// [schema] must be representable by the current JSON-schema-to-GBNF subset:
+  /// primitives, objects with properties/required/additionalProperties, arrays
+  /// with `items` or fixed `prefixItems`, enum/const, local `$ref`, `anyOf`,
+  /// `oneOf`, `allOf`, string length, and array item-count bounds. Unsupported
+  /// or malformed schemas throw [LlamaUnsupportedException] before generation.
+  ///
+  /// [decoder] receives the validated JSON object.
+  factory LlamaStructuredOutput.jsonSchema({
+    required Map<String, dynamic> schema,
+    required LlamaJsonObjectDecoder<T> decoder,
+    String? name,
+    String? description,
+    bool strict = true,
+  }) {
+    final normalizedSchema = _validateStructuredSchema(schema);
+    _validateJsonSchemaRootObject(normalizedSchema);
+    return LlamaStructuredOutput._(
+      responseFormat: _schemaResponseFormat(
+        normalizedSchema,
+        name: name,
+        description: description,
+        strict: strict,
+      ),
+      schema: normalizedSchema,
+      decoder: (value) => decoder(_expectJsonObject(value)),
+    );
+  }
+
+  /// Creates a strict `json_schema` response format for any JSON value.
+  ///
+  /// Use this when the schema's root value is not a JSON object, such as an
+  /// array or string classification label. Object-shaped results can usually
+  /// use [jsonSchema] for a narrower decoder type.
+  factory LlamaStructuredOutput.jsonValueSchema({
+    required Map<String, dynamic> schema,
+    required LlamaJsonValueDecoder<T> decoder,
+    String? name,
+    String? description,
+    bool strict = true,
+  }) {
+    final normalizedSchema = _validateStructuredSchema(schema);
+    return LlamaStructuredOutput._(
+      responseFormat: _schemaResponseFormat(
+        normalizedSchema,
+        name: name,
+        description: description,
+        strict: strict,
+      ),
+      schema: normalizedSchema,
+      decoder: decoder,
+    );
+  }
+
+  final Map<String, dynamic> _responseFormat;
+  final Map<String, dynamic>? _schema;
+  final LlamaJsonValueDecoder<T> _decoder;
+
+  /// OpenAI-compatible response format map for [LlamaEngine.create].
+  Map<String, dynamic> get responseFormat =>
+      _copyJsonMap(_responseFormat, 'responseFormat');
+
+  /// JSON Schema used for final-output validation, if this helper has one.
+  Map<String, dynamic>? get schema =>
+      _schema == null ? null : _copyJsonMap(_schema, 'schema');
+
+  /// Parses, validates, and decodes the final generated JSON text.
+  ///
+  /// This method is intentionally a final-output step. Streaming callers can
+  /// still display chunks as they arrive, then call [parse] once the content
+  /// stream has completed.
+  T parse(String output) {
+    final Object? decoded;
+    try {
+      decoded = jsonDecode(output);
+    } on FormatException catch (error) {
+      throw LlamaInferenceException(
+        'Malformed structured JSON output.',
+        error.message,
+      );
+    }
+
+    final schema = _schema;
+    if (schema == null) {
+      _expectJsonObject(decoded);
+    } else {
+      try {
+        _JsonSchemaOutputValidator(schema).validate(decoded);
+      } on _JsonSchemaValidationFailure catch (error) {
+        throw LlamaInferenceException(
+          'Structured JSON output did not match the requested schema.',
+          error.message,
+        );
+      }
+    }
+
+    try {
+      return _decoder(decoded);
+    } on LlamaException {
+      rethrow;
+    } catch (error) {
+      throw LlamaInferenceException(
+        'Failed to decode structured JSON output.',
+        error,
+      );
+    }
+  }
+}
+
+/// Finalizes a chat-completion chunk stream as structured JSON.
+extension LlamaStructuredOutputStreamExtension on Stream<LlamaCompletionChunk> {
+  /// Drains this stream and decodes the text it carried with [output].
+  ///
+  /// Appends every non-null `delta.content` of every choice in arrival order,
+  /// then passes the joined text to [LlamaStructuredOutput.parse].
+  Future<T> parseStructuredJson<T>(LlamaStructuredOutput<T> output) async {
+    final text = StringBuffer();
+    await for (final chunk in this) {
+      for (final choice in chunk.choices) {
+        text.write(choice.delta.content ?? '');
+      }
+    }
+    return output.parse(text.toString());
+  }
+}
+
+/// Builds the `json_schema` response-format envelope around [schema].
+Map<String, dynamic> _schemaResponseFormat(
+  Map<String, dynamic> schema, {
+  required String? name,
+  required String? description,
+  required bool strict,
+}) {
+  final details = <String, dynamic>{
+    'schema': schema,
+    'strict': strict,
+    if (name != null) 'name': name,
+    if (description != null) 'description': description,
+  };
+  return {'type': 'json_schema', 'json_schema': details};
+}
+
+Map<String, dynamic> _validateStructuredSchema(Map<String, dynamic> schema) {
+  final normalizedSchema = _copyJsonMap(schema, 'schema');
+  _validateSupportedSchemaSubset(normalizedSchema, r'$');
+  try {
+    JsonSchemaConverter.convert(normalizedSchema);
+  } on LlamaException {
+    rethrow;
+  } catch (error) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $error',
+    );
+  }
+  return normalizedSchema;
+}
+
+const _supportedSchemaTypes = <String>{
+  'object',
+  'array',
+  'string',
+  'integer',
+  'number',
+  'boolean',
+  'null',
+};
+
+const _supportedSchemaKeywords = <String>{
+  r'$defs',
+  r'$ref',
+  'additionalProperties',
+  'allOf',
+  'anyOf',
+  'const',
+  'definitions',
+  'enum',
+  'items',
+  'maxItems',
+  'maxLength',
+  'minItems',
+  'minLength',
+  'oneOf',
+  'prefixItems',
+  'properties',
+  'required',
+  'type',
+};
+
+void _validateSupportedSchemaSubset(Map<String, dynamic> schema, String path) {
+  for (final keyword in schema.keys) {
+    if (!_supportedSchemaKeywords.contains(keyword)) {
+      throw LlamaUnsupportedException(
+        'Unsupported structured JSON schema: keyword "$path.$keyword" is not '
+        'supported by the current JSON-schema-to-GBNF subset.',
+      );
+    }
+  }
+
+  final schemaTypes = _schemaTypes(schema['type'], path);
+  _validateRefSchemaShape(schema, path);
+  _validateKeywordContexts(schema, schemaTypes, path);
+  _validateKeywordValues(schema, path);
+  _validateNestedSchemas(schema, path);
+}
+
+Set<String>? _schemaTypes(Object? type, String path) {
+  if (type == null) {
+    return null;
+  }
+  if (type is String) {
+    _validateSchemaType(type, '$path.type');
+    return {type};
+  }
+  if (type is List) {
+    final types = <String>{};
+    for (var i = 0; i < type.length; i++) {
+      final item = type[i];
+      if (item is! String) {
+        throw LlamaUnsupportedException(
+          'Unsupported structured JSON schema: $path.type[$i] must be a '
+          'schema type string.',
+        );
+      }
+      _validateSchemaType(item, '$path.type[$i]');
+      types.add(item);
+    }
+    if (types.isEmpty) {
+      throw LlamaUnsupportedException(
+        'Unsupported structured JSON schema: $path.type must not be empty.',
+      );
+    }
+    return types;
+  }
+  throw LlamaUnsupportedException(
+    'Unsupported structured JSON schema: $path.type must be a string or list '
+    'of strings.',
+  );
+}
+
+/// Rejects a schema type name that is not in [_supportedSchemaTypes].
+void _validateSchemaType(String type, String path) {
+  if (_supportedSchemaTypes.contains(type)) return;
+  throw LlamaUnsupportedException(
+    'Unsupported structured JSON schema: $path uses unsupported type '
+    '"$type".',
+  );
+}
+
+void _validateRefSchemaShape(Map<String, dynamic> schema, String path) {
+  if (!schema.containsKey(r'$ref')) {
+    return;
+  }
+  final ref = schema[r'$ref'];
+  if (ref is! String || !ref.startsWith('#/')) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $path.\$ref must be a local "#/..." '
+      'reference.',
+    );
+  }
+
+  final siblingKeywords = schema.keys.where(
+    (keyword) =>
+        keyword != r'$ref' && keyword != 'definitions' && keyword != r'$defs',
+  );
+  if (siblingKeywords.isNotEmpty) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $path.\$ref cannot be combined with '
+      'sibling validation keywords.',
+    );
+  }
+}
+
+void _validateKeywordContexts(
+  Map<String, dynamic> schema,
+  Set<String>? schemaTypes,
+  String path,
+) {
+  final hasObjectKeywords =
+      schema.containsKey('properties') ||
+      schema.containsKey('required') ||
+      schema.containsKey('additionalProperties');
+  final objectContext =
+      schemaTypes?.contains('object') ??
+      (schema.containsKey('properties') ||
+          schema.containsKey('additionalProperties'));
+  if (hasObjectKeywords && !objectContext) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: object keyword at $path requires '
+      'type "object" or object-shaped properties.',
+    );
+  }
+
+  final hasArrayKeywords =
+      schema.containsKey('items') ||
+      schema.containsKey('prefixItems') ||
+      schema.containsKey('minItems') ||
+      schema.containsKey('maxItems');
+  final arrayContext =
+      schemaTypes?.contains('array') ??
+      (schema.containsKey('items') || schema.containsKey('prefixItems'));
+  if (hasArrayKeywords && !arrayContext) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: array keyword at $path requires '
+      'type "array" or an item schema.',
+    );
+  }
+
+  final hasStringKeywords =
+      schema.containsKey('minLength') || schema.containsKey('maxLength');
+  final stringContext = schemaTypes?.contains('string') ?? false;
+  if (hasStringKeywords && !stringContext) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: string length keyword at $path '
+      'requires type "string".',
+    );
+  }
+}
+
+void _validateKeywordValues(Map<String, dynamic> schema, String path) {
+  final enumValues = schema['enum'];
+  if (enumValues != null) {
+    if (enumValues is! List || enumValues.isEmpty) {
+      throw LlamaUnsupportedException(
+        'Unsupported structured JSON schema: $path.enum must be a non-empty '
+        'list.',
+      );
+    }
+  }
+
+  final required = schema['required'];
+  if (required != null) {
+    if (required is! List) {
+      throw LlamaUnsupportedException(
+        'Unsupported structured JSON schema: $path.required must be a list.',
+      );
+    }
+    for (var i = 0; i < required.length; i++) {
+      if (required[i] is! String) {
+        throw LlamaUnsupportedException(
+          'Unsupported structured JSON schema: $path.required[$i] must be a '
+          'property name string.',
+        );
+      }
+    }
+  }
+
+  _validateNonNegativeIntKeyword(schema, 'minItems', path);
+  _validateNonNegativeIntKeyword(schema, 'maxItems', path);
+  _validateNonNegativeIntKeyword(schema, 'minLength', path);
+  _validateNonNegativeIntKeyword(schema, 'maxLength', path);
+  _validateBounds(schema, 'minItems', 'maxItems', path);
+  _validateBounds(schema, 'minLength', 'maxLength', path);
+}
+
+/// Requires `schema[keyword]`, when present, to be an `int` that is >= 0.
+void _validateNonNegativeIntKeyword(
+  Map<String, dynamic> schema,
+  String keyword,
+  String path,
+) {
+  final bound = schema[keyword];
+  final acceptable = bound is int && bound >= 0;
+  if (acceptable || !schema.containsKey(keyword)) {
+    return;
+  }
+  throw LlamaUnsupportedException(
+    'Unsupported structured JSON schema: $path.$keyword must be a '
+    'non-negative integer.',
+  );
+}
+
+/// Rejects integer bound pairs whose maximum is below their minimum.
+void _validateBounds(
+  Map<String, dynamic> schema,
+  String minKeyword,
+  String maxKeyword,
+  String path,
+) {
+  final lower = schema[minKeyword];
+  final upper = schema[maxKeyword];
+  if (lower is! int || upper is! int || upper >= lower) return;
+  throw LlamaUnsupportedException(
+    'Unsupported structured JSON schema: $path.$maxKeyword must be greater '
+    'than or equal to $path.$minKeyword.',
+  );
+}
+
+void _validateNestedSchemas(Map<String, dynamic> schema, String path) {
+  final properties = schema['properties'];
+  if (properties != null) {
+    final propertySchemas = _schemaMapForKeyword(
+      properties,
+      '$path.properties',
+    );
+    for (final entry in propertySchemas.entries) {
+      _validateSupportedSchemaSubset(
+        _schemaMapForKeyword(entry.value, '$path.properties.${entry.key}'),
+        '$path.properties.${entry.key}',
+      );
+    }
+  }
+
+  final additionalProperties = schema['additionalProperties'];
+  if (additionalProperties != null &&
+      additionalProperties != true &&
+      additionalProperties != false) {
+    _validateSupportedSchemaSubset(
+      _schemaMapForKeyword(additionalProperties, '$path.additionalProperties'),
+      '$path.additionalProperties',
+    );
+  }
+
+  final items = schema['items'];
+  if (items != null) {
+    _validateSupportedSchemaSubset(
+      _schemaMapForKeyword(items, '$path.items'),
+      '$path.items',
+    );
+  }
+
+  final prefixItems = schema['prefixItems'];
+  if (prefixItems != null) {
+    _validateSchemaList(prefixItems, '$path.prefixItems');
+  }
+
+  for (final keyword in const ['oneOf', 'anyOf', 'allOf']) {
+    final alternatives = schema[keyword];
+    if (alternatives != null) {
+      _validateSchemaList(alternatives, '$path.$keyword');
+    }
+  }
+
+  for (final keyword in const ['definitions', r'$defs']) {
+    final definitions = schema[keyword];
+    if (definitions != null) {
+      final definitionSchemas = _schemaMapForKeyword(
+        definitions,
+        '$path.$keyword',
+      );
+      for (final entry in definitionSchemas.entries) {
+        _validateSupportedSchemaSubset(
+          _schemaMapForKeyword(entry.value, '$path.$keyword.${entry.key}'),
+          '$path.$keyword.${entry.key}',
+        );
+      }
+    }
+  }
+}
+
+/// Requires [value] to be a non-empty list whose elements are all schemas in
+/// the supported subset; [path] locates the keyword in error messages.
+void _validateSchemaList(Object? value, String path) {
+  if (value is! List || value.isEmpty) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $path must be a non-empty list of '
+      'schemas.',
+    );
+  }
+  value.asMap().forEach((index, candidate) {
+    final at = '$path[$index]';
+    _validateSupportedSchemaSubset(_schemaMapForKeyword(candidate, at), at);
+  });
+}
+
+/// Copies [value] into a string-keyed schema map, naming [path] on failure.
+Map<String, dynamic> _schemaMapForKeyword(Object? value, String path) {
+  if (value is! Map) {
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $path must be a JSON object.',
+    );
+  }
+  String stringKey(Object? key) {
+    if (key is String) return key;
+    throw LlamaUnsupportedException(
+      'Unsupported structured JSON schema: $path must contain only string '
+      'keys.',
+    );
+  }
+
+  return {
+    for (final entry in value.entries) stringKey(entry.key): entry.value,
+  };
+}
+
+/// Rejects root schemas that could accept anything other than a JSON object.
+void _validateJsonSchemaRootObject(Map<String, dynamic> schema) {
+  final objectOnly = _schemaRequiresJsonObject(schema, schema, <String>{});
+  if (objectOnly) return;
+  throw LlamaUnsupportedException(
+    'LlamaStructuredOutput.jsonSchema requires a JSON object root schema. Use '
+    'LlamaStructuredOutput.jsonValueSchema for primitive, array, or mixed root '
+    'schemas.',
+  );
+}
+
+/// Whether every value accepted by [schema] is necessarily a JSON object.
+///
+/// Local `$ref`s are resolved against [rootSchema]. [resolvingRefs] holds the
+/// refs already being followed, so a reference cycle ends with `false`
+/// instead of recursing forever.
+bool _schemaRequiresJsonObject(
+  Map<String, dynamic> schema,
+  Map<String, dynamic> rootSchema,
+  Set<String> resolvingRefs,
+) {
+  final ref = schema[r'$ref'];
+  if (ref is String) {
+    if (resolvingRefs.contains(ref)) {
+      return false;
+    }
+    final target = _resolveLocalSchemaRef(rootSchema, ref);
+    return target != null &&
+        _schemaRequiresJsonObject(target, rootSchema, {...resolvingRefs, ref});
+  }
+
+  // An explicit `type` decides the answer on its own: sibling `oneOf`,
+  // `anyOf`, `allOf`, `enum`, or `const` keywords can only narrow the set of
+  // accepted values, never widen it beyond the declared type.
+  final schemaTypes = _schemaTypes(schema['type'], r'$');
+  if (schemaTypes != null) {
+    return schemaTypes.length == 1 && schemaTypes.contains('object');
+  }
+
+  // Literal values: object-only when every permitted literal is a map.
+  if (schema.containsKey('const')) {
+    return schema['const'] is Map;
+  }
+
+  final enumValues = schema['enum'];
+  if (enumValues is List) {
+    return enumValues.every((value) => value is Map);
+  }
+
+  bool branchRequiresObject(Object? branch, String path) {
+    return _schemaRequiresJsonObject(
+      _schemaMapForKeyword(branch, path),
+      rootSchema,
+      resolvingRefs,
+    );
+  }
+
+  // `oneOf` and `anyOf` accept whatever a single branch accepts, so every
+  // branch must be object-only.
+  final oneOf = schema['oneOf'];
+  if (oneOf is List) {
+    return oneOf.every((branch) => branchRequiresObject(branch, r'$.oneOf'));
+  }
+
+  final anyOf = schema['anyOf'];
+  if (anyOf is List) {
+    return anyOf.every((branch) => branchRequiresObject(branch, r'$.anyOf'));
+  }
+
+  // `allOf` is a conjunction: one object-only branch already restricts the
+  // whole schema to objects, so `any` is the right quantifier here.
+  final allOf = schema['allOf'];
+  if (allOf is List) {
+    return allOf.any((branch) => branchRequiresObject(branch, r'$.allOf'));
+  }
+
+  // Without a `type`, only object-specific keywords mark an object schema.
+  return schema.containsKey('properties') ||
+      schema.containsKey('additionalProperties');
+}
+
+/// Follows the local JSON Pointer [ref] (`#/a/b`) starting at [rootSchema].
+///
+/// Returns null when [ref] is not local or does not lead to a JSON object.
+Map<String, dynamic>? _resolveLocalSchemaRef(
+  Map<String, dynamic> rootSchema,
+  String ref,
+) {
+  if (!ref.startsWith('#/')) return null;
+  Object? cursor = rootSchema;
+  for (final encoded in ref.substring(2).split('/')) {
+    // JSON Pointer unescaping: `~1` has to be decoded before `~0`.
+    final key = encoded.replaceAll('~1', '/').replaceAll('~0', '~');
+    if (cursor is! Map || !cursor.containsKey(key)) return null;
+    cursor = cursor[key];
+  }
+  if (cursor is! Map) return null;
+  return _schemaMapForKeyword(cursor, ref);
+}
+
+Map<String, dynamic> _copyJsonMap(
+  Map<String, dynamic> value,
+  String valueName,
+) {
+  try {
+    final decoded = jsonDecode(jsonEncode(value));
+    return _toStringKeyedMap(decoded, valueName);
+  } catch (error) {
+    throw LlamaUnsupportedException(
+      '$valueName must be a JSON-encodable object: $error',
+    );
+  }
+}
+
+/// Copies [value] into a string-keyed map, rejecting anything else.
+Map<String, dynamic> _toStringKeyedMap(Object? value, String valueName) {
+  if (value is! Map) {
+    throw LlamaUnsupportedException('$valueName must be a JSON object.');
+  }
+  final copy = <String, dynamic>{};
+  value.forEach((key, entryValue) {
+    if (key is! String) {
+      throw LlamaUnsupportedException(
+        '$valueName must contain only string keys.',
+      );
+    }
+    copy[key] = entryValue;
+  });
+  return copy;
+}
+
+/// Copies decoded model output into a string-keyed map, or throws.
+Map<String, dynamic> _expectJsonObject(Object? value) {
+  if (value is! Map) {
+    throw LlamaInferenceException(
+      'Structured JSON output did not decode to a JSON object.',
+    );
+  }
+  String stringKey(Object? key) {
+    if (key is String) return key;
+    throw LlamaInferenceException(
+      'Structured JSON object contains a non-string key.',
+    );
+  }
+
+  return {
+    for (final entry in value.entries) stringKey(entry.key): entry.value,
+  };
+}
+
+class _JsonSchemaOutputValidator {
+  _JsonSchemaOutputValidator(this.rootSchema);
+
+  final Map<String, dynamic> rootSchema;
+
+  void validate(Object? value) {
+    _validate(value, rootSchema, r'$');
+  }
+
+  void _validate(Object? value, Map<String, dynamic> schema, String path) {
+    final ref = schema[r'$ref'];
+    if (ref != null) {
+      _validate(value, _resolveRef(ref, path), path);
+      return;
+    }
+
+    if (schema.containsKey('const') && !_jsonEquals(value, schema['const'])) {
+      throw _JsonSchemaValidationFailure(
+        '$path must equal ${jsonEncode(schema['const'])}.',
+      );
+    }
+
+    final enumValues = schema['enum'];
+    if (enumValues != null) {
+      if (enumValues is! List) {
+        throw _JsonSchemaValidationFailure('$path schema enum must be a list.');
+      }
+      final matches = enumValues.any(
+        (enumValue) => _jsonEquals(value, enumValue),
+      );
+      if (!matches) {
+        throw _JsonSchemaValidationFailure(
+          '$path must be one of ${jsonEncode(enumValues)}.',
+        );
+      }
+    }
+
+    final oneOf = schema['oneOf'];
+    if (oneOf != null) {
+      final matches = _countMatchingAlternatives(value, oneOf, path, 'oneOf');
+      if (matches != 1) {
+        throw _JsonSchemaValidationFailure(
+          '$path must match exactly one oneOf schema; matched $matches.',
+        );
+      }
+    }
+
+    final anyOf = schema['anyOf'];
+    if (anyOf != null &&
+        _countMatchingAlternatives(value, anyOf, path, 'anyOf') == 0) {
+      throw _JsonSchemaValidationFailure(
+        '$path must match at least one anyOf schema.',
+      );
+    }
+
+    final allOf = schema['allOf'];
+    if (allOf != null) {
+      if (allOf is! List) {
+        throw _JsonSchemaValidationFailure(
+          '$path schema allOf must be a list.',
+        );
+      }
+      for (var i = 0; i < allOf.length; i++) {
+        _validate(value, _schemaMap(allOf[i], '$path.allOf[$i]'), path);
+      }
+    }
+
+    final schemaType = schema['type'];
+    if (schemaType is List) {
+      final matched = schemaType.any(
+        (type) => _matchesType(value, type, schema, path),
+      );
+      if (!matched) {
+        throw _JsonSchemaValidationFailure(
+          '$path did not match any allowed type ${jsonEncode(schemaType)}.',
+        );
+      }
+      return;
+    }
+
+    if (schemaType is String) {
+      _validateType(value, schemaType, schema, path);
+      return;
+    }
+
+    if (schema.containsKey('properties') ||
+        schema.containsKey('additionalProperties')) {
+      _validateType(value, 'object', schema, path);
+      return;
+    }
+
+    if (schema.containsKey('items') || schema.containsKey('prefixItems')) {
+      _validateType(value, 'array', schema, path);
+    }
+  }
+
+  /// Counts the schemas in [alternatives] that accept [value].
+  ///
+  /// [keyword] names the combinator (for example `anyOf`) in error paths.
+  int _countMatchingAlternatives(
+    Object? value,
+    Object? alternatives,
+    String path,
+    String keyword,
+  ) {
+    if (alternatives is! List) {
+      throw _JsonSchemaValidationFailure(
+        '$path schema $keyword must be a list.',
+      );
+    }
+    var accepted = 0;
+    for (var i = 0; i < alternatives.length; i++) {
+      try {
+        final branch = _schemaMap(alternatives[i], '$path.$keyword[$i]');
+        _validate(value, branch, path);
+        accepted++;
+      } on _JsonSchemaValidationFailure {
+        // A rejected branch is expected; move on to the next alternative.
+      }
+    }
+    return accepted;
+  }
+
+  /// Whether [value] satisfies [_validateType] for the candidate [type].
+  bool _matchesType(
+    Object? value,
+    Object? type,
+    Map<String, dynamic> schema,
+    String path,
+  ) {
+    // Entries of a `type` list that are not strings can never match.
+    if (type is! String) return false;
+    try {
+      _validateType(value, type, schema, path);
+    } on _JsonSchemaValidationFailure {
+      return false;
+    }
+    return true;
+  }
+
+  /// Validates [value] against the single JSON Schema [type] name.
+  ///
+  /// Structured types delegate to their dedicated validators; scalar types
+  /// are checked inline. Throws [_JsonSchemaValidationFailure] when [value]
+  /// is rejected or when [type] is not a supported type name.
+  void _validateType(
+    Object? value,
+    String type,
+    Map<String, dynamic> schema,
+    String path,
+  ) {
+    switch (type) {
+      // Container and string types have keyword-aware validators.
+      case 'object':
+        _validateObject(value, schema, path);
+      case 'array':
+        _validateArray(value, schema, path);
+      case 'string':
+        _validateString(value, schema, path);
+      // The remaining types only need a runtime type check.
+      case 'integer':
+        if (!_isJsonInteger(value)) {
+          throw _JsonSchemaValidationFailure('$path must be an integer.');
+        }
+      case 'number':
+        if (value is! num) {
+          throw _JsonSchemaValidationFailure('$path must be a number.');
+        }
+      case 'boolean':
+        if (value is! bool) {
+          throw _JsonSchemaValidationFailure('$path must be a boolean.');
+        }
+      case 'null':
+        if (value != null) {
+          throw _JsonSchemaValidationFailure('$path must be null.');
+        }
+      default:
+        throw _JsonSchemaValidationFailure(
+          '$path has unsupported schema type "$type".',
+        );
+    }
+  }
+
+  /// Checks [value] against the object keywords of [schema].
+  ///
+  /// Enforces `required`, validates each present entry of `properties`, and
+  /// applies `additionalProperties` (`false` or a schema) to all other keys.
+  void _validateObject(
+    Object? value,
+    Map<String, dynamic> schema,
+    String path,
+  ) {
+    final object = _jsonObjectValue(value, path);
+    final required = schema['required'];
+    if (required is List) {
+      for (final key in required) {
+        if (key is! String) {
+          throw _JsonSchemaValidationFailure(
+            '$path schema required entries must be strings.',
+          );
+        }
+        if (!object.containsKey(key)) {
+          throw _JsonSchemaValidationFailure('$path.$key is required.');
+        }
+      }
+    } else if (required != null) {
+      throw _JsonSchemaValidationFailure(
+        '$path schema required must be a list.',
+      );
+    }
+
+    final properties = schema['properties'];
+    final propertySchemas = properties == null
+        ? const <String, dynamic>{}
+        : _schemaMap(properties, '$path.properties');
+    for (final entry in propertySchemas.entries) {
+      final name = entry.key;
+      if (!object.containsKey(name)) {
+        continue;
+      }
+      final childPath = '$path.$name';
+      _validate(object[name], _schemaMap(entry.value, childPath), childPath);
+    }
+
+    // Keys absent from `properties` fall under `additionalProperties`.
+    final additionalProperties = schema['additionalProperties'];
+    final extraKeys = [
+      for (final key in object.keys)
+        if (!propertySchemas.containsKey(key)) key,
+    ];
+    if (additionalProperties == false && extraKeys.isNotEmpty) {
+      throw _JsonSchemaValidationFailure(
+        '$path contains unsupported property "${extraKeys.first}".',
+      );
+    }
+    if (additionalProperties is Map) {
+      final extraPath = '$path.additionalProperties';
+      final additionalSchema = _schemaMap(additionalProperties, extraPath);
+      for (final key in extraKeys) {
+        _validate(object[key], additionalSchema, '$path.$key');
+      }
+    }
+  }
+
+  /// Checks [value] against the array keywords of [schema].
+  ///
+  /// A `prefixItems` list is enforced as a fixed-length tuple and, when
+  /// present, is used instead of `items`.
+  void _validateArray(Object? value, Map<String, dynamic> schema, String path) {
+    if (value is! List) {
+      throw _JsonSchemaValidationFailure('$path must be an array.');
+    }
+    final length = value.length;
+
+    final minItems = schema['minItems'];
+    if (minItems is int && length < minItems) {
+      throw _JsonSchemaValidationFailure(
+        '$path must contain at least $minItems items.',
+      );
+    }
+
+    final maxItems = schema['maxItems'];
+    if (maxItems is int && length > maxItems) {
+      throw _JsonSchemaValidationFailure(
+        '$path must contain at most $maxItems items.',
+      );
+    }
+
+    final prefixItems = schema['prefixItems'];
+    if (prefixItems is List) {
+      // Tuple form: one schema per position and no extra elements.
+      if (length != prefixItems.length) {
+        throw _JsonSchemaValidationFailure(
+          '$path must contain exactly ${prefixItems.length} tuple items.',
+        );
+      }
+      for (var i = 0; i < prefixItems.length; i++) {
+        final itemPath = '$path[$i]';
+        _validate(value[i], _schemaMap(prefixItems[i], itemPath), itemPath);
+      }
+    } else if (schema['items'] != null) {
+      // Homogeneous form: a single schema applies to every element.
+      final itemSchema = _schemaMap(schema['items'], '$path.items');
+      for (var i = 0; i < length; i++) {
+        _validate(value[i], itemSchema, '$path[$i]');
+      }
+    }
+  }
+
+  /// Checks [value] is a string whose code-point length fits [schema] bounds.
+  void _validateString(
+    Object? value,
+    Map<String, dynamic> schema,
+    String path,
+  ) {
+    if (value is! String) {
+      throw _JsonSchemaValidationFailure('$path must be a string.');
+    }
+    final length = value.runes.length; // Not UTF-16 code units.
+    if (schema['minLength'] case final int minLength when length < minLength) {
+      throw _JsonSchemaValidationFailure(
+        '$path must contain at least $minLength characters.',
+      );
+    }
+    if (schema['maxLength'] case final int maxLength when length > maxLength) {
+      throw _JsonSchemaValidationFailure(
+        '$path must contain at most $maxLength characters.',
+      );
+    }
+  }
+
+  /// Resolves the local JSON Pointer [ref] (`#/...`) against [rootSchema].
+  Map<String, dynamic> _resolveRef(Object? ref, String path) {
+    if (ref is! String || !ref.startsWith('#/')) {
+      throw _JsonSchemaValidationFailure(
+        '$path schema uses unsupported ref "$ref".',
+      );
+    }
+    Object? node = rootSchema;
+    for (final token in ref.substring(2).split('/')) {
+      final key = token.replaceAll('~1', '/').replaceAll('~0', '~');
+      if (node is! Map || !node.containsKey(key)) {
+        throw _JsonSchemaValidationFailure(
+          '$path schema ref "$ref" could not be resolved.',
+        );
+      }
+      node = node[key];
+    }
+    return _schemaMap(node, '$path.$ref');
+  }
+
+  /// Returns [value] as a string-keyed schema map.
+  ///
+  /// Throws [_JsonSchemaValidationFailure] when [value] is not a map or has
+  /// a non-string key. The result is a shallow copy: nested values are the
+  /// same instances held by [value].
+  Map<String, dynamic> _schemaMap(Object? value, String path) {
+    if (value is! Map) {
+      throw _JsonSchemaValidationFailure('$path schema must be a JSON object.');
+    }
+    if (value.keys.any((key) => key is! String)) {
+      throw _JsonSchemaValidationFailure(
+        '$path schema must contain only string keys.',
+      );
+    }
+    return Map<String, dynamic>.from(value);
+  }
+
+  /// Returns the decoded JSON [value] as a string-keyed map.
+  ///
+  /// Throws [_JsonSchemaValidationFailure] when [value] is not a map or
+  /// contains a non-string key.
+  Map<String, dynamic> _jsonObjectValue(Object? value, String path) {
+    if (value is! Map) {
+      throw _JsonSchemaValidationFailure('$path must be an object.');
+    }
+    final hasOnlyStringKeys = value.keys.every((key) => key is String);
+    if (!hasOnlyStringKeys) {
+      throw _JsonSchemaValidationFailure(
+        '$path object contains a non-string key.',
+      );
+    }
+    return Map<String, dynamic>.from(value);
+  }
+}
+
+/// Thrown by the schema validator when a value or schema is rejected.
+class _JsonSchemaValidationFailure implements Exception {
+  const _JsonSchemaValidationFailure(this.message);
+  final String message;
+}
+
+/// Whether [value] is an integer in the JSON sense.
+///
+/// Finite doubles with no fractional part (for example `2.0`) also qualify.
+bool _isJsonInteger(Object? value) {
+  return value is int || (value is num && value.isFinite && value % 1 == 0);
+}
+
+/// Whether [a] and [b] are structurally equal JSON values.
+///
+/// Lists compare element-wise in order and maps compare entry-wise; any
+/// other pair of values is compared with `==`.
+bool _jsonEquals(Object? a, Object? b) {
+  if (a is List && b is List) {
+    if (a.length != b.length) {
+      return false;
+    }
+    for (var index = 0; index < a.length; index++) {
+      if (!_jsonEquals(a[index], b[index])) {
+        return false;
+      }
+    }
+    return true;
+  }
+  if (a is Map && b is Map) {
+    // Same size plus every entry of `a` present in `b` implies equal key sets.
+    return a.length == b.length &&
+        a.entries.every(
+          (entry) =>
+              b.containsKey(entry.key) &&
+              _jsonEquals(entry.value, b[entry.key]),
+        );
+  }
+  // Scalars, including numbers: Dart's `==` compares `1` and `1.0` as equal,
+  // so no dedicated numeric branch is needed.
+  return a == b;
+}
diff --git a/test/unit/core/engine/engine_test.dart b/test/unit/core/engine/engine_test.dart
index ca63c629..21de2619 100644
--- a/test/unit/core/engine/engine_test.dart
+++ b/test/unit/core/engine/engine_test.dart
@@ -1509,6 +1509,38 @@ void main() {
       },
     );
 
+    test(
+      'createStructuredJson forwards grammar and decodes typed output',
+      () async {
+        backend.generationText = '{"ok":true}';
+        await engine.loadModel('test-model.bin');
+
+        final result = await engine.createStructuredJson<bool>(
+          const [
+            LlamaChatMessage.fromText(
+              role: LlamaChatRole.user,
+              text: 'return status',
+            ),
+          ],
+          output: LlamaStructuredOutput<bool>.jsonSchema(
+            schema: const {
+              'type': 'object',
+              'properties': {
+                'ok': {'type': 'boolean'},
+              },
+              'required': ['ok'],
+              'additionalProperties': false,
+            },
+            decoder: (json) => json['ok'] as bool,
+          ),
+        );
+
+        expect(result, isTrue);
+        expect(backend.lastGenerationParams?.grammar, isNotNull);
+        expect(backend.lastGenerationParams?.grammar, contains('ok'));
+      },
+    );
+
     test(
       'create rejects strict response format when backend lacks grammar',
       () async {
diff --git a/test/unit/core/models/inference/structured_output_test.dart b/test/unit/core/models/inference/structured_output_test.dart
new file mode 100644
index 00000000..df01d614
--- /dev/null
+++ b/test/unit/core/models/inference/structured_output_test.dart
@@ -0,0 +1,256 @@
+import 'package:llamadart/llamadart.dart';
+import 'package:test/test.dart';
+
+void main() {
+  group('LlamaStructuredOutput', () {
+    test('builds responseFormat and decodes typed object output', () {
+      final output = LlamaStructuredOutput<_Contact>.jsonSchema(
+        name: 'contact',
+        schema: const {
+          'type': 'object',
+          'properties': {
+            'name': {'type': 'string'},
+            'email': {'type': 'string'},
+          },
+          'required': ['name', 'email'],
+          'additionalProperties': false,
+        },
+        decoder: _Contact.fromJson,
+      );
+
+      expect(output.responseFormat['type'], 'json_schema');
+      expect(output.responseFormat['json_schema']['name'], 'contact');
+
+      final contact = output.parse(
+        '{"name":"Ada Lovelace","email":"ada@example.com"}',
+      );
+
+      expect(contact.name, 'Ada Lovelace');
+      expect(contact.email, 'ada@example.com');
+    });
+
+    test('rejects schema that cannot be represented safely', () {
+      expect(
+        () => LlamaStructuredOutput<List<Object?>>.jsonValueSchema(
+          schema: const {
+            'type': 'array',
+            'items': {'type': 'string'},
+            'minItems': 2,
+            'maxItems': 1,
+          },
+          decoder: (value) => value as List<Object?>,
+        ),
+        throwsA(
+          isA<LlamaUnsupportedException>().having(
+            (error) => error.message,
+            'message',
+            contains('Unsupported structured JSON schema'),
+          ),
+        ),
+      );
+    });
+
+    for (final testCase in const [
+      (
+        name: 'minimum',
+        keyword: 'minimum',
+        schema: {
+          'type': 'object',
+          'properties': {
+            'age': {'type': 'integer', 'minimum': 0},
+          },
+        },
+      ),
+      (
+        name: 'pattern',
+        keyword: 'pattern',
+        schema: {'type': 'string', 'pattern': r'^\w+$'},
+      ),
+      (
+        name: 'format',
+        keyword: 'format',
+        schema: {'type': 'string', 'format': 'email'},
+      ),
+      (
+        name: 'uniqueItems',
+        keyword: 'uniqueItems',
+        schema: {
+          'type': 'array',
+          'items': {'type': 'string'},
+          'uniqueItems': true,
+        },
+      ),
+    ]) {
+      test('rejects unsupported schema keyword ${testCase.name}', () {
+        expect(
+          () => LlamaStructuredOutput<Object?>.jsonValueSchema(
+            schema: testCase.schema,
+            decoder: (value) => value,
+          ),
+          throwsA(
+            isA<LlamaUnsupportedException>()
+                .having(
+                  (error) => error.message,
+                  'message',
+                  contains('Unsupported structured JSON schema'),
+                )
+                .having(
+                  (error) => error.message,
+                  'message',
+                  contains(testCase.keyword),
+                ),
+          ),
+        );
+      });
+    }
+
+    for (final testCase in const [
+      (name: 'primitive', schema: {'type': 'string'}),
+      (
+        name: 'array',
+        schema: {
+          'type': 'array',
+          'items': {'type': 'string'},
+        },
+      ),
+    ]) {
+      test('rejects ${testCase.name} root schema in jsonSchema', () {
+        expect(
+          () => LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+            schema: testCase.schema,
+            decoder: (json) => json,
+          ),
+          throwsA(
+            isA<LlamaUnsupportedException>()
+                .having(
+                  (error) => error.message,
+                  'message',
+                  contains('requires a JSON object root schema'),
+                )
+                .having(
+                  (error) => error.message,
+                  'message',
+                  contains('LlamaStructuredOutput.jsonValueSchema'),
+                ),
+          ),
+        );
+      });
+    }
+
+    test('decodes primitive jsonValueSchema output', () {
+      final output = LlamaStructuredOutput<String>.jsonValueSchema(
+        schema: const {
+          'type': 'string',
+          'enum': ['bug', 'feature'],
+        },
+        decoder: (value) => value as String,
+      );
+
+      expect(output.parse('"bug"'), 'bug');
+    });
+
+    test('decodes array jsonValueSchema output', () {
+      final output = LlamaStructuredOutput<List<String>>.jsonValueSchema(
+        schema: const {
+          'type': 'array',
+          'items': {'type': 'string'},
+          'minItems': 1,
+        },
+        decoder: (value) => (value as List).cast<String>(),
+      );
+
+      expect(output.parse('["red","blue"]'), ['red', 'blue']);
+    });
+
+    test('rejects malformed final JSON output', () {
+      final output = LlamaStructuredOutput<Map<String, dynamic>>.jsonObject(
+        decoder: (json) => json,
+      );
+
+      expect(
+        () => output.parse('not json'),
+        throwsA(
+          isA<LlamaInferenceException>().having(
+            (error) => error.message,
+            'message',
+            contains('Malformed structured JSON output'),
+          ),
+        ),
+      );
+    });
+
+    test('rejects final JSON that does not match schema', () {
+      final output = LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+        schema: const {
+          'type': 'object',
+          'properties': {
+            'ok': {'type': 'boolean'},
+          },
+          'required': ['ok'],
+          'additionalProperties': false,
+        },
+        decoder: (json) => json,
+      );
+
+      expect(
+        () => output.parse('{"ok":"yes"}'),
+        throwsA(
+          isA<LlamaInferenceException>().having(
+            (error) => error.details,
+            'details',
+            contains(r'$.ok must be a boolean'),
+          ),
+        ),
+      );
+    });
+
+    test('collects streamed chunks before final validation', () async {
+      final output = LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+        schema: const {
+          'type': 'object',
+          'properties': {
+            'ok': {'type': 'boolean'},
+          },
+          'required': ['ok'],
+        },
+        decoder: (json) => json,
+      );
+
+      final result = await Stream.fromIterable([
+        _chunk('{"ok":'),
+        _chunk('true}'),
+      ]).parseStructuredJson(output);
+
+      expect(result, {'ok': true});
+    });
+  });
+}
+
+/// Minimal typed model for exercising object decoders in these tests.
+class _Contact {
+  const _Contact({required this.name, required this.email});
+
+  final String name;
+  final String email;
+
+  /// Builds a contact from a decoded JSON object.
+  static _Contact fromJson(Map<String, dynamic> json) => _Contact(
+    name: json['name'] as String,
+    email: json['email'] as String,
+  );
+}
+
+/// Builds a completion chunk whose only choice carries [content] as its
+/// delta text.
+LlamaCompletionChunk _chunk(String content) => LlamaCompletionChunk(
+  id: 'chunk',
+  object: 'chat.completion.chunk',
+  created: 0,
+  model: 'test',
+  choices: [
+    LlamaCompletionChunkChoice(
+      index: 0,
+      delta: LlamaCompletionChunkDelta(content: content),
+    ),
+  ],
+);
diff --git a/website/docs/changelog/recent-releases.md b/website/docs/changelog/recent-releases.md
index 6f88c3a6..90e47544 100644
--- a/website/docs/changelog/recent-releases.md
+++ b/website/docs/changelog/recent-releases.md
@@ -7,6 +7,12 @@ For canonical full release notes, use:
 
 - [`CHANGELOG.md`](https://github.com/leehack/llamadart/blob/main/CHANGELOG.md)
 
+## Unreleased
+
+- Added `LlamaStructuredOutput` and `LlamaEngine.createStructuredJson(...)`
+  helpers for strict JSON-object / JSON-schema generation with final-output
+  validation and typed decoding.
+
 ## 0.8.12
 
 - Updated the default LiteRT-LM native runtime pin to
diff --git a/website/docs/guides/chat-template-and-parsing.md b/website/docs/guides/chat-template-and-parsing.md
index bea2ea05..2ac20ca3 100644
--- a/website/docs/guides/chat-template-and-parsing.md
+++ b/website/docs/guides/chat-template-and-parsing.md
@@ -52,6 +52,11 @@ print(result.format);
 `engine.create(...)` accepts `responseFormat` for strict structured output.
 Use `{'type': 'json_object'}` or
 `{'type': 'json_schema', 'json_schema': {'schema': <JSON schema>}}`.
+Application code can build those maps with `LlamaStructuredOutput`, or call
+`engine.createStructuredJson(...)` to collect streamed content and validate the
+final JSON before decoding it into an app type. Streaming UI code can still pass
+`responseFormat: output.responseFormat` and finish with
+`parseStructuredJson(output)` after the stream completes.
 Grammar-capable backends use those hints for strict output. LiteRT-LM native
 and web fail early for strict response formats because the current public
 runtime APIs do not expose JSON-schema/Lark constraint wiring.
diff --git a/website/docs/guides/generation-and-streaming.md b/website/docs/guides/generation-and-streaming.md
index 6d081de8..6d552529 100644
--- a/website/docs/guides/generation-and-streaming.md
+++ b/website/docs/guides/generation-and-streaming.md
@@ -92,6 +92,68 @@ await for (final chunk in engine.create(
 }
 ```
 
+## Structured JSON output
+
+Use `LlamaStructuredOutput` when you want strict JSON plus final validation and
+typed decoding. The helper builds the `responseFormat` map for grammar-capable
+backends and validates the completed model output before returning your value.
+
+```dart
+class TicketClassification {
+  TicketClassification({required this.priority, required this.category});
+
+  final String priority;
+  final String category;
+
+  static TicketClassification fromJson(Map<String, dynamic> json) {
+    return TicketClassification(
+      priority: json['priority'] as String,
+      category: json['category'] as String,
+    );
+  }
+}
+
+final output = LlamaStructuredOutput<TicketClassification>.jsonSchema(
+  schema: const {
+    'type': 'object',
+    'properties': {
+      'priority': {
+        'type': 'string',
+        'enum': ['low', 'medium', 'high'],
+      },
+      'category': {'type': 'string'},
+    },
+    'required': ['priority', 'category'],
+    'additionalProperties': false,
+  },
+  decoder: TicketClassification.fromJson,
+);
+
+final classification = await engine.createStructuredJson(
+  [
+    LlamaChatMessage.fromText(
+      role: LlamaChatRole.user,
+      text: 'Classify this ticket: checkout fails with card declined.',
+    ),
+  ],
+  output: output,
+  params: const GenerationParams(maxTokens: 96, temp: 0),
+);
+```
+
+For live rendering, call `engine.create(..., responseFormat:
+output.responseFormat)` and then finalize the stream with
+`parseStructuredJson(output)`. Validation is a final-output step because partial
+stream chunks are often not valid JSON yet.
+
+Supported schema features match the built-in JSON-schema-to-GBNF subset:
+primitive types, objects with `properties`, `required`, and
+`additionalProperties`, arrays with `items` or fixed `prefixItems`,
+`enum`/`const`, local `$ref`, `anyOf`, `oneOf`, `allOf`, `minLength`,
+`maxLength`, `minItems`, and `maxItems`. Unsupported schemas fail before
+generation. Backends without grammar constraints, including current LiteRT-LM
+native and web paths, still fail early for strict structured output.
+
 ## `create(...)` flow at a glance
 
 1. Build your `List<LlamaChatMessage>`.

From 227b8fc20e0f23bbc952c3cfa8ef85c8624b895c Mon Sep 17 00:00:00 2001
From: Jhin Lee <leehack@gmail.com>
Date: Sat, 4 Jul 2026 13:49:14 -0400
Subject: [PATCH 2/3] Allow structured schema annotations

---
 README.md                                     |  4 ++-
 .../models/inference/structured_output.dart   | 14 +++++++-
 .../inference/structured_output_test.dart     | 34 +++++++++++++++++++
 .../docs/guides/generation-and-streaming.md   |  6 ++--
 4 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 392bd64b..e6708013 100644
--- a/README.md
+++ b/README.md
@@ -1009,7 +1009,9 @@ Streaming callers can pass `responseFormat: output.responseFormat` to
 `engine.create(...)` and call `parseStructuredJson(output)` after the stream
 completes. Supported schemas cover the practical JSON-schema-to-GBNF subset:
 primitive types, objects, arrays, `enum`/`const`, local `$ref`, `anyOf`,
-`oneOf`, `allOf`, string length, and array item-count bounds.
+`oneOf`, `allOf`, string length, and array item-count bounds. Annotation
+metadata such as `title`, `description`, and `default` is preserved but not
+enforced as a decoding constraint.
 
 ### 3. Advanced Usage (ChatSession)
 
diff --git a/lib/src/core/models/inference/structured_output.dart b/lib/src/core/models/inference/structured_output.dart
index e9259e36..289709b8 100644
--- a/lib/src/core/models/inference/structured_output.dart
+++ b/lib/src/core/models/inference/structured_output.dart
@@ -66,7 +66,9 @@ class LlamaStructuredOutput<T> {
   /// primitives, objects with properties/required/additionalProperties, arrays
   /// with `items` or fixed `prefixItems`, enum/const, local `$ref`, `anyOf`,
   /// `oneOf`, `allOf`, string length, and array item-count bounds. Unsupported
-  /// or malformed schemas throw [LlamaUnsupportedException] before generation.
+  /// constraint keywords throw [LlamaUnsupportedException] before generation.
+  /// Annotation metadata such as `title` and `description` is preserved but not
+  /// enforced by constrained decoding.
   ///
   /// [decoder] receives the validated JSON object.
   factory LlamaStructuredOutput.jsonSchema({
@@ -229,14 +231,21 @@ const _supportedSchemaTypes = <String>{
 };
 
 const _supportedSchemaKeywords = <String>{
+  r'$comment',
   r'$defs',
+  r'$id',
   r'$ref',
+  r'$schema',
   'additionalProperties',
   'allOf',
   'anyOf',
   'const',
+  'default',
   'definitions',
+  'deprecated',
+  'description',
   'enum',
+  'examples',
   'items',
   'maxItems',
   'maxLength',
@@ -245,8 +254,11 @@ const _supportedSchemaKeywords = <String>{
   'oneOf',
   'prefixItems',
   'properties',
+  'readOnly',
   'required',
+  'title',
   'type',
+  'writeOnly',
 };
 
 void _validateSupportedSchemaSubset(Map<String, dynamic> schema, String path) {
diff --git a/test/unit/core/models/inference/structured_output_test.dart b/test/unit/core/models/inference/structured_output_test.dart
index df01d614..3289040e 100644
--- a/test/unit/core/models/inference/structured_output_test.dart
+++ b/test/unit/core/models/inference/structured_output_test.dart
@@ -29,6 +29,40 @@ void main() {
       expect(contact.email, 'ada@example.com');
     });
 
+    test('accepts JSON Schema annotation metadata', () {
+      final output = LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+        schema: const {
+          r'$schema': 'https://json-schema.org/draft/2020-12/schema',
+          r'$id': 'https://example.com/contact.schema.json',
+          r'$comment': 'Annotations are ignored by constrained decoding.',
+          'title': 'Contact',
+          'description': 'Extracted contact fields.',
+          'type': 'object',
+          'properties': {
+            'name': {
+              'title': 'Name',
+              'description': 'Display name.',
+              'type': 'string',
+              'default': 'Unknown',
+              'examples': ['Ada Lovelace'],
+            },
+          },
+          'required': ['name'],
+          'additionalProperties': false,
+          'readOnly': true,
+          'writeOnly': false,
+          'deprecated': false,
+        },
+        decoder: (json) => json,
+      );
+
+      expect(output.parse('{"name":"Ada Lovelace"}'), {'name': 'Ada Lovelace'});
+      expect(
+        output.responseFormat['json_schema']['schema']['properties']['name'],
+        containsPair('description', 'Display name.'),
+      );
+    });
+
     test('rejects schema that cannot be represented safely', () {
       expect(
         () => LlamaStructuredOutput<List<Object?>>.jsonValueSchema(
diff --git a/website/docs/guides/generation-and-streaming.md b/website/docs/guides/generation-and-streaming.md
index 6d552529..6e9e879a 100644
--- a/website/docs/guides/generation-and-streaming.md
+++ b/website/docs/guides/generation-and-streaming.md
@@ -151,8 +151,10 @@ primitive types, objects with `properties`, `required`, and
 `additionalProperties`, arrays with `items` or fixed `prefixItems`,
 `enum`/`const`, local `$ref`, `anyOf`, `oneOf`, `allOf`, `minLength`,
 `maxLength`, `minItems`, and `maxItems`. Unsupported schemas fail before
-generation. Backends without grammar constraints, including current LiteRT-LM
-native and web paths, still fail early for strict structured output.
+generation. Annotation metadata such as `title`, `description`, and `default`
+is preserved but not enforced as a decoding constraint. Backends without
+grammar constraints, including current LiteRT-LM native and web paths, still
+fail early for strict structured output.
 
 ## `create(...)` flow at a glance
 

From 07849290e0e04d49287eeca809ae871c10ed9009 Mon Sep 17 00:00:00 2001
From: Jhin Lee <leehack@gmail.com>
Date: Sat, 4 Jul 2026 14:33:57 -0400
Subject: [PATCH 3/3] Address structured output review feedback

---
 README.md                                     |  5 +-
 lib/src/core/engine/engine.dart               |  2 +-
 .../models/inference/structured_output.dart   | 18 +++++-
 .../inference/structured_output_test.dart     | 56 +++++++++++++++++++
 .../docs/guides/chat-template-and-parsing.md  |  2 +-
 .../docs/guides/generation-and-streaming.md   |  8 +--
 6 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index e6708013..166e4f3c 100644
--- a/README.md
+++ b/README.md
@@ -1006,8 +1006,9 @@ final contact = await engine.createStructuredJson(
 ```
 
 Streaming callers can pass `responseFormat: output.responseFormat` to
-`engine.create(...)` and call `parseStructuredJson(output)` after the stream
-completes. Supported schemas cover the practical JSON-schema-to-GBNF subset:
+`engine.create(...)`, render chunks live, then await
+`stream.parseStructuredJson(output)` after the stream completes. Supported
+schemas cover the practical JSON-schema-to-GBNF subset:
 primitive types, objects, arrays, `enum`/`const`, local `$ref`, `anyOf`,
 `oneOf`, `allOf`, string length, and array item-count bounds. Annotation
 metadata such as `title`, `description`, and `default` is preserved but not
diff --git a/lib/src/core/engine/engine.dart b/lib/src/core/engine/engine.dart
index 3e7f0308..6a1bb755 100644
--- a/lib/src/core/engine/engine.dart
+++ b/lib/src/core/engine/engine.dart
@@ -521,7 +521,7 @@ class LlamaEngine {
   /// content deltas, validates the completed JSON value, and returns the typed
   /// value produced by [output]'s decoder. Use [create] directly when you need
   /// to render tokens live; the returned stream can still be finalized with
-  /// `parseStructuredJson(output)`.
+  /// `await stream.parseStructuredJson(output)`.
   Future<T> createStructuredJson<T>(
     List<LlamaChatMessage> messages, {
     required LlamaStructuredOutput<T> output,
diff --git a/lib/src/core/models/inference/structured_output.dart b/lib/src/core/models/inference/structured_output.dart
index 289709b8..057f1da8 100644
--- a/lib/src/core/models/inference/structured_output.dart
+++ b/lib/src/core/models/inference/structured_output.dart
@@ -261,6 +261,19 @@ const _supportedSchemaKeywords = <String>{
   'writeOnly',
 };
 
+const _annotationSchemaKeywords = <String>{
+  r'$comment',
+  r'$id',
+  r'$schema',
+  'default',
+  'deprecated',
+  'description',
+  'examples',
+  'readOnly',
+  'title',
+  'writeOnly',
+};
+
 void _validateSupportedSchemaSubset(Map<String, dynamic> schema, String path) {
   for (final keyword in schema.keys) {
     if (!_supportedSchemaKeywords.contains(keyword)) {
@@ -335,7 +348,10 @@ void _validateRefSchemaShape(Map<String, dynamic> schema, String path) {
 
   final siblingKeywords = schema.keys.where(
     (keyword) =>
-        keyword != r'$ref' && keyword != 'definitions' && keyword != r'$defs',
+        keyword != r'$ref' &&
+        keyword != 'definitions' &&
+        keyword != r'$defs' &&
+        !_annotationSchemaKeywords.contains(keyword),
   );
   if (siblingKeywords.isNotEmpty) {
     throw LlamaUnsupportedException(
diff --git a/test/unit/core/models/inference/structured_output_test.dart b/test/unit/core/models/inference/structured_output_test.dart
index 3289040e..521bcad5 100644
--- a/test/unit/core/models/inference/structured_output_test.dart
+++ b/test/unit/core/models/inference/structured_output_test.dart
@@ -63,6 +63,62 @@ void main() {
       );
     });
 
+    test('accepts annotation-only siblings on ref schemas', () {
+      final output = LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+        schema: const {
+          'type': 'object',
+          'properties': {
+            'contact': {
+              r'$ref': '#/\$defs/contact', // Targets `$defs.contact` below.
+              'title': 'Contact', // Annotation-only sibling.
+              'description': 'Reference annotations are ignored.',
+              r'$comment': 'Safe alongside ref.',
+            },
+          },
+          'required': ['contact'],
+          r'$defs': {
+            'contact': {
+              'type': 'object',
+              'properties': {
+                'name': {'type': 'string'},
+              },
+              'required': ['name'],
+              'additionalProperties': false,
+            },
+          },
+        },
+        decoder: (json) => json, // Identity decoder.
+      );
+
+      expect(output.parse('{"contact":{"name":"Ada Lovelace"}}'), {
+        'contact': {'name': 'Ada Lovelace'},
+      });
+    });
+
+    test('rejects validation siblings on ref schemas', () {
+      expect(
+        () => LlamaStructuredOutput<Map<String, dynamic>>.jsonSchema(
+          schema: const {
+            'type': 'object',
+            'properties': {
+              'contact': {r'$ref': '#/\$defs/contact', 'type': 'object'},
+            },
+            r'$defs': {
+              'contact': {'type': 'object'},
+            },
+          },
+          decoder: (json) => json,
+        ),
+        throwsA(
+          isA<LlamaUnsupportedException>().having( // Thrown at construction.
+            (error) => error.message,
+            'message',
+            contains(r'$ref cannot be combined'), // Raw string: literal `$`.
+          ),
+        ),
+      );
+    });
+
     test('rejects schema that cannot be represented safely', () {
       expect(
         () => LlamaStructuredOutput<List<Object?>>.jsonValueSchema(
diff --git a/website/docs/guides/chat-template-and-parsing.md b/website/docs/guides/chat-template-and-parsing.md
index 2ac20ca3..e0100fc4 100644
--- a/website/docs/guides/chat-template-and-parsing.md
+++ b/website/docs/guides/chat-template-and-parsing.md
@@ -56,7 +56,7 @@ Application code can build those maps with `LlamaStructuredOutput`, or call
 `engine.createStructuredJson(...)` to collect streamed content and validate the
 final JSON before decoding it into an app type. Streaming UI code can still pass
 `responseFormat: output.responseFormat` and finish with
-`parseStructuredJson(output)` after the stream completes.
+`await stream.parseStructuredJson(output)` after the stream completes.
 Grammar-capable backends use those hints for strict output. LiteRT-LM native
 and web fail early for strict response formats because the current public
 runtime APIs do not expose JSON-schema/Lark constraint wiring.
diff --git a/website/docs/guides/generation-and-streaming.md b/website/docs/guides/generation-and-streaming.md
index 6e9e879a..35ae3dc1 100644
--- a/website/docs/guides/generation-and-streaming.md
+++ b/website/docs/guides/generation-and-streaming.md
@@ -141,10 +141,10 @@ final classification = await engine.createStructuredJson(
 );
 ```
 
-For live rendering, call `engine.create(..., responseFormat:
-output.responseFormat)` and then finalize the stream with
-`parseStructuredJson(output)`. Validation is a final-output step because partial
-stream chunks are often not valid JSON yet.
+For live rendering, call `engine.create(..., responseFormat:
+output.responseFormat)`, keep the returned stream, and then finalize it with
+`await stream.parseStructuredJson(output)`. Validation is a final-output step
+because partial stream chunks are often not valid JSON yet.
 
 Supported schema features match the built-in JSON-schema-to-GBNF subset:
 primitive types, objects with `properties`, `required`, and