diff --git a/.gitignore b/.gitignore
index fdbd9f7c..c5b9a910 100644
--- a/.gitignore
+++ b/.gitignore
@@ -181,6 +181,14 @@ packages/
 cookbooks/cosmos3/generator/audiovisual/outputs/
 outputs/
 
+# Cosmos3 finetune cookbook runtime artifacts (downloads, converted ckpts, runs)
+cookbooks/cosmos3/generator/audiovisual/finetune/data/
+cookbooks/cosmos3/generator/audiovisual/finetune/checkpoints/
+cookbooks/cosmos3/generator/audiovisual/finetune/outputs/
+cookbooks/cosmos3/reasoner/finetune/data/
+cookbooks/cosmos3/reasoner/finetune/checkpoints/
+cookbooks/cosmos3/reasoner/finetune/outputs/
+
 # Streamlit
 .streamlit/
 
diff --git a/README.md b/README.md
index 6d3e51eb..c4fd1aea 100644
--- a/README.md
+++ b/README.md
@@ -646,9 +646,14 @@ Cosmos 3 latency and serving numbers live in [`inference_benchmarks.md`](inferen
 
 ### Finetune
 
-Finetune Cosmos 3 with the [Cosmos Framework](https://github.com/NVIDIA/cosmos-framework), NVIDIA's end-to-end Physical AI framework for training and serving world models. It provides runnable setup, inference, omni-model training, and evaluation workflows for the Generator and Reasoner surfaces, with reference recipes for vision, action, and reasoning post-training.
+Post-train Cosmos 3 on your own data with the supervised fine-tuning (SFT) cookbooks below. Each recipe is a self-contained launch script: once the framework is installed (see each cookbook's prerequisites), a single `bash launch_sft_<recipe>.sh` fetches any data and base checkpoint the recipe needs, then launches training on 8×H100.
 
-See the [Cosmos Framework training guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/training.md) for the full post-training workflow, including data preparation, configuration, and launch commands.
+| Cookbook | Surface | Recipes |
+| --- | --- | --- |
+| [Vision generator SFT](cookbooks/cosmos3/generator/audiovisual/finetune/README.md) | Generator | Full SFT (Cosmos3-Nano) and LoRA SFT (Cosmos3-Super) on captioned video |
+| [Reasoner SFT](cookbooks/cosmos3/reasoner/finetune/README.md) | Reasoner | Alignment SFT on LLaVA-OneVision and physical-plausibility SFT on VideoPhy-2 |
+
+These cookbooks run on the [Cosmos Framework](https://github.com/NVIDIA/cosmos-framework), NVIDIA's end-to-end Physical AI framework for training and serving world models. For the full post-training reference — every config field, raw `torchrun`, resuming, and advanced parallelism — see the [Cosmos Framework training guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/training.md).
 
 ### Limitations
 
diff --git a/cookbooks/cosmos3/generator/audiovisual/finetune/README.md b/cookbooks/cosmos3/generator/audiovisual/finetune/README.md
new file mode 100644
index 00000000..77dd1a04
--- /dev/null
+++ b/cookbooks/cosmos3/generator/audiovisual/finetune/README.md
@@ -0,0 +1,58 @@
+# Cosmos3 Vision Generator Fine-Tuning (SFT)
+
+Supervised fine-tuning (SFT) of the Cosmos3 video generator on your own captioned video data. Tested on 8×H100 (80 GB).
+
+| Recipe | Launch shell | Base model | Dataset |
+| --- | --- | --- | --- |
+| Vision SFT (full) | `launch_sft_vision_nano.sh` | Cosmos3-Nano | [BridgeData2-Subset-Synthetic-Captions](https://huggingface.co/datasets/nvidia/BridgeData2-Subset-Synthetic-Captions) |
+| Vision SFT (LoRA) | `launch_sft_vision_super.sh` | Cosmos3-Super | same as above |
+
+Both recipes train on structured-JSON captions (`caption_json`, the model's native prompt format), so training stays aligned with inference.
+
+## Prerequisites
+
+1. **Install the framework.** These recipes drive `cosmos_framework.scripts.train`, so install a cosmos-framework checkout first — follow the shared [Cosmos Framework setup](../../../README.md#cosmos-framework) (clone into `packages/cosmos3`, then `uv sync --all-extras --group=cu130-train`; use `cu128-train` on a CUDA 12.x driver).
+2. **Recommended container.** For a curated CUDA + PyTorch base, NVIDIA recommends starting from the NGC PyTorch container **`nvcr.io/nvidia/pytorch:25.09-py3`** (CUDA 13; use **`:25.06-py3`** for a CUDA 12.8 driver). See the framework [setup guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/setup.md#recommended-base-image).
+3. **Activate** the framework venv so `cosmos_framework` is importable: `source <path-to>/packages/cosmos3/.venv/bin/activate`.
+4. **Hugging Face access.** Some assets are license-gated — accept terms on the dataset/model pages and authenticate once with `uvx hf@latest auth login` (or export `HF_TOKEN`).
+5. **Run from this directory** (`cookbooks/cosmos3/generator/audiovisual/finetune/`). Downloads, converted checkpoints, and run outputs default to `data/`, `checkpoints/`, and `outputs/` here (all git-ignored).
+
+## Quick start
+
+Each launcher is a complete recipe — run it from this folder and it downloads the dataset, fetches the Wan2.2 VAE, converts the base checkpoint, then runs 8-GPU training (the download/convert steps are skipped if their outputs already exist):
+
+```shell
+bash launch_sft_vision_nano.sh      # full SFT on Cosmos3-Nano
+# or
+bash launch_sft_vision_super.sh     # LoRA SFT on Cosmos3-Super
+```
+
+Paths are fixed at the top of each script (under this git-ignored folder) — edit them there to put data or checkpoints on another filesystem.
+
+## Outputs
+
+Training writes to `outputs/train/<project>/<group>/<name>/`:
+
+- `checkpoints/iter_<N>/` — DCP checkpoint (model / optim / scheduler / trainer state); `checkpoints/latest_checkpoint.txt` names the newest.
+- `config.yaml`, launch metadata, logs, and one directory per registered callback.
+
+## Export to Hugging Face safetensors
+
+```shell
+RUN_DIR=outputs/train/<project>/<group>/<name>
+CKPT=$RUN_DIR/checkpoints/$(cat "$RUN_DIR/checkpoints/latest_checkpoint.txt")
+python -m cosmos_framework.scripts.export_model \
+    --checkpoint-path "$CKPT" --config-file "$RUN_DIR/config.yaml" -o "$RUN_DIR/model"
+```
+
+Use the exported `$RUN_DIR/model` with the [audiovisual inference cookbook](../README.md).
+
+## Advanced configuration
+
+These recipes are intentionally minimal. For the full post-training reference — raw `torchrun`, resuming, every TOML field, parallelism / LoRA / EMA knobs, and the VFM↔VLM remap — see the canonical framework docs:
+
+- [Post-Training (SFT) guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/training.md)
+- [SFT structured-TOML config reference](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/sft_config.md)
+- [JSONL dataset format](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/dataset_jsonl.md) · [environment variables](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/environment_variables.md) · [FAQ / OOM during SFT](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/faq.md)
+
+> SFT here is a multi-GPU `torchrun` job, so these cookbooks ship as launch scripts + this README rather than a one-click notebook.
diff --git a/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_nano.sh b/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_nano.sh
new file mode 100644
index 00000000..52b3d9f2
--- /dev/null
+++ b/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_nano.sh
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# Complete recipe: Vision SFT on Cosmos3-Nano (T2V / I2V / V2V, 8x H100).
+# Run from this folder with the cosmos-framework venv active (see README):
+#   bash launch_sft_vision_nano.sh
+# It downloads the data, prepares the base checkpoint, and trains — in order.
+# Paths are fixed under this (git-ignored) folder; edit them below to relocate.
+
+set -euo pipefail
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+DATASET_DIR="$PWD/data/BridgeData2-Subset-Synthetic-Captions"
+CHECKPOINT_DIR="$PWD/checkpoints/Cosmos3-Nano"
+VAE_PATH="$PWD/checkpoints/wan22_vae/Wan2.2_VAE.pth"
+
+# 1. Download the SFT dataset (skipped if present; license-gated — accept terms + 'uvx hf@latest auth login').
+if [[ ! -f "$DATASET_DIR/sft_dataset_bridge/train/video_dataset_file.jsonl" ]]; then
+    uvx hf@latest download --repo-type dataset nvidia/BridgeData2-Subset-Synthetic-Captions \
+        --revision 40d018ac1c1a2a4b9734f17fdb21f3d933c49a01 --local-dir "$DATASET_DIR"
+fi
+
+# 2. Download the Wan2.2 VAE (skipped if present).
+if [[ ! -f "$VAE_PATH" ]]; then
+    uvx hf@latest download Wan-AI/Wan2.2-TI2V-5B Wan2.2_VAE.pth --local-dir "$(dirname "$VAE_PATH")"
+fi
+
+# 3. Convert the base checkpoint to DCP (skipped if present). The converter writes
+#    into a scratch directory that is renamed only after it exits successfully, so
+#    an interrupted conversion cannot leave a partial "$CHECKPOINT_DIR" that the
+#    next run would skip over and then fail to load.
+if [[ ! -d "$CHECKPOINT_DIR" ]]; then
+    rm -rf "$CHECKPOINT_DIR.partial"
+    python -m cosmos_framework.scripts.convert_model_to_dcp -o "$CHECKPOINT_DIR.partial" --checkpoint-path Cosmos3-Nano
+    mv "$CHECKPOINT_DIR.partial" "$CHECKPOINT_DIR"
+fi
+
+# 4. Train (8-GPU FSDP). The TOML reads these three paths from the environment.
+export DATASET_PATH="$DATASET_DIR/sft_dataset_bridge"
+export BASE_CHECKPOINT_PATH="$CHECKPOINT_DIR"
+export WAN_VAE_PATH="$VAE_PATH"
+IMAGINAIRE_OUTPUT_ROOT="$PWD/outputs/train" torchrun --nproc_per_node=8 \
+    -m cosmos_framework.scripts.train --sft-toml="toml/sft_config/vision_sft_nano.toml"
diff --git a/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_super.sh b/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_super.sh
new file mode 100644
index 00000000..e4dd114d
--- /dev/null
+++ b/cookbooks/cosmos3/generator/audiovisual/finetune/launch_sft_vision_super.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# Complete recipe: Vision LoRA SFT on Cosmos3-Super (T2V / I2V / V2V, 8x H100).
+# Run from this folder with the cosmos-framework venv active (see README):
+#   bash launch_sft_vision_super.sh
+# It downloads the data, prepares the base checkpoint, and trains — in order.
+# Paths are fixed under this (git-ignored) folder; edit them below to relocate.
+
+set -euo pipefail
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+DATASET_DIR="$PWD/data/BridgeData2-Subset-Synthetic-Captions"
+CHECKPOINT_DIR="$PWD/checkpoints/Cosmos3-Super"
+VAE_PATH="$PWD/checkpoints/wan22_vae/Wan2.2_VAE.pth"
+
+# 1. Download the SFT dataset (skipped if present; license-gated — accept terms + 'uvx hf@latest auth login').
+if [[ ! -f "$DATASET_DIR/sft_dataset_bridge/train/video_dataset_file.jsonl" ]]; then
+    uvx hf@latest download --repo-type dataset nvidia/BridgeData2-Subset-Synthetic-Captions \
+        --revision 40d018ac1c1a2a4b9734f17fdb21f3d933c49a01 --local-dir "$DATASET_DIR"
+fi
+
+# 2. Download the Wan2.2 VAE (skipped if present).
+if [[ ! -f "$VAE_PATH" ]]; then
+    uvx hf@latest download Wan-AI/Wan2.2-TI2V-5B Wan2.2_VAE.pth --local-dir "$(dirname "$VAE_PATH")"
+fi
+
+# 3. Convert the base checkpoint to DCP (skipped if present). The converter writes
+#    into a scratch directory that is renamed only after it exits successfully, so
+#    an interrupted conversion cannot leave a partial "$CHECKPOINT_DIR" that the
+#    next run would skip over and then fail to load.
+if [[ ! -d "$CHECKPOINT_DIR" ]]; then
+    rm -rf "$CHECKPOINT_DIR.partial"
+    python -m cosmos_framework.scripts.convert_model_to_dcp -o "$CHECKPOINT_DIR.partial" --checkpoint-path Cosmos3-Super
+    mv "$CHECKPOINT_DIR.partial" "$CHECKPOINT_DIR"
+fi
+
+# 4. Train (8-GPU FSDP, CP=2 / DP=4). The 32B backbone needs the host CUDA libs
+#    cleared and the expandable_segments allocator to fit without OOM.
+#    NOTE(review): older PyTorch releases read PYTORCH_CUDA_ALLOC_CONF rather than
+#    PYTORCH_ALLOC_CONF — confirm the pinned torch version honours the name used here.
+export LD_LIBRARY_PATH=""
+export PYTORCH_ALLOC_CONF="expandable_segments:True"
+export DATASET_PATH="$DATASET_DIR/sft_dataset_bridge"
+export BASE_CHECKPOINT_PATH="$CHECKPOINT_DIR"
+export WAN_VAE_PATH="$VAE_PATH"
+IMAGINAIRE_OUTPUT_ROOT="$PWD/outputs/train" torchrun --nproc_per_node=8 \
+    -m cosmos_framework.scripts.train --sft-toml="toml/sft_config/vision_sft_super.toml"
diff --git a/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_nano.toml b/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_nano.toml
new file mode 100644
index 00000000..dbb192dc
--- /dev/null
+++ b/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_nano.toml
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# vision_sft_nano — T2V / I2V / V2V vision-only SFT (Qwen3-VL-8B / nano)
+# Consumed by cosmos_framework.configs.toml_config.sft_config.load_experiment_from_toml.
+# Uses PackingDataLoader (no dataloader_train.seed slot — keep it omitted here).
+
+[job]
+task         = "vfm"
+experiment   = "vision_sft_nano"
+project      = "cosmos3"
+group        = "sft"
+name         = "vision_sft_nano"
+wandb_mode   = "disabled"
+
+[model]
+max_num_tokens_after_packing = 45056
+joint_attn_implementation    = "two_way"
+precision                    = "bfloat16"                # was [model.parallelism].precision
+
+[model.ema]
+enabled         = true
+rate            = 0.1
+iteration_shift = 0
+
+[model.parallelism]
+data_parallel_shard_degree      = -1                     # -1 = auto from WORLD_SIZE (matches legacy)
+data_parallel_replicate_degree  = 1
+
+[model.compile]
+enabled                         = true                   # was [model.parallelism].use_torch_compile
+compile_dynamic                 = true
+
+[model.activation_checkpointing]
+mode                = "full"
+save_ops_regex      = ["fmha"]
+preserve_rng_state  = true
+determinism_check   = "default"
+
+[model.tokenizer]
+vae_path = "${oc.env:WAN_VAE_PATH}"
+
+[optimizer]
+betas         = [0.9, 0.95]
+eps           = 1.0e-6
+fused         = true
+keys_to_select = [
+    "moe_gen",
+    "time_embedder",
+    "vae2llm",
+    "llm2vae",
+]
+lr            = 2.0e-5
+weight_decay  = 0                                        # int matches legacy YAML repr
+# lr_multipliers intentionally empty for vision SFT (Hydra default {} stands).
+
+[scheduler]
+cycle_lengths      = [1000]
+f_max              = [1.0]
+f_min              = [0.0]
+f_start            = [0.0]
+verbosity_interval = 0
+warm_up_steps      = [50]
+
+[trainer]
+distributed_parallelism = "fsdp"
+grad_accum_iter         = 2
+logging_iter            = 1
+max_iter                = 500
+
+[trainer.callbacks.compile_tokenizer]
+compile_after_iterations = 3
+enabled                  = false
+# warmup_resolutions omitted (None at experiment level)
+
+[trainer.callbacks.grad_clip]
+clip_norm    = 0.1
+force_finite = true
+
+[checkpoint]
+keys_to_skip_loading = ["net_ema."]
+load_path            = "${oc.env:BASE_CHECKPOINT_PATH}"
+save_iter            = 100
+
+[dataloader_train]
+max_sequence_length = 45056
+# Per-caption token cap before truncation. Structured-JSON captions run longer than
+# dense prose (measured max ~1790 tokens), so keep headroom; raise it for longer captions.
+max_caption_tokens = 2048
+# max_samples_per_batch omitted (None — PackingDataLoader doesn't cap by sample count)
+# seed omitted — PackingDataLoader has no seed ctor kwarg
diff --git a/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_super.toml b/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_super.toml
new file mode 100644
index 00000000..06a1574a
--- /dev/null
+++ b/cookbooks/cosmos3/generator/audiovisual/finetune/toml/sft_config/vision_sft_super.toml
@@ -0,0 +1,92 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# vision_sft_super — LoRA-only T2V/I2V/V2V SFT on Qwen3-VL-32B (super tier).
+# Consumed by cosmos_framework.configs.toml_config.sft_config.load_experiment_from_toml.
+# Uses PackingDataLoader (no dataloader_train.seed slot — keep it omitted).
+
+[job]
+task         = "vfm"
+experiment   = "vision_sft_super"
+project      = "cosmos3"
+group        = "sft"
+name         = "vision_sft_super"
+wandb_mode   = "disabled"
+
+[model]
+max_num_tokens_after_packing = 45056
+joint_attn_implementation    = "two_way"
+lora_enabled                 = true
+lora_rank                    = 16
+lora_alpha                   = 32
+lora_target_modules          = "q_proj_moe_gen,k_proj_moe_gen,v_proj_moe_gen,o_proj_moe_gen"
+precision                    = "bfloat16"                # was [model.parallelism].precision
+
+[model.ema]
+enabled         = false                                  # super uses LoRA, no EMA
+rate            = 0.1
+iteration_shift = 0
+
+[model.parallelism]
+data_parallel_shard_degree      = -1                     # -1 = auto from WORLD_SIZE (matches legacy)
+data_parallel_replicate_degree  = 1
+context_parallel_shard_degree   = 2                      # super uses CP=2
+cfg_parallel_shard_degree       = 1
+
+[model.compile]
+enabled                         = false                  # super disables compile (was use_torch_compile)
+compile_dynamic                 = true
+
+[model.activation_checkpointing]
+mode                = "full"
+save_ops_regex      = ["fmha"]
+preserve_rng_state  = true
+determinism_check   = "default"
+
+[model.tokenizer]
+vae_path = "${oc.env:WAN_VAE_PATH}"
+
+[optimizer]
+betas          = [0.9, 0.95]
+eps            = 1.0e-6
+fused          = true
+keys_to_select = ["lora_"]                               # train LoRA adapters only
+lr             = 5.0e-4
+weight_decay   = 0                                       # int matches legacy YAML repr
+# lr_multipliers intentionally empty.
+
+[scheduler]
+cycle_lengths      = [1000]
+f_max              = [1.0]
+f_min              = [0.0]
+f_start            = [0.0]
+verbosity_interval = 0
+warm_up_steps      = [50]
+
+[trainer]
+distributed_parallelism = "fsdp"
+grad_accum_iter         = 2
+logging_iter            = 1
+max_iter                = 500
+
+[trainer.callbacks.compile_tokenizer]
+compile_after_iterations = 3
+enabled                  = false
+warmup_resolutions       = ["256", "480", "720"]
+
+[trainer.callbacks.grad_clip]
+clip_norm    = 0.1
+force_finite = true
+
+[checkpoint]
+keys_to_skip_loading = ["net_ema.", "lora_"]             # LoRA tensors freshly init
+load_path            = "${oc.env:BASE_CHECKPOINT_PATH}"
+save_iter            = 100
+
+[dataloader_train]
+max_sequence_length = 45056
+# Per-caption token cap before truncation. Structured-JSON captions run longer than
+# dense prose (measured max ~1790 tokens), so keep headroom; raise it for longer captions.
+max_caption_tokens = 2048
+# max_samples_per_batch omitted (None — PackingDataLoader doesn't cap by count)
+# seed omitted — PackingDataLoader has no seed ctor kwarg
diff --git a/cookbooks/cosmos3/reasoner/finetune/README.md b/cookbooks/cosmos3/reasoner/finetune/README.md
new file mode 100644
index 00000000..ff7816da
--- /dev/null
+++ b/cookbooks/cosmos3/reasoner/finetune/README.md
@@ -0,0 +1,58 @@
+# Cosmos3 Reasoner Fine-Tuning (SFT)
+
+Supervised fine-tuning (SFT) of the Cosmos3 Reasoner (VLM) on your own data. Tested on 8×H100 (80 GB).
+
+| Recipe | Launch shell | Dataset | Notes |
+| --- | --- | --- | --- |
+| Alignment SFT (LLaVA-OneVision) | `launch_sft_llava_ov.sh` | [lmms-lab/LLaVA-OneVision-Data](https://huggingface.co/datasets/lmms-lab/LLaVA-OneVision-Data) | Streams from HF; backbone fetched at startup — no local prep |
+| Physical-plausibility SFT (VideoPhy-2) | `launch_sft_videophy2_nano.sh` | [videophysics/videophy2_train](https://huggingface.co/datasets/videophysics/videophy2_train) | 1–5 plausibility scoring; dataset + checkpoint auto-prepared |
+
+Both use `[job].task = "vlm"` and bootstrap from `Qwen/Qwen3-VL-8B-Instruct` (optionally a merged Cosmos3-Nano reasoner snapshot).
+
+## Prerequisites
+
+1. **Install the framework.** These recipes drive `cosmos_framework.scripts.train`, so install a cosmos-framework checkout first — follow the shared [Cosmos Framework setup](../../README.md#cosmos-framework) (clone into `packages/cosmos3`, then `uv sync --all-extras --group=cu130-train`; use `cu128-train` on a CUDA 12.x driver).
+2. **Recommended container.** For a curated CUDA + PyTorch base, NVIDIA recommends starting from the NGC PyTorch container **`nvcr.io/nvidia/pytorch:25.09-py3`** (CUDA 13; use **`:25.06-py3`** for a CUDA 12.8 driver). See the framework [setup guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/setup.md#recommended-base-image).
+3. **Activate** the framework venv so `cosmos_framework` is importable: `source <path-to>/packages/cosmos3/.venv/bin/activate`.
+4. **Hugging Face access.** The Qwen3-VL backbone and datasets are fetched from HF — authenticate once with `uvx hf@latest auth login` (or export `HF_TOKEN`); accept any dataset terms first.
+5. **Run from this directory** (`cookbooks/cosmos3/reasoner/finetune/`). Any downloads, converted checkpoints, and run outputs default to `data/`, `checkpoints/`, and `outputs/` here (all git-ignored).
+
+## Quick start
+
+Each launcher is a complete recipe — just run it from this folder:
+
+```shell
+bash launch_sft_llava_ov.sh          # alignment SFT; dataset streams from HF, backbone fetched at startup
+# or
+bash launch_sft_videophy2_nano.sh    # first run materializes VideoPhy-2 + builds the merged Cosmos3-Nano VLM checkpoint, then trains
+```
+
+The VideoPhy-2 download/convert steps are skipped once their outputs exist. Paths are fixed at the top of each script (under this git-ignored folder) — edit them there to relocate data or checkpoints.
+
+## Outputs
+
+Training writes to `outputs/train/<project>/<group>/<name>/`:
+
+- `checkpoints/iter_<N>/` — DCP checkpoint (model / optim / scheduler / trainer state); `checkpoints/latest_checkpoint.txt` names the newest.
+- `config.yaml`, launch metadata, logs, and one directory per registered callback.
+
+## Export to Hugging Face safetensors
+
+```shell
+RUN_DIR=outputs/train/<project>/<group>/<name>
+CKPT=$RUN_DIR/checkpoints/$(cat "$RUN_DIR/checkpoints/latest_checkpoint.txt")
+python -m cosmos_framework.scripts.export_model \
+    --checkpoint-path "$CKPT" --config-file "$RUN_DIR/config.yaml" -o "$RUN_DIR/model"
+```
+
+Use the exported `$RUN_DIR/model` with the [reasoner inference cookbook](../README.md).
+
+## Advanced configuration
+
+These recipes are intentionally minimal. For the full post-training reference — raw `torchrun`, resuming, every TOML field, and advanced parallelism — see the canonical framework docs:
+
+- [Post-Training (SFT) guide](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/training.md)
+- [SFT structured-TOML config reference](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/sft_config.md)
+- [JSONL dataset format](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/dataset_jsonl.md) · [environment variables](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/environment_variables.md) · [FAQ / OOM during SFT](https://github.com/NVIDIA/cosmos-framework/blob/main/docs/faq.md)
+
+> SFT here is a multi-GPU `torchrun` job, so these cookbooks ship as launch scripts + this README rather than a one-click notebook.
diff --git a/cookbooks/cosmos3/reasoner/finetune/launch_sft_llava_ov.sh b/cookbooks/cosmos3/reasoner/finetune/launch_sft_llava_ov.sh
new file mode 100644
index 00000000..844f5a3b
--- /dev/null
+++ b/cookbooks/cosmos3/reasoner/finetune/launch_sft_llava_ov.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# Reasoner alignment SFT on LLaVA-OneVision — complete recipe for 8x H100.
+# Usage (from this folder, cosmos-framework venv active; see README):
+#   bash launch_sft_llava_ov.sh
+# No download step is needed: the dataset streams from HuggingFace and the
+# Qwen3-VL-8B-Instruct backbone is fetched at startup, so this script only trains.
+
+set -euo pipefail
+
+# Work relative to the script's own directory so the TOML path below resolves.
+recipe_dir="$(dirname "${BASH_SOURCE[0]}")"
+cd "$recipe_dir"
+
+# Launch 8-GPU FSDP training; IMAGINAIRE_OUTPUT_ROOT is set for this command only.
+train_cmd=(torchrun --nproc_per_node=8 -m cosmos_framework.scripts.train
+           --sft-toml="toml/sft_config/llava_ov.toml")
+IMAGINAIRE_OUTPUT_ROOT="$PWD/outputs/train" "${train_cmd[@]}"
diff --git a/cookbooks/cosmos3/reasoner/finetune/launch_sft_videophy2_nano.sh b/cookbooks/cosmos3/reasoner/finetune/launch_sft_videophy2_nano.sh
new file mode 100644
index 00000000..30648a8a
--- /dev/null
+++ b/cookbooks/cosmos3/reasoner/finetune/launch_sft_videophy2_nano.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# Complete recipe: Reasoner physical-plausibility SFT on VideoPhy-2 (8x H100).
+# Run from this folder with the cosmos-framework venv active (see README):
+#   bash launch_sft_videophy2_nano.sh
+# It materializes the dataset, builds the merged Cosmos3-Nano VLM checkpoint, and
+# trains — in order. Paths are fixed under this (git-ignored) folder.
+
+set -euo pipefail
+cd "$(dirname "${BASH_SOURCE[0]}")"
+
+VIDEOPHYSICS_ROOT="$PWD/data/videophysics"
+VLM_CHECKPOINT="$PWD/checkpoints/Cosmos3-Nano-VLM"
+
+# 1. Materialize the VideoPhy-2 dataset (skipped if present).
+if [[ ! -d "$VIDEOPHYSICS_ROOT/videophy2_train" ]]; then
+    python -m cosmos_framework.scripts.vlm.prepare_videophy2_from_hf --out_root "$VIDEOPHYSICS_ROOT" --split both
+fi
+
+# 2. Merge Cosmos3-Nano LM onto the Qwen3-VL-8B-Instruct visual tower (skipped if present).
+#    The merge writes to a scratch directory that is renamed only after it succeeds,
+#    so an interrupted merge cannot leave a partial "$VLM_CHECKPOINT" that later runs
+#    would skip over and pass to training.
+if [[ ! -d "$VLM_CHECKPOINT" ]]; then
+    rm -rf "$VLM_CHECKPOINT.partial"
+    python -m cosmos_framework.scripts.convert_model_to_vlm_safetensors --checkpoint-path Cosmos3-Nano -o "$VLM_CHECKPOINT.partial"
+    mv "$VLM_CHECKPOINT.partial" "$VLM_CHECKPOINT"
+fi
+
+# 3. Train (8-GPU FSDP). VIDEOPHYSICS_ROOT is read from the environment; the
+#    merged checkpoint is supplied as a config override after `--`.
+export VIDEOPHYSICS_ROOT
+IMAGINAIRE_OUTPUT_ROOT="$PWD/outputs/train" torchrun --nproc_per_node=8 \
+    -m cosmos_framework.scripts.train --sft-toml="toml/sft_config/videophy2_sft_nano.toml" \
+    -- model.config.policy.backbone.safetensors_path="$VLM_CHECKPOINT"
diff --git a/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/llava_ov.toml b/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/llava_ov.toml
new file mode 100644
index 00000000..41fe3502
--- /dev/null
+++ b/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/llava_ov.toml
@@ -0,0 +1,108 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# pre_exp012_llava_ov — VLM training on lmms-lab/LLaVA-OneVision-Data
+# via CosmosDataLoader. Base config = cosmos_framework/configs/base/vlm/config.py
+# (selected by [job].task="vlm").
+#
+# One knob that the SFTExperimentConfig dataclass does NOT model — supply
+# it as a CLI extra override at launch time:
+#
+#   data_setting.max_tokens=<int, drives both max_seq_len and dataloader.max_tokens>
+#
+# (The backbone is now modeled — see [model.backbone] below.)
+#
+# Example launch:
+#   torchrun --nproc_per_node=8 -m cosmos_framework.scripts.train \
+#       --sft-toml toml/sft_config/llava_ov.toml -- \
+#       data_setting.max_tokens=16000
+#
+# Per-task remap (see _PATH_REMAPS["vlm"]):
+#   model.parallelism.*            -> model.config.parallelism.*
+#   model.compile.*                -> model.config.compile.*
+#   model.activation_checkpointing.* -> model.config.activation_checkpointing.*
+#   model.precision                -> model.config.precision
+#   model.attn_implementation      -> model.config.policy.attn_implementation
+#   model.backbone.*               -> model.config.policy.backbone.*
+#   model.ema.*                    -> model.config.ema.*
+#   model.{max_num_tokens_after_packing, joint_attn_implementation, lora_*,
+#          tokenizer.*} and dataloader_train.seed -> SKIPPED (max_sequence_length is routed to the batcher; see [dataloader_train])
+
+[job]
+task         = "vlm"
+experiment   = "pre_exp012_llava_ov"
+project      = "cosmos3"                                 # matches legacy
+group        = "vlm_llava_ov_demo"
+name         = "pre_exp012_llava_ov"
+wandb_mode   = "disabled"
+
+[model]
+# VLM-only attention impl (PolicyConfig.attn_implementation).
+attn_implementation = "cosmos"     # "cosmos" | "flash_attention_2" | "sdpa" | "eager"
+precision           = "bfloat16"   # was [model.parallelism].precision
+
+[model.backbone]
+model_name = "Qwen/Qwen3-VL-8B-Instruct"   # → model.config.policy.backbone.model_name (VLM remap)
+
+[model.ema]
+enabled         = false
+rate            = 0.1
+iteration_shift = 0
+
+[model.parallelism]
+data_parallel_shard_degree      = 8                  # matches legacy dp_shard_size=8
+data_parallel_replicate_degree  = -1                 # matches legacy dp_replicate_size=-1
+context_parallel_shard_degree   = 1
+cfg_parallel_shard_degree       = 1
+
+[model.compile]
+enabled                         = false              # was [model.parallelism].use_torch_compile
+compile_dynamic                 = true
+
+[model.activation_checkpointing]
+mode                = "full"
+save_ops_regex      = ["fmha"]
+preserve_rng_state  = true
+determinism_check   = "default"
+
+[optimizer]
+betas         = [0.9, 0.95]
+eps           = 1.0e-8                              # skipped for VLM by _PATH_REMAPS
+fused         = true
+lr            = 1.0e-5                              # matches legacy
+weight_decay  = 0.1                                 # matches legacy
+# keys_to_select / lr_multipliers omitted — VLM Trainer defaults apply.
+
+[scheduler]
+cycle_lengths      = [500]                          # matches legacy (VLM_LAMBDACOSINE_KWARGS uses ${trainer.max_iter})
+f_max              = [1.0]
+f_min              = [0.5]                          # matches legacy
+f_start            = [0.05]                         # matches legacy
+verbosity_interval = 0                              # skipped for VLM by _PATH_REMAPS
+warm_up_steps      = [1000]                         # matches legacy
+
+[trainer]
+distributed_parallelism = "fsdp"
+grad_accum_iter         = 1
+logging_iter            = 1
+max_iter                = 500                     # matches legacy
+
+[trainer.callbacks.compile_tokenizer]
+compile_after_iterations = 3
+enabled                  = false
+
+[trainer.callbacks.grad_clip]
+clip_norm    = 1.0
+force_finite = false                                # matches VLM default in cosmos_framework/configs/base/vlm/defaults/callbacks.py:55
+
+[checkpoint]
+keys_to_skip_loading = []
+load_path            = "???"                      # MISSING sentinel; skipped by build_hydra_overrides — supply at runtime
+save_iter            = 100
+
+[dataloader_train]
+# Routed by PATH_REMAPS["vlm"] onto the CosmosDataLoader's nested PoolPackingBatcher:
+#   max_samples_per_batch -> dataloader_train.batcher.max_batch_size
+#   max_sequence_length   -> dataloader_train.batcher.max_tokens
+max_samples_per_batch = 1
+max_sequence_length   = 16000
diff --git a/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/videophy2_sft_nano.toml b/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/videophy2_sft_nano.toml
new file mode 100644
index 00000000..fa1ae613
--- /dev/null
+++ b/cookbooks/cosmos3/reasoner/finetune/toml/sft_config/videophy2_sft_nano.toml
@@ -0,0 +1,91 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: OpenMDW-1.1
+
+# videophy2_sft_nano — VLM dialog SFT on VideoPhy-2 via CosmosDataLoader.
+# Base config = cosmos_framework/configs/base/vlm/config.py (selected by [job].task="vlm").
+#
+# Dataset prep:
+#   python -m cosmos_framework.scripts.vlm.prepare_videophy2_from_hf \
+#       --out_root $VIDEOPHYSICS_ROOT --split both   # as in launch_sft_videophy2_nano.sh; or run once each with --split train / --split val
+#
+# Required env at launch: VIDEOPHYSICS_ROOT (read by the experiment Python).
+#
+# Example launch:
+#   bash launch_sft_videophy2_nano.sh
+
+[job]
+task         = "vlm"
+experiment   = "videophy2_sft_nano"
+project      = "cosmos3"
+group        = "vlm_videophy2_sft"
+name         = "videophy2_sft_nano"
+wandb_mode   = "disabled"
+
+[model]
+attn_implementation = "cosmos"
+precision           = "bfloat16"                         # was [model.parallelism].precision
+
+[model.backbone]
+model_name = "Qwen/Qwen3-VL-8B-Instruct"
+
+[model.ema]
+enabled         = false
+rate            = 0.1
+iteration_shift = 0
+
+[model.parallelism]
+data_parallel_shard_degree      = 8
+data_parallel_replicate_degree  = -1
+context_parallel_shard_degree   = 1
+cfg_parallel_shard_degree       = 1
+
+[model.compile]
+enabled                         = false                  # was [model.parallelism].use_torch_compile
+compile_dynamic                 = true
+
+[model.activation_checkpointing]
+mode                = "full"
+save_ops_regex      = ["fmha"]
+preserve_rng_state  = true
+determinism_check   = "default"
+
+[optimizer]
+betas         = [0.9, 0.95]
+eps           = 1.0e-8
+fused         = true
+lr            = 1.0e-6
+weight_decay  = 0.1
+
+[scheduler]
+cycle_lengths      = [50]
+f_max              = [1.0]
+f_min              = [0.1]
+f_start            = [0.05]
+verbosity_interval = 0
+warm_up_steps      = [5]
+
+[trainer]
+distributed_parallelism = "fsdp"
+grad_accum_iter         = 8
+logging_iter            = 1
+max_iter                = 50
+
+[trainer.callbacks.compile_tokenizer]
+compile_after_iterations = 3
+enabled                  = false
+
+[trainer.callbacks.grad_clip]
+clip_norm    = 1.0
+force_finite = false
+
+[checkpoint]
+keys_to_skip_loading = []
+load_path            = "???"
+save_iter            = 100
+
+[dataloader_train]
+# Routed by PATH_REMAPS["vlm"] onto the CosmosDataLoader's nested PoolPackingBatcher:
+#   max_samples_per_batch -> dataloader_train.batcher.max_batch_size
+#   max_sequence_length   -> dataloader_train.batcher.max_tokens
+max_samples_per_batch = 1
+max_sequence_length   = 16000