[Refactor] speculative decoding: use mto config subsystem #1328
`modelopt/recipe/config.py`:

```diff
@@ -17,18 +17,28 @@
 from __future__ import annotations

 import warnings
 from enum import Enum

-from pydantic import field_validator
+from pydantic import field_validator, model_validator

 from modelopt.torch.opt.config import ModeloptBaseConfig, ModeloptField
 from modelopt.torch.quantization.config import QuantizeConfig
+from modelopt.torch.speculative.config import DFlashConfig, EagleConfig, MedusaConfig
+from modelopt.torch.speculative.plugins.hf_training_args import DataArguments as SpecDataArgs
+from modelopt.torch.speculative.plugins.hf_training_args import ModelArguments as SpecModelArgs
+from modelopt.torch.speculative.plugins.hf_training_args import (
+    TrainingArguments as SpecTrainingArgs,
+)


 class RecipeType(str, Enum):
     """List of recipe types."""

     PTQ = "ptq"
+    SPECULATIVE_EAGLE = "speculative_eagle"
+    SPECULATIVE_DFLASH = "speculative_dflash"
+    SPECULATIVE_MEDUSA = "speculative_medusa"
     # QAT = "qat"  # Not implemented yet, will be added in the future.
```
```diff
@@ -72,3 +82,94 @@ class ModelOptPTQRecipe(ModelOptRecipeBase):
         description="PTQ config containing quant_cfg and algorithm.",
         validate_default=True,
     )
+
+
+class ModelOptSpeculativeRecipeBase(ModelOptRecipeBase):
+    """Base class for speculative-decoding recipes.
+
+    Unlike PTQ, speculative-decoding is a training-time optimization: the draft head is trained
+    with HF Trainer. We therefore bundle ``model`` / ``data`` / ``training`` sections into the
+    recipe so a single YAML is the full experiment spec. Each section is a typed Pydantic model
+    (see :mod:`modelopt.torch.speculative.plugins.hf_training_args`) so field typos and bad
+    values are caught at recipe-load time; HF trainer fields pass through
+    ``TrainingArguments`` via ``extra='allow'``.
+    """
+
+    model: SpecModelArgs = ModeloptField(
+        default=SpecModelArgs(),
+        title="HF model args",
+        description="ModelArguments for the base HF model to train a draft head against.",
+        validate_default=True,
+    )
+    data: SpecDataArgs = ModeloptField(
+        default=SpecDataArgs(),
+        title="HF data args",
+        description="DataArguments for the training/offline dataset.",
+        validate_default=True,
+    )
+    training: SpecTrainingArgs = ModeloptField(
+        default=SpecTrainingArgs(),
+        title="HF training args",
+        description="Speculative-decoding extensions; HF trainer fields flow through as extras.",
+        validate_default=True,
+    )
```
Comment on lines +98 to +115 (Contributor):

**Use lazy defaults for nested recipe sections to avoid import-time validator side effects.**

Passing an already-constructed instance as `default=SpecTrainingArgs()` builds that instance eagerly, at class-definition (i.e. import) time. Pydantic v2's `default_factory=SpecTrainingArgs` instead defers construction until a default is actually needed, which is the recommended pattern for nested model defaults.

Verification against the tree shows this matters here: `TrainingArguments` in `modelopt/torch/speculative/plugins/hf_training_args.py` carries a `_fill_parallelism` validator that touches `accelerate` / `ParallelismConfig`, so constructing the default eagerly runs that validator (and its imports) for every consumer of `modelopt/recipe/config.py`, including PTQ-only users.

However, `ModeloptField` in `modelopt/torch/opt/config.py` currently asserts that a `default` value is always provided ("A default value must be set for ModeloptField.") and does not accept `default_factory`; passing `default_factory` alone trips that assertion. The suggested fix therefore requires updating `ModeloptField` to support `default_factory` first. Once that foundation is in place, the diff changes shown (switching the `model` / `data` / `training` fields, and the per-recipe `eagle` / `dflash` / `medusa` fields, from `default=X()` to `default_factory=X`) become possible.
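The eager-vs-lazy distinction the reviewer describes is easy to demonstrate with the stdlib `dataclasses` module, whose `default` / `default_factory` split has the same semantics as Pydantic's (a sketch; `Sub` is a hypothetical stand-in for a nested args model like `SpecTrainingArgs`):

```python
from dataclasses import dataclass, field


class Sub:
    """Stand-in for a nested args model whose __init__ has side effects."""

    instances = 0

    def __init__(self):
        Sub.instances += 1


@dataclass
class EagerOuter:
    # Sub() runs here, while the class body is evaluated -- i.e. at import time.
    sub: Sub = Sub()


assert Sub.instances == 1  # one instance built before any EagerOuter exists


@dataclass
class LazyOuter:
    # default_factory defers construction until a default is actually needed.
    sub: Sub = field(default_factory=Sub)


assert Sub.instances == 1  # defining LazyOuter built nothing

LazyOuter()
assert Sub.instances == 2  # the factory ran on demand

EagerOuter()
assert Sub.instances == 2  # the shared eager default is reused, not rebuilt
```

Pydantic v2's `Field(default_factory=...)` behaves the same way, which is why the review asks for `ModeloptField` to grow `default_factory` support before the recipe fields can switch.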
```diff
+
+
+class ModelOptEagleRecipe(ModelOptSpeculativeRecipeBase):
+    """Our config class for EAGLE speculative decoding recipes."""
+
+    recipe_type: RecipeType = RecipeType.SPECULATIVE_EAGLE
+
+    eagle: EagleConfig = ModeloptField(
+        default=EagleConfig(),
+        title="EAGLE config",
+        description="EAGLE speculative decoding configuration.",
+        validate_default=True,
+    )
+
+    @model_validator(mode="after")
+    def _derive_eagle_offline(self) -> ModelOptEagleRecipe:
+        self.eagle.eagle_offline = self.data.offline_data_path is not None
+        return self
+
+    @model_validator(mode="after")
+    def _warn_rope_vs_training_seq_len(self) -> ModelOptEagleRecipe:
+        orig_max_pos = self.eagle.eagle_export_rope_scaling.get("original_max_position_embeddings")
+        if orig_max_pos is not None and orig_max_pos != self.training.training_seq_len:
+            warnings.warn(
+                f"eagle.eagle_export_rope_scaling.original_max_position_embeddings ({orig_max_pos}) "
+                f"differs from training.training_seq_len ({self.training.training_seq_len}). "
+                f"This may affect long-context inference quality."
+            )
+        return self
+
+
+class ModelOptDFlashRecipe(ModelOptSpeculativeRecipeBase):
+    """Our config class for DFlash speculative decoding recipes."""
+
+    recipe_type: RecipeType = RecipeType.SPECULATIVE_DFLASH
+
+    dflash: DFlashConfig = ModeloptField(
+        default=DFlashConfig(),
+        title="DFlash config",
+        description="DFlash speculative decoding configuration.",
+        validate_default=True,
+    )
+
+    @model_validator(mode="after")
+    def _derive_dflash_offline(self) -> ModelOptDFlashRecipe:
+        self.dflash.dflash_offline = self.data.offline_data_path is not None
+        return self
+
+
+class ModelOptMedusaRecipe(ModelOptSpeculativeRecipeBase):
+    """Our config class for Medusa speculative decoding recipes."""
+
+    recipe_type: RecipeType = RecipeType.SPECULATIVE_MEDUSA
+
+    medusa: MedusaConfig = ModeloptField(
+        default=MedusaConfig(),
+        title="Medusa config",
+        description="Medusa speculative decoding configuration.",
+        validate_default=True,
+    )
```
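Put together, the recipe classes above imply a single-file YAML layout along these lines (a hypothetical sketch: the section and field names follow the code in this diff, the concrete values are purely illustrative):

```yaml
metadata:
  recipe_type: speculative_eagle
  description: EAGLE draft-head training recipe

model: {}          # SpecModelArgs fields for the base HF model

data:
  offline_data_path: null   # a non-null path flips eagle.eagle_offline on

training:
  training_seq_len: 2048

eagle:
  eagle_export_rope_scaling:
    original_max_position_embeddings: 2048  # a mismatch vs training_seq_len warns
```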
`modelopt/recipe/loader.py`:

```diff
@@ -21,8 +21,17 @@
 from importlib.abc import Traversable
 from pathlib import Path

+from omegaconf import OmegaConf
+
 from ._config_loader import BUILTIN_RECIPES_LIB, load_config
-from .config import ModelOptPTQRecipe, ModelOptRecipeBase, RecipeType
+from .config import (
+    ModelOptDFlashRecipe,
+    ModelOptEagleRecipe,
+    ModelOptMedusaRecipe,
+    ModelOptPTQRecipe,
+    ModelOptRecipeBase,
+    RecipeType,
+)

 __all__ = ["load_config", "load_recipe"]
```

```diff
@@ -49,17 +58,29 @@ def _resolve_recipe_path(recipe_path: str | Path | Traversable) -> Path | Traver
     return recipe_path


-def load_recipe(recipe_path: str | Path | Traversable) -> ModelOptRecipeBase:
-    """Load a recipe from a YAML file or directory.
+def load_recipe(
+    recipe_path: str | Path | Traversable,
+    overrides: list[str] | None = None,
+) -> ModelOptRecipeBase:
+    """Load a recipe from a YAML file or directory, with optional CLI-style overrides.

     ``recipe_path`` can be:

-    * A ``.yml`` / ``.yaml`` file with ``metadata`` and ``quantize`` sections.
-      The suffix may be omitted and will be probed automatically.
-    * A directory containing ``recipe.yml`` (metadata) and ``quantize.yml``.
+    * A ``.yml`` / ``.yaml`` file with ``metadata`` and one of ``quantize`` (PTQ),
+      ``eagle`` (EAGLE speculative decoding), ``dflash`` (DFlash speculative
+      decoding) or ``medusa`` (Medusa speculative decoding) sections. The suffix
+      may be omitted and will be probed automatically.
+    * A directory containing ``recipe.yml`` (metadata) plus ``quantize.yml`` —
+      **PTQ recipes only**. Speculative-decoding recipes are always single YAML files.

     The path may be relative to the built-in recipes library or an absolute /
     relative filesystem path.
+
+    ``overrides`` is an optional list of ``key.path=value`` dotlist entries applied
+    on top of the YAML before Pydantic validation. Values are parsed with
+    ``yaml.safe_load`` so they get proper types (``foo.bar=true`` → bool, ``foo=1``
+    → int, ``foo=[1,2]`` → list, etc.). Only supported when *recipe_path* is a
+    single YAML file.
     """
     resolved = _resolve_recipe_path(recipe_path)
```

```diff
@@ -72,21 +93,43 @@ def load_recipe(recipe_path: str | Path | Traversable) -> ModelOptRecipeBase:
     print(f"[load_recipe] loading: {_display}")

     if resolved.is_file():
-        return _load_recipe_from_file(resolved)
+        return _load_recipe_from_file(resolved, overrides=overrides)

     if resolved.is_dir():
+        if overrides:
+            raise ValueError(
+                "overrides are not supported for directory-format recipes; "
+                "use the single-YAML-file form instead."
+            )
         return _load_recipe_from_dir(resolved)

     raise ValueError(f"Recipe path {recipe_path!r} is not a valid YAML file or directory.")


-def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBase:
-    """Load a recipe from a YAML file.
+def _apply_dotlist(data: dict, overrides: list[str]) -> dict:
+    """Merge ``a.b.c=value`` command line overrides on top of ``data`` via OmegaConf."""
+    for entry in overrides:
+        if "=" not in entry:
+            raise ValueError(f"Invalid override (missing '='): {entry!r}")
+    merged = OmegaConf.merge(
+        OmegaConf.create(data),
+        OmegaConf.from_dotlist(list(overrides)),
+    )
+    return OmegaConf.to_container(merged, resolve=True)
```
Comment on lines +109 to +118 (Contributor):

**Don't eagerly resolve OmegaConf interpolations on untrusted recipe overrides.**

`OmegaConf.to_container(merged, resolve=True)` evaluates `${...}` interpolations in the merged config. The repository registers custom resolvers (see the `register_hydra_resolvers` call sites), and the dotlist entries are user-supplied CLI input, so eager resolution can trigger resolver side effects or rewrite values a user meant literally; loaded recipes are also printed in full (e.g. `pprint(recipe)` in `examples/speculative_decoding/main.py`), which would expose whatever the resolvers expand. Change `resolve=True` to `resolve=False` so interpolation strings pass through untouched.

Suggested fix:

```diff
 def _apply_dotlist(data: dict, overrides: list[str]) -> dict:
@@
-    return OmegaConf.to_container(merged, resolve=True)
+    return OmegaConf.to_container(merged, resolve=False)
```
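The dotlist-override semantics under discussion can be sketched in plain Python (a stdlib approximation: the real `_apply_dotlist` goes through OmegaConf, and the loader parses values with `yaml.safe_load` rather than `ast.literal_eval`):

```python
import ast
from copy import deepcopy


def apply_dotlist(data: dict, overrides: list[str]) -> dict:
    """Merge ``a.b.c=value`` overrides onto ``data``, creating nested dicts."""
    out = deepcopy(data)  # leave the caller's mapping untouched
    for entry in overrides:
        if "=" not in entry:
            raise ValueError(f"Invalid override (missing '='): {entry!r}")
        key, raw = entry.split("=", 1)
        try:
            # Rough stand-in for yaml.safe_load: "1" -> int, "[1, 2]" -> list.
            # (YAML would also map "true"/"false" to bools; literal_eval does not.)
            value = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            value = raw  # bare words stay strings
        node = out
        *parents, leaf = key.split(".")
        for part in parents:
            node = node.setdefault(part, {})
        node[leaf] = value
    return out
```

For example, `apply_dotlist({"training": {"lr": 0.1}}, ["training.lr=0.01", "eagle.num_layers=2"])` deepens the mapping to `{"training": {"lr": 0.01}, "eagle": {"num_layers": 2}}`, mirroring how `OmegaConf.from_dotlist` plus `OmegaConf.merge` layer overrides on top of the YAML.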
```diff
+
+
+def _load_recipe_from_file(
+    recipe_file: Path | Traversable,
+    overrides: list[str] | None = None,
+) -> ModelOptRecipeBase:
+    """Load a recipe from a YAML file, optionally applying dotlist overrides.

     The file must contain a ``metadata`` section with at least ``recipe_type``,
-    plus a ``quant_cfg`` mapping and an optional ``algorithm`` for PTQ recipes.
+    plus the algorithm-specific section (``quantize`` / ``eagle`` / ``dflash`` / ``medusa``).
     """
     data = load_config(recipe_file)
+    if overrides:
+        data = _apply_dotlist(data, overrides)

     metadata = data.get("metadata", {})
     recipe_type = metadata.get("recipe_type")
```

```diff
@@ -101,6 +144,36 @@ def _load_recipe_from_file(recipe_file: Path | Traversable) -> ModelOptRecipeBas
         description=metadata.get("description", "PTQ recipe."),
         quantize=data["quantize"],
     )
+    if recipe_type == RecipeType.SPECULATIVE_EAGLE:
+        if "eagle" not in data:
+            raise ValueError(f"EAGLE recipe file {recipe_file} must contain 'eagle'.")
+        return ModelOptEagleRecipe(
+            description=metadata.get("description", "EAGLE speculative decoding recipe."),
+            model=data.get("model") or {},
+            data=data.get("data") or {},
+            training=data.get("training") or {},
+            eagle=data["eagle"],
+        )
+    if recipe_type == RecipeType.SPECULATIVE_DFLASH:
+        if "dflash" not in data:
+            raise ValueError(f"DFlash recipe file {recipe_file} must contain 'dflash'.")
+        return ModelOptDFlashRecipe(
+            description=metadata.get("description", "DFlash speculative decoding recipe."),
+            model=data.get("model") or {},
+            data=data.get("data") or {},
+            training=data.get("training") or {},
+            dflash=data["dflash"],
+        )
+    if recipe_type == RecipeType.SPECULATIVE_MEDUSA:
+        if "medusa" not in data:
+            raise ValueError(f"Medusa recipe file {recipe_file} must contain 'medusa'.")
+        return ModelOptMedusaRecipe(
+            description=metadata.get("description", "Medusa speculative decoding recipe."),
+            model=data.get("model") or {},
+            data=data.get("data") or {},
+            training=data.get("training") or {},
+            medusa=data["medusa"],
+        )
     raise ValueError(f"Unsupported recipe type: {recipe_type!r}")
```
Review comment (Contributor):

**Lazy-load speculative/HF config imports instead of importing them at module import time.**

Lines 27-32 hard-import speculative/HF modules in a core recipe module. This makes optional extras effectively mandatory for import paths that don't use speculative recipes (e.g., PTQ-only usage). Please gate these with lazy/plugin loading and keep type-only references behind deferred annotations / `TYPE_CHECKING`. As per coding guidelines: "Avoid hard imports of optional dependencies at module level; features should be gated by install extras (`[onnx]`, `[hf]`, `[all]`) and loaded lazily via `import_plugin()`".
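A minimal sketch of the requested gating, assuming the project's `import_plugin()` helper is not shown here; `importlib.import_module` inside the function plus a `TYPE_CHECKING` block gives the same effect (the module path and extra name below are illustrative, not the project's actual API):

```python
from __future__ import annotations

import importlib
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Type-only reference: erased at runtime, so importing this module never
    # pulls in the optional dependency. (Hypothetical module path.)
    from some_optional_pkg import HeavyConfig  # noqa: F401


def load_optional(module: str, attr: str, extra: str) -> Any:
    """Import an optional symbol only at the point of use."""
    try:
        mod = importlib.import_module(module)
    except ImportError as exc:
        raise ImportError(
            f"{module!r} is not installed; install the [{extra}] extra to use it."
        ) from exc
    return getattr(mod, attr)


# Resolved lazily, e.g. inside the recipe loader rather than at module import:
# EagleConfig = load_optional("modelopt.torch.speculative.config", "EagleConfig", "hf")
```

With this pattern, `import modelopt.recipe.config` succeeds in a PTQ-only environment, and the ImportError only surfaces when a speculative recipe is actually loaded.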