From 6ab53db634a74bf6c7addbc1da48d6d5a78ea974 Mon Sep 17 00:00:00 2001
From: Chenhan Yu
Date: Thu, 5 Mar 2026 11:14:25 -0800
Subject: [PATCH 1/4] fix: https://github.com/NVIDIA/Model-Optimizer/issues/981

Signed-off-by: Chenhan Yu
---
 modelopt/torch/opt/plugins/mcore_dist_checkpointing.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
index 3e5b359468..8a2f38a68d 100644
--- a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
+++ b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -149,6 +149,12 @@ def _parse_transformer_config(transformer_config: dict) -> dict:
                 config[k] = v
             else:
                 config[k] = str(v)
+                # Handle https://github.com/NVIDIA/Model-Optimizer/issues/981 where
+                # hierarchical_context_parallel_sizes: [8, 2] will raise a TypeError.
+                try:
+                    config[k] = str(v)
+                except (AttributeError, TypeError):
+                    config[k] = repr(type(v))
 
         return config
 

From 014c1a007f01729c2269ce6e4383fcb0dcfe1abc Mon Sep 17 00:00:00 2001
From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
Date: Fri, 6 Mar 2026 00:52:45 +0530
Subject: [PATCH 2/4] Update modelopt/torch/opt/plugins/mcore_dist_checkpointing.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
---
 modelopt/torch/opt/plugins/mcore_dist_checkpointing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
index 8a2f38a68d..10c04b43d8 100644
--- a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
+++ b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -148,7 +148,6 @@ def _parse_transformer_config(transformer_config: dict) -> dict:
             if isinstance(v, (bool, int, str)):
                 config[k] = v
             else:
-                config[k] = str(v)
                 # Handle https://github.com/NVIDIA/Model-Optimizer/issues/981 where
                 # hierarchical_context_parallel_sizes: [8, 2] will raise a TypeError.
                 try:

From 6e567e60cb5b385d5ee94faab03ef8a7abb19706 Mon Sep 17 00:00:00 2001
From: Chenhan Yu
Date: Thu, 5 Mar 2026 14:05:25 -0800
Subject: [PATCH 3/4] chg: taking feedback to omit the field

Signed-off-by: Chenhan Yu
---
 modelopt/torch/opt/plugins/mcore_dist_checkpointing.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
index 10c04b43d8..a17b55f773 100644
--- a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
+++ b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -153,7 +153,7 @@ def _parse_transformer_config(transformer_config: dict) -> dict:
                 try:
                     config[k] = str(v)
                 except (AttributeError, TypeError):
-                    config[k] = repr(type(v))
+                    print("Warning: TransformerConfig.{} does not have _repr_ implemented.")
 
         return config
 

From 68f6c204dd39de9acfb24a1256a7e559e398c083 Mon Sep 17 00:00:00 2001
From: Asha Anoosheh
Date: Tue, 17 Mar 2026 17:11:43 -0700
Subject: [PATCH 4/4] Remove extra config parsing/saving altogether

Signed-off-by: Asha Anoosheh
---
 .../torch/opt/plugins/mcore_dist_checkpointing.py | 44 -------------------
 1 file changed, 44 deletions(-)

diff --git a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
index a17b55f773..16ace511fe 100644
--- a/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
+++ b/modelopt/torch/opt/plugins/mcore_dist_checkpointing.py
@@ -22,7 +22,6 @@
 from typing import Any
 
 import torch
-import yaml
 from megatron.core import dist_checkpointing, mpu
 from megatron.core.dist_checkpointing.serialization import get_default_load_sharded_strategy
 from megatron.core.dist_checkpointing.strategies.common import COMMON_STATE_FNAME
@@ -36,21 +35,6 @@
 
 SUPPORTED_WRAPPERS[Float16Module] = "module"
 
-DROP_SUBSTRINGS = [
-    "fp4",
-    "fp8",
-    "tp_",
-    "parallel",
-    "cuda_graph",
-    "init_",
-    "cpu",
-    "recompute",
-    "inference",
-    "pipeline",
-    "comm",
-    "batch",
-]
-
 
 def remove_per_module_state(
     modelopt_state: dict[str, Any],
@@ -138,34 +122,6 @@ def save_sharded_modelopt_state(
         sharded_strategy: configures sharded tensors saving behavior and backend
         prefix: the prefix to add to the modelopt_state keys ("model." for NeMo)
     """
-
-    def _parse_transformer_config(transformer_config: dict) -> dict:
-        config = {}
-
-        for k, v in transformer_config.items():
-            if any(substring in k for substring in DROP_SUBSTRINGS):
-                continue
-            if isinstance(v, (bool, int, str)):
-                config[k] = v
-            else:
-                # Handle https://github.com/NVIDIA/Model-Optimizer/issues/981 where
-                # hierarchical_context_parallel_sizes: [8, 2] will raise a TypeError.
-                try:
-                    config[k] = str(v)
-                except (AttributeError, TypeError):
-                    print("Warning: TransformerConfig.{} does not have _repr_ implemented.")
-
-        return config
-
-    # Save own version of run config, if not already saved by the framework.
-    if dist.is_master() and not os.path.exists(f"{checkpoint_name}/run_config.yaml"):
-        run_config_name = f"{checkpoint_name}/modelopt_run_config.yaml"
-        # We avoid deepcopy since some attributes in Megatron-Bridge config cannot be deepcopied.
-        config_dict = _parse_transformer_config(model[0].config.__dict__)
-        config_dict["nvidia_modelopt_version"] = modelopt.__version__
-        with open(run_config_name, "w") as f:
-            yaml.dump(config_dict, f, default_flow_style=False)
-
     if not mto.ModeloptStateManager.is_converted(model[0]):
         return
     if len(model) > 1:
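
For reference, patches 1/4 through 3/4 iterate on the same pattern: coerce arbitrary `TransformerConfig` attributes into YAML-safe values before dumping them to `modelopt_run_config.yaml`. Below is a minimal standalone sketch of that pattern; the helper name `parse_config_for_yaml` and the example values are assumptions for illustration, not code from the repository.

```python
# Minimal sketch (assumed helper, not the ModelOpt source): keep bool/int/str
# config values as-is and stringify everything else defensively, falling back
# to the type name when str() itself raises (the failure mode in issue #981).
from typing import Any

import yaml


def parse_config_for_yaml(config_dict: dict[str, Any], drop_substrings: list[str]) -> dict[str, Any]:
    parsed: dict[str, Any] = {}
    for key, value in config_dict.items():
        # Skip fields that are irrelevant for a human-readable run config.
        if any(sub in key for sub in drop_substrings):
            continue
        if isinstance(value, (bool, int, str)):
            parsed[key] = value
        else:
            try:
                parsed[key] = str(value)
            except (AttributeError, TypeError):
                parsed[key] = repr(type(value))
    return parsed


if __name__ == "__main__":
    # Hypothetical config values for illustration only.
    example = {
        "hidden_size": 4096,
        "hierarchical_context_parallel_sizes": [8, 2],
        "tp_comm_overlap": True,
    }
    print(yaml.safe_dump(parse_config_for_yaml(example, drop_substrings=["comm"])))
```

PATCH 4/4 takes a different route: instead of hardening this coercion logic, it drops the extra `modelopt_run_config.yaml` parsing and saving from `save_sharded_modelopt_state` altogether, which sidesteps the serialization issue entirely.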