From cbb6bfccca906b89c5cc251a2554a4ee1ce8f425 Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Mon, 4 May 2026 13:26:05 -0700 Subject: [PATCH 1/2] Enable Python 3.14 wheel support to unblock DLFW testing on Ubuntu 26.04 + Python 3.14 Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- .github/workflows/unit_tests.yml | 1 + CHANGELOG.rst | 3 ++- docs/source/getting_started/_installation_for_Linux.rst | 2 +- noxfile.py | 2 +- pyproject.toml | 8 ++++++-- 5 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 9832f0cc605..e0933babf66 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -99,6 +99,7 @@ jobs: - {nox_session: "unit-3.10(torch_211, tf_latest)", python_version: "3.10"} - {nox_session: "unit-3.11(torch_211, tf_latest)", python_version: "3.11"} - {nox_session: "unit-3.13(torch_211, tf_latest)", python_version: "3.13"} + - {nox_session: "unit-3.14(torch_211, tf_latest)", python_version: "3.14"} - {nox_session: "unit-3.12(torch_28, tf_latest)", python_version: "3.12"} - {nox_session: "unit-3.12(torch_29, tf_latest)", python_version: "3.12"} - {nox_session: "unit-3.12(torch_210, tf_latest)", python_version: "3.12"} diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a78a14bc1f6..759690f4537 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,7 +21,7 @@ Changelog - Add ``--cast_mxfp4_to_nvfp4`` flag to ``examples/llm_ptq/hf_ptq.py`` for closed-form, bit-exact MXFP4 → NVFP4 weight conversion. Supports the GPT-OSS family (``openai/gpt-oss-20b``, ``openai/gpt-oss-120b``). See `examples/llm_ptq/README.md `__ for usage. - DeepSeek PTQ (``examples/deepseek/ptq.py``) now defaults to native top-k calibration with post-hoc per-layer peer-max sync of expert ``input_quantizer.amax``; the all-experts path is preserved behind ``--calib_all_experts``. -0.44 (2026-05-xx) +0.44 (2026-05-18) ^^^^^^^^^^^^^^^^^ **New Features** @@ -60,6 +60,7 @@ Changelog - Bump minimum required PyTorch version to 2.8. - [Experimental] Add support for transformers>=5.0, including generic PTQ and unified HF checkpoint export for fused MoE expert modules (Mixtral, Qwen2-MoE, Qwen3-MoE, Qwen3.5-MoE, DeepSeek-V3, Jamba, OLMoE, etc.). - Improve ``megatron_preprocess_data``: add ``--reasoning_content`` support for Nemotron v3 datasets, eliminate intermediate JSONL for HuggingFace datasets, return output file prefixes from the Python API, add gzip input support (``.jsonl.gz``), add ``--strip_newlines`` flag for plain-text pretraining data, add ``--hf_streaming`` for very large datasets (only consumed rows downloaded), and auto-shuffle when ``--hf_max_samples_per_split`` is set to avoid biased sampling. +- Add installation support for Python 3.14. Only basic unit tests are verified for now. Production usage is still default to Python 3.12. Python 3.10 support will be dropped in the next release. 0.43 (2026-04-16) ^^^^^^^^^^^^^^^^^ diff --git a/docs/source/getting_started/_installation_for_Linux.rst b/docs/source/getting_started/_installation_for_Linux.rst index a18b45ee7c4..1c3f17fc0fa 100644 --- a/docs/source/getting_started/_installation_for_Linux.rst +++ b/docs/source/getting_started/_installation_for_Linux.rst @@ -12,7 +12,7 @@ Latest Model Optimizer (``nvidia-modelopt``) currently has the following system +-------------------------+-----------------------------+ | Architecture | x86_64, aarch64 (SBSA) | +-------------------------+-----------------------------+ -| Python | >=3.10,<3.14 | +| Python | >=3.10,<3.15 | +-------------------------+-----------------------------+ | CUDA | 12.x, 13.x | +-------------------------+-----------------------------+ diff --git a/noxfile.py b/noxfile.py index 96db23e1eee..918a7c17807 100644 --- a/noxfile.py +++ b/noxfile.py @@ -52,7 +52,7 @@ def _cov_args(): # ─── CPU unit tests ─────────────────────────────────────────────────────────── -@nox.session(python=["3.10", "3.11", "3.12", "3.13"]) +@nox.session(python=["3.10", "3.11", "3.12", "3.13", "3.14"]) @nox.parametrize("tf_ver", [nox.param(k, id=k) for k in TRANSFORMERS_VERSIONS]) @nox.parametrize("torch_ver", [nox.param(k, id=k) for k in TORCH_VERSIONS]) def unit(session, torch_ver, tf_ver): diff --git a/pyproject.toml b/pyproject.toml index bace52dff9c..8e53eaf09d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ description = "Nvidia Model Optimizer: a unified model optimization and deployme readme = { text = "Checkout https://github.com/nvidia/Model-Optimizer for more information.", content-type = "text/markdown" } license = "Apache-2.0" license-files = ["LICENSE_HEADER"] -requires-python = ">=3.10,<3.14" +requires-python = ">=3.10,<3.15" authors = [{ name = "NVIDIA Corporation" }] classifiers = [ "Programming Language :: Python :: 3", @@ -227,7 +227,11 @@ extend-ignore = [ "SIM", "UP", ] # TODO: Disabled for now, will enable later, once all puzzletron code is migrated -"modelopt/torch/kernels/quantization/gemm/*" = ["N803", "N806", "E731"] # triton style +"modelopt/torch/kernels/quantization/gemm/*" = [ + "N803", + "N806", + "E731", +] # triton style "modelopt/torch/kernels/sparsity/attention/*" = [ "N803", "N806", From e27ddd080846b0d407453bf59f7f44fd3eb498ce Mon Sep 17 00:00:00 2001 From: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> Date: Tue, 5 May 2026 02:04:24 +0530 Subject: [PATCH 2/2] Update CHANGELOG.rst Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com> --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 759690f4537..da2007f4c37 100755 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -60,7 +60,7 @@ Changelog - Bump minimum required PyTorch version to 2.8. - [Experimental] Add support for transformers>=5.0, including generic PTQ and unified HF checkpoint export for fused MoE expert modules (Mixtral, Qwen2-MoE, Qwen3-MoE, Qwen3.5-MoE, DeepSeek-V3, Jamba, OLMoE, etc.). - Improve ``megatron_preprocess_data``: add ``--reasoning_content`` support for Nemotron v3 datasets, eliminate intermediate JSONL for HuggingFace datasets, return output file prefixes from the Python API, add gzip input support (``.jsonl.gz``), add ``--strip_newlines`` flag for plain-text pretraining data, add ``--hf_streaming`` for very large datasets (only consumed rows downloaded), and auto-shuffle when ``--hf_max_samples_per_split`` is set to avoid biased sampling. -- Add installation support for Python 3.14. Only basic unit tests are verified for now. Production usage is still default to Python 3.12. Python 3.10 support will be dropped in the next release. +- Add installation support for Python 3.14. Only basic unit tests are verified for now. Production usage still defaults to Python 3.12. Python 3.10 support will be dropped in the next release. 0.43 (2026-04-16) ^^^^^^^^^^^^^^^^^