NVIDIA · rdspring1 · Feb 3, 2026 · Feb 3, 2026 · Feb 4, 2026
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -18,7 +18,6 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 set(NVFUSER_ROOT ${PROJECT_SOURCE_DIR})
 set(NVFUSER_SRCS_DIR "${NVFUSER_ROOT}/csrc")
 set(NVFUSER_PYTHON_DIR "${NVFUSER_ROOT}/python")
-set(NVFUSER_PYTHON_BINDINGS "${NVFUSER_ROOT}/python/python_frontend")
 set(NVFUSER_PYTHON_COMMON "${NVFUSER_ROOT}/python/python_common")
 set(NVFUSER_PYTHON_DIRECT_BINDINGS "${NVFUSER_ROOT}/python/python_direct")
 set(NVFUSER_CUTLASS "${NVFUSER_ROOT}/cutlass")
@@ -381,21 +380,6 @@ if(NOT MSVC)
   )
 endif()
 
-if(BUILD_PYTHON)
-  list(APPEND NVFUSER_SRCS
-    ${NVFUSER_PYTHON_BINDINGS}/fusion_cache.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/fusion_definition.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/fusion_state.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/segmentation.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/translation.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/translation_utils.cpp
-    ${NVFUSER_SRCS_DIR}/serde/fusion_record.cpp
-    ${NVFUSER_PYTHON_COMMON}/distributed_tensor.cpp
-    ${NVFUSER_PYTHON_COMMON}/python_utils.cpp
-    ${NVFUSER_PYTHON_COMMON}/translation_names.cpp
-  )
-endif()
-
 # We create both static and shared libraries.
 #
 # Shared libraries are what ships, but a large advantage of static libraries is
@@ -606,128 +590,6 @@ install(DIRECTORY "${NVFUSER_ROOT}/lib/dynamic_type/src/dynamic_type"
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nvfuser")
 
 if(BUILD_PYTHON)
-  # -----------------------------
-  # build nvfuser python library
-  # -----------------------------
-  # nvfuser python API sources
-  set(NVFUSER_PYTHON_SRCS)
-  list(APPEND NVFUSER_PYTHON_SRCS
-    ${NVFUSER_PYTHON_BINDINGS}/multidevice_bindings.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/python_bindings.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/python_bindings_extension.cpp
-    ${NVFUSER_PYTHON_BINDINGS}/schedule_bindings.cpp
-  )
-
-  add_library(nvf_py_internal OBJECT ${NVFUSER_PYTHON_SRCS})
-  target_include_directories(nvf_py_internal PUBLIC ${NVFUSER_PYTHON_DIR})
-  target_include_directories(nvf_py_internal PUBLIC ${NVFUSER_PYTHON_COMMON})
-  target_include_directories(nvf_py_internal PUBLIC ${NVFUSER_CUTLASS})
-  target_include_directories(nvf_py_internal SYSTEM INTERFACE
-    ${CMAKE_SOURCE_DIR}/third_party/flatbuffers/include
-  )
-
-  # setup python API version
-  add_custom_command(
-    OUTPUT ${NVFUSER_PYTHON_DIR}/nvfuser/version.py
-    COMMAND
-    "${Python_EXECUTABLE}" -c \"from pathlib import Path\; Path('${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py') .touch() \"
-    COMMAND
-    "${Python_EXECUTABLE}" ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py nvfuser
-    DEPENDS ${NVFUSER_PYTHON_DIR}/tools/gen_nvfuser_version.py
-    DEPENDS ${NVFUSER_PYTHON_DIR}/version.txt
-    WORKING_DIRECTORY ${NVFUSER_PYTHON_DIR}/tools/
-  )
-  add_custom_target(
-    gen_nvfuser_version ALL
-    DEPENDS ${NVFUSER_PYTHON_DIR}/nvfuser/version.py
-  )
-  add_dependencies(nvf_py_internal gen_nvfuser_version)
-
-  target_compile_definitions(nvf_py_internal PRIVATE
-    "-DTORCH_CUDA_BUILD_MAIN_LIB"
-    "-DC10_BUILD_MAIN_LIB=1"
-    EXTENSION_NAME=_C
-  )
-
-  add_library(nvfuser MODULE $<TARGET_OBJECTS:nvf_py_internal>)
-  target_compile_definitions(nvfuser PRIVATE
-    "-DTORCH_CUDA_BUILD_MAIN_LIB"
-    "-DC10_BUILD_MAIN_LIB=1"
-    EXTENSION_NAME=_C
-  )
-
-  if(NOT MSVC)
-    target_compile_options(nvf_py_internal PRIVATE -Wall -Wno-unused-function)
-    target_compile_options(nvf_py_internal PRIVATE -Werror)
-
-    # Add function/data sections for dead code elimination
-    target_compile_options(nvf_py_internal PRIVATE
-      "-ffunction-sections"
-      "-fdata-sections"
-    )
-
-    set(NVF_LIB_SUFFIX ".so")
-  else()
-    set(NVF_LIB_SUFFIX ".pyd")
-  endif()
-
-  set_target_properties(nvfuser PROPERTIES
-    C_STANDARD ${NVFUSER_C_STANDARD}
-    CUDA_STANDARD ${NVFUSER_CUDA_STANDARD}
-    CXX_STANDARD ${NVFUSER_CPP_STANDARD}
-    CXX_STANDARD_REQUIRED ON
-    CXX_VISIBILITY_PRESET hidden
-    INSTALL_RPATH
-    "$ORIGIN/lib:$ORIGIN/../nvfuser_common/lib:$ORIGIN/../nvidia/cuda_runtime/lib:$ORIGIN/../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../torch/lib"
-    POSITION_INDEPENDENT_CODE Yes
-    SUFFIX ${NVF_LIB_SUFFIX}
-    VISIBILITY_INLINES_HIDDEN Yes
-  )
-  set_target_properties(nvf_py_internal PROPERTIES
-    C_STANDARD ${NVFUSER_C_STANDARD}
-    CUDA_STANDARD ${NVFUSER_CUDA_STANDARD}
-    CXX_STANDARD ${NVFUSER_CPP_STANDARD}
-    CXX_STANDARD_REQUIRED ON
-    CXX_VISIBILITY_PRESET hidden
-    INSTALL_RPATH
-    "$ORIGIN/lib:$ORIGIN/../nvidia/cuda_runtime/lib:$ORIGIN/../nvidia/cuda_nvrtc/lib:$ORIGIN/../../nvidia/cuda_cupti/lib:$ORIGIN/../torch/lib"
-    POSITION_INDEPENDENT_CODE Yes
-    VISIBILITY_INLINES_HIDDEN Yes
-  )
-
-  if (NVFUSER_USE_CUTLASS)
-    target_link_libraries(nvf_py_internal PRIVATE nvf_cutlass)
-  endif()
-
-  if (NOT MSVC)
-    target_link_libraries(nvf_py_internal PRIVATE CUDA::cupti)
-  endif()
-
-  target_link_libraries(nvf_py_internal PRIVATE
-    nvfuser_codegen
-    "${TORCH_INSTALL_PREFIX}/lib/libtorch_python.so"
-    pybind11::pybind11 pybind11::headers
-  )
-
-  target_link_libraries(nvfuser PRIVATE
-    nvf_py_internal
-    Python::Module
-  )
-
-  # Add dead code elimination flags to reduce file size
-  if(NOT MSVC)
-    target_link_options(nvfuser PRIVATE
-      "-Wl,--gc-sections"
-      "-Wl,--as-needed"
-      $<$<CONFIG:Release>:-s>
-    )
-  endif()
-
-  set_target_properties(nvfuser PROPERTIES
-    INSTALL_RPATH "$ORIGIN:$ORIGIN/lib:$ORIGIN/../build:$ORIGIN/../nvfuser_common/lib"
-  )
-  install(TARGETS nvfuser DESTINATION lib)
-
   # ------------------------------------------------
   # build nvfuser direct python library
   # ------------------------------------------------
@@ -750,6 +612,9 @@ if(BUILD_PYTHON)
     ${NVFUSER_PYTHON_DIRECT_BINDINGS}/profile.cpp
     ${NVFUSER_PYTHON_DIRECT_BINDINGS}/direct_utils.cpp
     ${NVFUSER_PYTHON_DIRECT_BINDINGS}/python_translate.cpp
+    ${NVFUSER_PYTHON_COMMON}/distributed_tensor.cpp
+    ${NVFUSER_PYTHON_COMMON}/python_utils.cpp
+    ${NVFUSER_PYTHON_COMMON}/translation_names.cpp
   )
   add_library(nvf_py_direct_internal OBJECT ${NVFUSER_PYTHON_DIRECT_SRCS})
 
@@ -1435,9 +1300,6 @@ target_include_directories(codegen_internal PRIVATE "${CMAKE_BINARY_DIR}/include
 install(EXPORT NvfuserTargets FILE NvfuserConfig.cmake DESTINATION share/cmake/nvfuser)
 
 file(CREATE_LINK "${CMAKE_BINARY_DIR}" "${NVFUSER_ROOT}/bin" SYMBOLIC)
-# These symbolic links help IDEs like Cursor resolve symbols in nvfuser and
-# nvfuser_direct.
-file(CREATE_LINK "${NVFUSER_ROOT}/python/nvfuser" "${NVFUSER_ROOT}/nvfuser" SYMBOLIC)
 file(CREATE_LINK "${NVFUSER_ROOT}/python/nvfuser_direct" "${NVFUSER_ROOT}/nvfuser_direct" SYMBOLIC)
 
 message(STATUS "******** Nvfuser configuration summary ********")

diff --git a/csrc/fusion_segmenter.cpp b/csrc/fusion_segmenter.cpp
@@ -4535,7 +4535,7 @@ bool SegmentCandidateFinder::privatizeUpCastOrSqueezeOp() {
       // More details of the issue regarding merging horizontal groups can be
       // found in issue 3829 -- https://github.com/NVIDIA/Fuser/issues/3829.
       // Even with a squeeze op with 2 uses, this test case:
-      // https://github.com/NVIDIA/Fuser/blob/70ab277c7d91bcc24cd50dd75cedd79863a24f96/tests/python/test_python_frontend.py#L3666C1-L3666C30
+      // https://github.com/NVIDIA/Fuser/blob/69da2d1972eb19bf7a04cef0c4debe9f55d8e11c/tests/python/direct/test_repro.py#L801
       // demonstrates that privatizing the squeeze op leads to horizontal groups
       // that can't be merged back.
       if (maybe_upcast_squeeze_out_tv->definition()->isA<SqueezeOp>() &&

diff --git a/csrc/multidevice/utils.h b/csrc/multidevice/utils.h
@@ -55,7 +55,8 @@ std::unordered_map<ParallelType, IterDomain*> mapDeviceAndStreamParallelTypeToId
 // `tv->getLogicalDomain()` map one-to-one modulo reduction. However, a size in
 // `at::Tensor::sizes` is a factor of the corresponding logical IterDomain's
 // extent if that IterDomain is sharded.
-int64_t getShardedLogicalAxis(const TensorView* tv, ParallelType parallel_type);
+NVF_API int64_t
+getShardedLogicalAxis(const TensorView* tv, ParallelType parallel_type);
 
 // Returns the IterDomain that's parallelized on `parallel_type` in the domain
 // of type `domain_type`.

diff --git a/csrc/options.cpp b/csrc/options.cpp
@@ -139,8 +139,6 @@ std::unordered_map<DebugDumpOption, std::vector<std::string>> Options<
       {"ptx", DebugDumpOption::Ptx},
       {"ptxas_verbose", DebugDumpOption::PrintPtxasLog},
       {"python_definition", DebugDumpOption::PythonDefinition},
-      {"python_definition_segments", DebugDumpOption::PythonDefinitionSegments},
-      {"python_frontend_debug", DebugDumpOption::PythonFrontendDebug},
       {"sass", DebugDumpOption::Sass},
       {"sass_to_file", DebugDumpOption::SassToFile},
       {"segmented_fusion", DebugDumpOption::FusionSegments},

diff --git a/csrc/options.h b/csrc/options.h
@@ -72,8 +72,6 @@ enum class DebugDumpOption {
   PreSegmenterLogging,
   HostIrLowering, //! Dump the Host IR after each lowering pass
   PythonDefinition, //! Python Frontend Fusion Definition.
-  PythonDefinitionSegments, //! Python Frontend Fusion Definition of segments.
-  PythonFrontendDebug, //! Python Frontend debug information.
   TransformPropagator, //! When running TransformPropagator, print propagation
                        //! path and replay result
   Cubin, //! Dump compiled CUBIN

diff --git a/csrc/runtime/fusion_kernel_runtime.cpp b/csrc/runtime/fusion_kernel_runtime.cpp
@@ -17,8 +17,6 @@
 #include "instrumentation.h"
 #include "ir/base_nodes.h"
 #include "preseg_passes/pre_segmenter.h"
-#include "python_frontend/fusion_definition.h"
-#include "python_frontend/translation.h"
 #include "runtime/executor.h"
 #include "runtime/executor_dispatch.h"
 #include "runtime/fusion_cache_utils.h"
@@ -430,16 +428,6 @@ void FusionKernelRuntime::compileFusionParallel(KernelArgumentHolder args) {
     FusionProfiler::startCompile();
   }
 
-  if (isDebugDumpEnabled(DebugDumpOption::PythonDefinitionSegments)) {
-    for (SegmentedGroup* group_to_run : runtime_workspace_.group_run_order) {
-      debug() << "Python definition for segmented group "
-              << group_to_run->groupId() << ":" << std::endl;
-      python_frontend::FusionDefinition fd(/*id=*/std::nullopt);
-      python_frontend::translate(group_to_run->getFusion(), &fd);
-      fd.print(debug());
-    }
-  }
-
   const std::vector<KernelArgumentHolder> all_runtime_inputs =
       prepareInputs(args);
 

diff --git a/csrc/serde/Serde.md b/csrc/serde/Serde.md
@@ -6,6 +6,8 @@
 
 # NvFuser Serialization
 
+## Serialization is currently disabled because the legacy Python frontend bindings have been removed.
+
 Serde is an acronym of serialization and deserialization.
 
 # Overview
@@ -66,7 +68,7 @@ References:
 
 # Serde Testing
 
-In test_python_frontend.py, the `exec_nvfuser` function is decorated with the `serde_check` functions. Every unit test should automatically test serialization.
+The `exec_nvfuser` function is decorated with the `serde_check` decorator. Every unit test should automatically test serialization.
 
 ```python
 def serde_check(test_fn: Callable):