rapidsai · mdboom · Apr 28, 2026
@@ -111,7 +111,7 @@ The doctor command discovers and runs checks via Python entry points defined in
 ### Key Dependencies
 
 - `rich` and `rich-click` for terminal output and CLI interface
-- `pynvml` (nvidia-ml-py) for GPU information
+- `cuda-core` for GPU information
 - `cuda-pathfinder` for locating CUDA installations
 - `psutil` for system memory checks
 

@@ -32,12 +32,14 @@ requirements:
     - python
     - importlib-metadata >=4.13.0
     - cuda-pathfinder >=1.2.3
-    - nvidia-ml-py >=12.0
     - packaging
     - psutil
     - pyyaml
     - rich
     - rich-click
+    - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+    # TODO: Change to cuda-core >= 1.0.0 once that's released
+    - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 
 tests:
   - script:

@@ -61,8 +61,9 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - cuda-core >=0.6.0
-          - nvidia-ml-py>=12.0
+          - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+          # TODO: Change to cuda-core >= 1.0.0 once that's released
+          - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
           - cuda-pathfinder >=1.2.3
           - packaging
           - psutil

@@ -10,7 +10,7 @@ for troubleshooting RAPIDS installations.
 :func:`~rapids_cli.debug.debug.run_debug` is the main entry point. It collects:
 
 - Platform and OS details (from ``platform`` and ``/etc/os-release``)
-- NVIDIA driver and CUDA versions (via ``pynvml``)
+- NVIDIA driver and CUDA versions (via ``cuda.core.system``)
 - CUDA runtime path (via ``cuda-pathfinder``)
 - System CUDA toolkit locations (globbing ``/usr/local/cuda*``)
 - Python version and hash info

@@ -95,15 +95,13 @@ GPU memory requirement check:
 
 .. code-block:: python
 
-   import pynvml
+   from cuda.core import system
 
 
    def gpu_memory_check(verbose=False, **kwargs):
        """Check that GPU has at least 8GB memory."""
-       pynvml.nvmlInit()
-       handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-       mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
-       available_gb = mem.total / (1024**3)
+       device = system.Device(index=0)
+       available_gb = device.memory_info.total / (1024**3)
 
        if available_gb < 8:
            raise ValueError(

@@ -19,7 +19,7 @@ No GPUs Detected
 
    .. code-block:: bash
 
-      python -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlDeviceGetCount())"
+      python -c "from cuda.core import system; print(system.Device.get_device_count())"
 
 3. If running in a container, ensure GPU passthrough is enabled:
 

@@ -7,10 +7,10 @@ license-files = ["LICENSE"]
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "cuda-core >=0.6.0",
+    "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*",
+    "cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core",
     "cuda-pathfinder >=1.2.3",
     "importlib-metadata >= 4.13.0; python_version < '3.12'",
-    "nvidia-ml-py>=12.0",
     "packaging",
     "psutil",
     "pyyaml",
@@ -49,6 +49,10 @@ version-file = "rapids_cli/_version.py"
 [tool.hatch.version]
 source = "vcs"
 
+[tool.hatch.metadata]
+# TODO: Remove me when cuda-core 1.0 is released
+allow-direct-references = true
+
 [tool.black]
 # this should match the oldest version of Python the library supports
 target-version = ["py310"]

@@ -11,7 +11,7 @@
 from pathlib import Path
 
 import cuda.pathfinder
-import pynvml
+from cuda.core import system
 from rich.console import Console
 from rich.table import Table
 
@@ -20,11 +20,7 @@
 
 def gather_cuda_version():
     """Return CUDA driver version as a string, similar to nvidia-smi output."""
-    version = pynvml.nvmlSystemGetCudaDriverVersion()
-    # pynvml returns an int like 12040 for 12.4, so format as string
-    major = version // 1000
-    minor = (version % 1000) // 10
-    patch = version % 10
+    major, minor, patch = system.get_driver_version_full()
     if patch == 0:
         return f"{major}.{minor}"
     else:
@@ -69,14 +65,13 @@ def gather_tools():
 
 def run_debug(output_format="console"):
     """Run debug."""
-    pynvml.nvmlInit()
     debug_info = {
         "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "platform": platform.platform(),
         "nvidia_smi_output": gather_command_output(
             ["nvidia-smi"], "Nvidia-smi not installed"
         ),
-        "driver_version": pynvml.nvmlSystemGetDriverVersion(),
+        "driver_version": ".".join(str(x) for x in system.get_driver_version_full(kernel_mode=True)),
         "cuda_version": gather_cuda_version(),
         "cuda_runtime_path": cuda.pathfinder.find_nvidia_header_directory("cudart"),
         "system_ctk": sorted(

@@ -2,17 +2,14 @@
 # SPDX-License-Identifier: Apache-2.0
 """Check for CUDA and driver compatibility."""
 
-import pynvml
+from cuda.core import system
 
 
 def cuda_check(verbose=False):
     """Check CUDA availability."""
+
     try:
-        pynvml.nvmlInit()
-        try:
-            cuda_version = pynvml.nvmlSystemGetCudaDriverVersion()
-            return cuda_version
-        except pynvml.NVMLError as e:
-            raise ValueError("Unable to look up CUDA version") from e
-    except pynvml.NVMLError as e:
+        cuda_version = system.get_driver_version_full()  # CUDA (user-mode) version
+        return cuda_version[0] * 1000 + cuda_version[1] * 10 + cuda_version[2]
+    except system.NvmlError as e:
         raise ValueError("Unable to look up CUDA version") from e
@@ -2,17 +2,16 @@
 # SPDX-License-Identifier: Apache-2.0
 """GPU checks for the doctor command."""
 
-import pynvml
+from cuda.core import system
 
 REQUIRED_COMPUTE_CAPABILITY = 7
 
 
 def gpu_check(verbose=False):
     """Check GPU availability."""
     try:
-        pynvml.nvmlInit()
-        num_gpus = pynvml.nvmlDeviceGetCount()
-    except pynvml.NVMLError as e:
+        num_gpus = system.Device.get_device_count()
+    except system.NvmlError as e:
         raise ValueError("No available GPUs detected") from e
     assert num_gpus > 0, "No GPUs detected"
     return f"GPU(s) detected: {num_gpus}"
@@ -21,13 +20,14 @@ def gpu_check(verbose=False):
 def check_gpu_compute_capability(verbose):
     """Check the system for GPU Compute Capability."""
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
+        num_gpus = system.Device.get_device_count()
+        if num_gpus == 0:
+            raise system.NvmlError(1)
+    except system.NvmlError as e:
         raise ValueError("No GPU - cannot determine GPU Compute Capability") from e
 
-    for i in range(pynvml.nvmlDeviceGetCount()):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-        major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
+    for i, device in enumerate(system.Device.get_all_devices()):
+        major, minor = device.cuda_compute_capability
         if major >= REQUIRED_COMPUTE_CAPABILITY:
             continue
         else:

@@ -5,7 +5,8 @@
 import warnings
 
 import psutil
-import pynvml
+
+from cuda.core import system
 
 
 def get_system_memory(verbose=False):
@@ -17,15 +18,11 @@ def get_system_memory(verbose=False):
 
 def get_gpu_memory(verbose=False):
     """Get the total GPU memory."""
-    pynvml.nvmlInit()
-    gpus = pynvml.nvmlDeviceGetCount()
+
     gpu_memory_total = 0
-    for i in range(gpus):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-        memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpu_memory_total += memory_info.total / (1024**3)  # converts to gigabytes
+    for device in system.Device.get_all_devices():
+        gpu_memory_total += device.memory_info.total / (1024**3)  # converts to gigabytes
 
-    pynvml.nvmlShutdown()
     return gpu_memory_total
 
 
@@ -36,9 +33,10 @@ def check_memory_to_gpu_ratio(verbose=True):
 
     """
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
-        raise ValueError("GPU not found. Please ensure GPUs are installed.") from e
+        if system.Device.get_device_count() == 0:
+            raise system.NvmlError(1)
+    except system.NvmlError:
+        raise ValueError("GPU not found. Please ensure GPUs are installed.")
 
     system_memory = get_system_memory(verbose)
     gpu_memory = get_gpu_memory(verbose)

@@ -2,18 +2,19 @@
 # SPDX-License-Identifier: Apache-2.0
 """Check for NVLink status."""
 
-import pynvml
+from cuda.core import system
+from cuda.bindings import nvml
 
 
 def check_nvlink_status(verbose=True, **kwargs):
     """Check NVLink status across all GPUs."""
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
+        device_count = system.Device.get_device_count()
+        if device_count == 0:
+            raise system.NvmlError(1)
+    except system.NvmlError as e:
         raise ValueError("GPU not found. Please ensure GPUs are installed.") from e
 
-    device_count = pynvml.nvmlDeviceGetCount()
-
     # NVLink requires at least 2 GPUs to be meaningful. A single GPU has nothing
     # to link to, so there is nothing to check.
     if device_count < 2:
@@ -25,24 +26,22 @@ def check_nvlink_status(verbose=True, **kwargs):
 
     failed_links: list[tuple[int, int]] = []
 
-    for gpu_idx in range(device_count):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_idx)
+    for gpu_idx, device in enumerate(system.Device.get_all_devices()):
         # NVML provides no API to query the number of NVLink slots on a device
         # (e.g. V100=6, A100=12, H100=18). The only way to discover the real count
         # is to iterate up to NVML_NVLINK_MAX_LINKS and stop when the driver signals
         # that link_id is out of range via NVMLError_InvalidArgument.
-        for link_id in range(pynvml.NVML_NVLINK_MAX_LINKS):
+        for link_id in range(nvml.NVLINK_MAX_LINKS):
             try:
                 # nvmlDeviceGetNvLinkState(device, link) returns NVML_FEATURE_ENABLED
                 # if the link is active, or NVML_FEATURE_DISABLED if it is not.
-                state = pynvml.nvmlDeviceGetNvLinkState(handle, link_id)
-                if state == pynvml.NVML_FEATURE_DISABLED:
+                if not device.get_nvlink(link_id).state:
                     failed_links.append((gpu_idx, link_id))
-            except pynvml.NVMLError_NotSupported:
+            except system.NotSupportedError:
                 # The driver reports NVLink is not supported on this system.
                 # There is nothing to check — skip like the single-GPU case above.
                 return False
-            except pynvml.NVMLError_InvalidArgument:
+            except system.InvalidArgumentError:
                 # link_id exceeds the number of NVLink slots on this device.
                 # Stop iterating links for this GPU.
                 break

@@ -2,32 +2,34 @@
 # SPDX-License-Identifier: Apache-2.0
 from unittest.mock import patch
 
-import pynvml
 import pytest
 
 from rapids_cli.doctor.checks.cuda_driver import cuda_check
 
 
 def test_cuda_check_success():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050),
+        patch("cuda.core.system.get_driver_version_full", return_value=(12, 5, 0)),
     ):
         assert cuda_check(verbose=True) == 12050
 
 
 def test_cuda_check_init_fails():
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    from cuda.bindings import nvml  
+
+    with patch("cuda.bindings.nvml.init_v2", side_effect=nvml.NvmlError(1)):
         with pytest.raises(ValueError, match="Unable to look up CUDA version"):
             cuda_check()
 
 
 def test_cuda_check_version_query_fails():
+    from cuda.bindings import nvml
+
     with (
-        patch("pynvml.nvmlInit"),
+        patch("cuda.bindings.nvml.init_v2"),
         patch(
-            "pynvml.nvmlSystemGetCudaDriverVersion",
-            side_effect=pynvml.NVMLError(1),
+            "cuda.bindings.nvml.system_get_cuda_driver_version",
+            side_effect=nvml.NvmlError(1),
         ),
     ):
         with pytest.raises(ValueError, match="Unable to look up CUDA version"):

@@ -14,14 +14,14 @@
 
 def test_gather_cuda_version():
     """Test CUDA version gathering."""
-    with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040):
+    with patch("cuda.core.system.get_driver_version_full", return_value=(12, 4, 0)):
         result = gather_cuda_version()
         assert result == "12.4"
 
 
 def test_gather_cuda_version_with_patch():
     """Test CUDA version with patch number."""
-    with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12345):
+    with patch("cuda.core.system.get_driver_version_full", return_value=(12, 34, 5)):
         result = gather_cuda_version()
         assert result == "12.34.5"
 
@@ -74,9 +74,9 @@ def test_run_debug_console(capsys):
     mock_vm.total = 32 * 1024**3
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.system_get_driver_version", return_value="550.54.15"),
+        patch("cuda.bindings.nvml.system_get_cuda_driver_version", return_value=12040),
         patch(
             "cuda.pathfinder.find_nvidia_header_directory",
             return_value="/usr/local/cuda/include",
@@ -95,10 +95,11 @@ def test_run_debug_console(capsys):
 
 def test_run_debug_json(capsys):
     """Test run_debug with JSON output."""
+
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.system_get_driver_version", return_value="550.54.15"),
+        patch("cuda.bindings.nvml.system_get_cuda_driver_version", return_value=12040),
         patch(
             "cuda.pathfinder.find_nvidia_header_directory",
             return_value="/usr/local/cuda/include",