diff --git a/CLAUDE.md b/CLAUDE.md
index d73a716..5b794f0 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -111,7 +111,7 @@ The doctor command discovers and runs checks via Python entry points defined in
 ### Key Dependencies
 
 - `rich` and `rich-click` for terminal output and CLI interface
-- `pynvml` (nvidia-ml-py) for GPU information
+- `cuda-core` for GPU information
 - `cuda-pathfinder` for locating CUDA installations
 - `psutil` for system memory checks
 
diff --git a/conda/recipes/rapids-cli/recipe.yaml b/conda/recipes/rapids-cli/recipe.yaml
index 4e3ad4a..63ab677 100644
--- a/conda/recipes/rapids-cli/recipe.yaml
+++ b/conda/recipes/rapids-cli/recipe.yaml
@@ -32,12 +32,14 @@ requirements:
     - python
     - importlib-metadata >=4.13.0
     - cuda-pathfinder >=1.2.3
-    - nvidia-ml-py >=12.0
     - packaging
     - psutil
     - pyyaml
     - rich
     - rich-click
+    - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+    # TODO: Change to cuda-core >= 1.0.0 once that's released
+    - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
 
 tests:
   - script:
diff --git a/dependencies.yaml b/dependencies.yaml
index d312739..0318431 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -61,8 +61,9 @@ dependencies:
   common:
     - output_types: [conda, requirements, pyproject]
       packages:
-        - cuda-core >=0.6.0
-        - nvidia-ml-py>=12.0
+        - cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*
+        # TODO: Change to cuda-core >= 1.0.0 once that's released
+        - cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core
         - cuda-pathfinder >=1.2.3
         - packaging
         - psutil
diff --git a/docs/source/api/debug.rst b/docs/source/api/debug.rst
index aa4e84b..d892970 100644
--- a/docs/source/api/debug.rst
+++ b/docs/source/api/debug.rst
@@ -10,7 +10,7 @@ for troubleshooting RAPIDS installations.
 :func:`~rapids_cli.debug.debug.run_debug` is the main entry point. It collects:
 
 - Platform and OS details (from ``platform`` and ``/etc/os-release``)
-- NVIDIA driver and CUDA versions (via ``pynvml``)
+- NVIDIA driver and CUDA versions (via ``cuda.core.system``)
 - CUDA runtime path (via ``cuda-pathfinder``)
 - System CUDA toolkit locations (globbing ``/usr/local/cuda*``)
 - Python version and hash info
diff --git a/docs/source/plugin_development.rst b/docs/source/plugin_development.rst
index d5b5e45..6ecd9af 100644
--- a/docs/source/plugin_development.rst
+++ b/docs/source/plugin_development.rst
@@ -95,15 +95,13 @@ GPU memory requirement check:
 
 .. code-block:: python
 
-    import pynvml
+    from cuda.core import system
 
     def gpu_memory_check(verbose=False, **kwargs):
         """Check that GPU has at least 8GB memory."""
-        pynvml.nvmlInit()
-        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        available_gb = mem.total / (1024**3)
+        device = system.Device(index=0)
+        available_gb = device.memory_info.total / (1024**3)
 
         if available_gb < 8:
             raise ValueError(
diff --git a/docs/source/troubleshooting.rst b/docs/source/troubleshooting.rst
index 5da7f2c..9c8a23b 100644
--- a/docs/source/troubleshooting.rst
+++ b/docs/source/troubleshooting.rst
@@ -19,7 +19,7 @@ No GPUs Detected
 
    .. code-block:: bash
 
-      python -c "import pynvml; pynvml.nvmlInit(); print(pynvml.nvmlDeviceGetCount())"
+      python -c "from cuda.core import system; print(system.Device.get_device_count())"
 
 3. If running in a container, ensure GPU passthrough is enabled:
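For review convenience, the mapping below collects every pynvml call this changeset replaces and the cuda.core.system name it assumes instead. cuda-core 1.0 is not yet released, so the right-hand side reflects this diff (and its TODOs) rather than published API documentation:

# pynvml -> cuda.core.system mapping assumed throughout this changeset
# (cuda-core >= 1.0 is unreleased; names are taken from the diff itself):
#
#   pynvml.nvmlInit()                        -> no explicit init (assumed implicit)
#   pynvml.nvmlDeviceGetCount()              -> system.Device.get_device_count()
#   pynvml.nvmlDeviceGetHandleByIndex(i)     -> system.Device(index=i)
#   pynvml.nvmlDeviceGetMemoryInfo(h).total  -> device.memory_info.total
#   pynvml.nvmlDeviceGetCudaComputeCapability(h)
#                                            -> device.cuda_compute_capability
#   pynvml.nvmlSystemGetCudaDriverVersion()  -> system.get_driver_version_full()
#   pynvml.nvmlDeviceGetNvLinkState(h, l)    -> device.get_nvlink(l).state
#   pynvml.NVMLError                         -> system.NvmlError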
diff --git a/pyproject.toml b/pyproject.toml
index 882cc68..29e58b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,10 +7,10 @@ license-files = ["LICENSE"]
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
-    "cuda-core >=0.6.0",
+    "cuda-bindings>=12.9.6,!=13.0.*,!=13.1.*",
+    "cuda-core @ git+https://github.com/nvidia/cuda-python@main#subdirectory=cuda_core",
     "cuda-pathfinder >=1.2.3",
     "importlib-metadata >= 4.13.0; python_version < '3.12'",
-    "nvidia-ml-py>=12.0",
     "packaging",
     "psutil",
     "pyyaml",
@@ -49,6 +49,10 @@ version-file = "rapids_cli/_version.py"
 [tool.hatch.version]
 source = "vcs"
 
+[tool.hatch.metadata]
+# TODO: Remove me when cuda-core 1.0 is released
+allow-direct-references = true
+
 [tool.black]
 # this should match the oldest version of Python the library supports
 target-version = ["py310"]
diff --git a/rapids_cli/debug/debug.py b/rapids_cli/debug/debug.py
index fca4d1d..f3160d1 100644
--- a/rapids_cli/debug/debug.py
+++ b/rapids_cli/debug/debug.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 
 import cuda.pathfinder
-import pynvml
+from cuda.core import system
 from rich.console import Console
 from rich.table import Table
 
@@ -20,11 +20,7 @@
 
 def gather_cuda_version():
     """Return CUDA driver version as a string, similar to nvidia-smi output."""
-    version = pynvml.nvmlSystemGetCudaDriverVersion()
-    # pynvml returns an int like 12040 for 12.4, so format as string
-    major = version // 1000
-    minor = (version % 1000) // 10
-    patch = version % 10
+    major, minor, patch = system.get_driver_version_full()
     if patch == 0:
         return f"{major}.{minor}"
     else:
@@ -69,14 +65,15 @@ def gather_tools():
 
 def run_debug(output_format="console"):
     """Run debug."""
-    pynvml.nvmlInit()
     debug_info = {
         "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
         "platform": platform.platform(),
         "nvidia_smi_output": gather_command_output(
             ["nvidia-smi"], "Nvidia-smi not installed"
         ),
-        "driver_version": pynvml.nvmlSystemGetDriverVersion(),
+        "driver_version": ".".join(
+            str(x) for x in system.get_driver_version_full(kernel_mode=True)
+        ),
         "cuda_version": gather_cuda_version(),
         "cuda_runtime_path": cuda.pathfinder.find_nvidia_header_directory("cudart"),
         "system_ctk": sorted(
diff --git a/rapids_cli/doctor/checks/cuda_driver.py b/rapids_cli/doctor/checks/cuda_driver.py
index 252dd47..1aa3a41 100644
--- a/rapids_cli/doctor/checks/cuda_driver.py
+++ b/rapids_cli/doctor/checks/cuda_driver.py
@@ -2,17 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 """Check for CUDA and driver compatibility."""
 
-import pynvml
+from cuda.core import system
 
 
 def cuda_check(verbose=False):
     """Check CUDA availability."""
     try:
-        pynvml.nvmlInit()
-        try:
-            cuda_version = pynvml.nvmlSystemGetCudaDriverVersion()
-            return cuda_version
-        except pynvml.NVMLError as e:
-            raise ValueError("Unable to look up CUDA version") from e
-    except pynvml.NVMLError as e:
+        cuda_version = system.get_driver_version_full()
+        return cuda_version[0] * 1000 + cuda_version[1] * 10 + cuda_version[2]
+    except system.NvmlError as e:
         raise ValueError("Unable to look up CUDA version") from e
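The version arithmetic above is the easiest thing to get wrong in review: cuda_check() re-encodes the (major, minor, patch) tuple into the pynvml-style integer its callers already expect, and gather_cuda_version() formats the tuple the way nvidia-smi prints it. A dependency-free sketch of both conversions (the two helper names are invented for illustration):

def encode_cuda_version(major: int, minor: int, patch: int) -> int:
    # Mirrors cuda_check(): pynvml encoded CUDA 12.4 as 12040, so keep that shape.
    return major * 1000 + minor * 10 + patch


def format_cuda_version(major: int, minor: int, patch: int) -> str:
    # Mirrors gather_cuda_version(): drop a zero patch, nvidia-smi style.
    if patch == 0:
        return f"{major}.{minor}"
    return f"{major}.{minor}.{patch}"


assert encode_cuda_version(12, 5, 0) == 12050  # matches test_cuda_check_success
assert format_cuda_version(12, 4, 0) == "12.4"  # matches test_gather_cuda_version
assert format_cuda_version(12, 34, 5) == "12.34.5"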
diff --git a/rapids_cli/doctor/checks/gpu.py b/rapids_cli/doctor/checks/gpu.py
index 77e6ca6..cf77e9a 100644
--- a/rapids_cli/doctor/checks/gpu.py
+++ b/rapids_cli/doctor/checks/gpu.py
@@ -2,7 +2,7 @@
 # SPDX-License-Identifier: Apache-2.0
 """GPU checks for the doctor command."""
 
-import pynvml
+from cuda.core import system
 
 REQUIRED_COMPUTE_CAPABILITY = 7
 
@@ -10,9 +10,8 @@
 def gpu_check(verbose=False):
     """Check GPU availability."""
     try:
-        pynvml.nvmlInit()
-        num_gpus = pynvml.nvmlDeviceGetCount()
-    except pynvml.NVMLError as e:
+        num_gpus = system.Device.get_device_count()
+    except system.NvmlError as e:
         raise ValueError("No available GPUs detected") from e
     assert num_gpus > 0, "No GPUs detected"
     return f"GPU(s) detected: {num_gpus}"
@@ -21,13 +20,14 @@ def gpu_check(verbose=False):
 def check_gpu_compute_capability(verbose):
     """Check the system for GPU Compute Capability."""
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
+        num_gpus = system.Device.get_device_count()
+    except system.NvmlError as e:
         raise ValueError("No GPU - cannot determine GPU Compute Capability") from e
+    if num_gpus == 0:
+        raise ValueError("No GPU - cannot determine GPU Compute Capability")
 
-    for i in range(pynvml.nvmlDeviceGetCount()):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-        major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
+    for i, device in enumerate(system.Device.get_all_devices()):
+        major, minor = device.cuda_compute_capability
         if major >= REQUIRED_COMPUTE_CAPABILITY:
             continue
         else:
diff --git a/rapids_cli/doctor/checks/memory.py b/rapids_cli/doctor/checks/memory.py
index cb1fcb5..138d764 100644
--- a/rapids_cli/doctor/checks/memory.py
+++ b/rapids_cli/doctor/checks/memory.py
@@ -5,7 +5,8 @@
 import warnings
 
 import psutil
-import pynvml
+
+from cuda.core import system
 
 
 def get_system_memory(verbose=False):
@@ -17,15 +18,11 @@ def get_system_memory(verbose=False):
 
 def get_gpu_memory(verbose=False):
     """Get the total GPU memory."""
-    pynvml.nvmlInit()
-    gpus = pynvml.nvmlDeviceGetCount()
+
     gpu_memory_total = 0
-    for i in range(gpus):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
-        memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpu_memory_total += memory_info.total / (1024**3)  # converts to gigabytes
+    for device in system.Device.get_all_devices():
+        gpu_memory_total += device.memory_info.total / (1024**3)  # converts to gigabytes
 
-    pynvml.nvmlShutdown()
     return gpu_memory_total
 
@@ -36,9 +33,11 @@
     """
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
-        raise ValueError("GPU not found. Please ensure GPUs are installed.") from e
+        num_gpus = system.Device.get_device_count()
+    except system.NvmlError as e:
+        raise ValueError("GPU not found. Please ensure GPUs are installed.") from e
+    if num_gpus == 0:
+        raise ValueError("GPU not found. Please ensure GPUs are installed.")
 
     system_memory = get_system_memory(verbose)
     gpu_memory = get_gpu_memory(verbose)
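get_gpu_memory() now sums device.memory_info.total over all devices. NVML reports totals in bytes, so the division by 1024**3 converts to GiB. A dependency-free sketch of the same arithmetic, with invented byte counts:

# Four GPUs reporting 16 GiB each, as raw byte counts (values invented).
totals_bytes = [16 * 1024**3] * 4
gpu_memory_total = sum(total / (1024**3) for total in totals_bytes)
assert gpu_memory_total == 64.0  # matches test_get_gpu_memory_multiple_gpus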
diff --git a/rapids_cli/doctor/checks/nvlink.py b/rapids_cli/doctor/checks/nvlink.py
index 6dd6c66..82debf6 100644
--- a/rapids_cli/doctor/checks/nvlink.py
+++ b/rapids_cli/doctor/checks/nvlink.py
@@ -2,18 +2,19 @@
 # SPDX-License-Identifier: Apache-2.0
 """Check for NVLink status."""
 
-import pynvml
+from cuda.core import system
+from cuda.bindings import nvml
 
 
 def check_nvlink_status(verbose=True, **kwargs):
     """Check NVLink status across all GPUs."""
     try:
-        pynvml.nvmlInit()
-    except pynvml.NVMLError as e:
+        device_count = system.Device.get_device_count()
+    except system.NvmlError as e:
         raise ValueError("GPU not found. Please ensure GPUs are installed.") from e
+    if device_count == 0:
+        raise ValueError("GPU not found. Please ensure GPUs are installed.")
 
-    device_count = pynvml.nvmlDeviceGetCount()
-
     # NVLink requires at least 2 GPUs to be meaningful. A single GPU has nothing
     # to link to, so there is nothing to check.
     if device_count < 2:
@@ -25,24 +26,22 @@ def check_nvlink_status(verbose=True, **kwargs):
 
     failed_links: list[tuple[int, int]] = []
 
-    for gpu_idx in range(device_count):
-        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_idx)
+    for gpu_idx, device in enumerate(system.Device.get_all_devices()):
         # NVML provides no API to query the number of NVLink slots on a device
         # (e.g. V100=6, A100=12, H100=18). The only way to discover the real count
-        # is to iterate up to NVML_NVLINK_MAX_LINKS and stop when the driver signals
-        # that link_id is out of range via NVMLError_InvalidArgument.
-        for link_id in range(pynvml.NVML_NVLINK_MAX_LINKS):
+        # is to iterate up to NVLINK_MAX_LINKS and stop when the driver signals
+        # that link_id is out of range via InvalidArgumentError.
+        for link_id in range(nvml.NVLINK_MAX_LINKS):
             try:
-                # nvmlDeviceGetNvLinkState(device, link) returns NVML_FEATURE_ENABLED
-                # if the link is active, or NVML_FEATURE_DISABLED if it is not.
-                state = pynvml.nvmlDeviceGetNvLinkState(handle, link_id)
-                if state == pynvml.NVML_FEATURE_DISABLED:
+                # get_nvlink(link_id).state is truthy when the link is active
+                # (NVML_FEATURE_ENABLED) and falsy when it is disabled.
+                if not device.get_nvlink(link_id).state:
                     failed_links.append((gpu_idx, link_id))
-            except pynvml.NVMLError_NotSupported:
+            except system.NotSupportedError:
                 # The driver reports NVLink is not supported on this system.
                 # There is nothing to check — skip like the single-GPU case above.
                 return False
-            except pynvml.NVMLError_InvalidArgument:
+            except system.InvalidArgumentError:
                 # link_id exceeds the number of NVLink slots on this device.
                 # Stop iterating links for this GPU.
                 break
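The loop above is the subtle part of this file: NVML offers no query for how many NVLink slots a device has, so the code probes link ids in order until the driver rejects one. The same pattern as a standalone, runnable sketch, with probe() standing in for device.get_nvlink(link_id).state and IndexError standing in for InvalidArgumentError:

def count_links(probe, max_links):
    # Probe link ids in order; the first out-of-range id ends the scan.
    found = 0
    for link_id in range(max_links):
        try:
            probe(link_id)
        except IndexError:  # stand-in for the driver's InvalidArgumentError
            break
        found += 1
    return found


links = [True] * 6  # a V100-like device exposes 6 NVLink slots
assert count_links(lambda link_id: links[link_id], 18) == 6  # 18 = NVLINK_MAX_LINKS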
diff --git a/rapids_cli/tests/test_cuda.py b/rapids_cli/tests/test_cuda.py
index c6d4525..c026575 100644
--- a/rapids_cli/tests/test_cuda.py
+++ b/rapids_cli/tests/test_cuda.py
@@ -2,7 +2,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 from unittest.mock import patch
 
-import pynvml
 import pytest
 
 from rapids_cli.doctor.checks.cuda_driver import cuda_check
@@ -10,24 +9,27 @@
 def test_cuda_check_success():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12050),
+        patch("cuda.core.system.get_driver_version_full", return_value=(12, 5, 0)),
     ):
         assert cuda_check(verbose=True) == 12050
 
 
 def test_cuda_check_init_fails():
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    from cuda.bindings import nvml
+
+    with patch("cuda.bindings.nvml.init_v2", side_effect=nvml.NvmlError(1)):
         with pytest.raises(ValueError, match="Unable to look up CUDA version"):
             cuda_check()
 
 
 def test_cuda_check_version_query_fails():
+    from cuda.bindings import nvml
+
     with (
-        patch("pynvml.nvmlInit"),
+        patch("cuda.bindings.nvml.init_v2"),
         patch(
-            "pynvml.nvmlSystemGetCudaDriverVersion",
-            side_effect=pynvml.NVMLError(1),
+            "cuda.bindings.nvml.system_get_cuda_driver_version",
+            side_effect=nvml.NvmlError(1),
         ),
     ):
         with pytest.raises(ValueError, match="Unable to look up CUDA version"):
diff --git a/rapids_cli/tests/test_debug.py b/rapids_cli/tests/test_debug.py
index 91c330c..d94c1f6 100644
--- a/rapids_cli/tests/test_debug.py
+++ b/rapids_cli/tests/test_debug.py
@@ -14,14 +14,14 @@
 
 def test_gather_cuda_version():
     """Test CUDA version gathering."""
-    with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040):
+    with patch("cuda.core.system.get_driver_version_full", return_value=(12, 4, 0)):
         result = gather_cuda_version()
         assert result == "12.4"
 
 
 def test_gather_cuda_version_with_patch():
     """Test CUDA version with patch number."""
-    with patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12345):
+    with patch("cuda.core.system.get_driver_version_full", return_value=(12, 34, 5)):
         result = gather_cuda_version()
         assert result == "12.34.5"
@@ -74,9 +74,9 @@ def test_run_debug_console(capsys):
     mock_vm.total = 32 * 1024**3
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.system_get_driver_version", return_value="550.54.15"),
+        patch("cuda.bindings.nvml.system_get_cuda_driver_version", return_value=12040),
         patch(
             "cuda.pathfinder.find_nvidia_header_directory",
             return_value="/usr/local/cuda/include",
@@ -95,10 +95,10 @@ def test_run_debug_console(capsys):
 
 def test_run_debug_json(capsys):
     """Test run_debug with JSON output."""
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlSystemGetDriverVersion", return_value="550.54.15"),
-        patch("pynvml.nvmlSystemGetCudaDriverVersion", return_value=12040),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.system_get_driver_version", return_value="550.54.15"),
+        patch("cuda.bindings.nvml.system_get_cuda_driver_version", return_value=12040),
         patch(
             "cuda.pathfinder.find_nvidia_header_directory",
             return_value="/usr/local/cuda/include",
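Note the pattern change in the tests: instead of patching pynvml functions, they now patch the low-level cuda.bindings.nvml functions that cuda.core.system is assumed to resolve at call time (only get_driver_version_full is patched at the cuda.core layer directly). Patching works because the attribute is looked up when the call happens, which the following dependency-free example demonstrates:

import math
from unittest.mock import patch


def circle_area(radius):
    return math.pi * radius * radius  # math.pi is resolved at call time


with patch("math.pi", 3.0):
    assert circle_area(2.0) == 12.0  # the patched attribute is visible here
assert circle_area(2.0) != 12.0  # and automatically restored afterwards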
diff --git a/rapids_cli/tests/test_gpu.py b/rapids_cli/tests/test_gpu.py
index a895bc2..3b3665c 100644
--- a/rapids_cli/tests/test_gpu.py
+++ b/rapids_cli/tests/test_gpu.py
@@ -13,8 +13,8 @@
 
 def test_gpu_check_success():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
     ):
         result = gpu_check(verbose=True)
         assert result == "GPU(s) detected: 2"
@@ -22,28 +22,28 @@ def test_gpu_check_success():
 
 def test_gpu_check_no_gpus():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=0),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=0),
     ):
         with pytest.raises(AssertionError, match="No GPUs detected"):
             gpu_check(verbose=False)
 
 
 def test_gpu_check_nvml_error():
-    import pynvml
+    from cuda.bindings import nvml
 
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    with patch("cuda.bindings.nvml.init_v2", side_effect=nvml.NvmlError(1)):
        with pytest.raises(ValueError, match="No available GPUs detected"):
             gpu_check(verbose=False)
 
 
 def test_check_gpu_compute_capability_success():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex"),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
         patch(
-            "pynvml.nvmlDeviceGetCudaComputeCapability",
+            "cuda.bindings.nvml.device_get_cuda_compute_capability",
             return_value=(REQUIRED_COMPUTE_CAPABILITY, 5),
         ),
     ):
@@ -53,10 +53,10 @@ def test_check_gpu_compute_capability_success():
 
 def test_check_gpu_compute_capability_insufficient():
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
-        patch("pynvml.nvmlDeviceGetHandleByIndex"),
-        patch("pynvml.nvmlDeviceGetCudaComputeCapability", return_value=(6, 0)),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=1),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_cuda_compute_capability", return_value=(6, 0)),
     ):
         with pytest.raises(
             ValueError,
@@ -66,9 +66,9 @@ def test_check_gpu_compute_capability_insufficient():
 
 def test_check_gpu_compute_capability_no_gpu():
-    import pynvml
+    from cuda.bindings import nvml
 
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    with patch("cuda.bindings.nvml.init_v2", side_effect=nvml.NvmlError(1)):
         with pytest.raises(
             ValueError, match="No GPU - cannot determine GPU Compute Capability"
         ):
diff --git a/rapids_cli/tests/test_memory.py b/rapids_cli/tests/test_memory.py
index 572df33..4ef5e32 100644
--- a/rapids_cli/tests/test_memory.py
+++ b/rapids_cli/tests/test_memory.py
@@ -20,32 +20,30 @@ def test_get_system_memory():
 
 def test_get_gpu_memory_single_gpu():
-    mock_handle = MagicMock()
     mock_memory_info = MagicMock()
     mock_memory_info.total = 16 * 1024**3  # 16 GB in bytes
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory_info),
-        patch("pynvml.nvmlShutdown"),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=1),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_memory_info_v2", return_value=mock_memory_info),
+        patch("cuda.bindings.nvml.shutdown"),
     ):
         result = get_gpu_memory(verbose=False)
         assert result == 16.0
 
 
 def test_get_gpu_memory_multiple_gpus():
-    mock_handle = MagicMock()
     mock_memory_info = MagicMock()
     mock_memory_info.total = 16 * 1024**3  # 16 GB per GPU
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=4),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetMemoryInfo", return_value=mock_memory_info),
-        patch("pynvml.nvmlShutdown"),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=4),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_memory_info_v2", return_value=mock_memory_info),
+        patch("cuda.bindings.nvml.shutdown"),
     ):
         result = get_gpu_memory(verbose=False)
         assert result == 64.0  # 16 GB * 4 GPUs
@@ -53,7 +51,8 @@ def test_get_gpu_memory_multiple_gpus():
 
 def test_check_memory_to_gpu_ratio_good_ratio():
     with (
-        patch("pynvml.nvmlInit"),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
         patch("rapids_cli.doctor.checks.memory.get_system_memory", return_value=64.0),
         patch("rapids_cli.doctor.checks.memory.get_gpu_memory", return_value=32.0),
     ):
@@ -63,7 +62,8 @@ def test_check_memory_to_gpu_ratio_good_ratio():
 
 def test_check_memory_to_gpu_ratio_warning():
     with (
-        patch("pynvml.nvmlInit"),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
         patch("rapids_cli.doctor.checks.memory.get_system_memory", return_value=32.0),
         patch("rapids_cli.doctor.checks.memory.get_gpu_memory", return_value=32.0),
     ):
@@ -73,9 +73,10 @@ def test_check_memory_to_gpu_ratio_warning():
 
 def test_check_memory_to_gpu_ratio_no_gpu():
-    import pynvml
-
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    with (
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=0),
+    ):
         with pytest.raises(
             ValueError, match="GPU not found. Please ensure GPUs are installed."
         ):
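The memory tests keep building their fake NVML memory structs with MagicMock: any attribute can be assigned on a MagicMock instance, so .total stands in for the real memory-info field no matter which library returns the struct. Minimal illustration:

from unittest.mock import MagicMock

mock_memory_info = MagicMock()
mock_memory_info.total = 16 * 1024**3  # 16 GiB in bytes, as NVML reports it
assert mock_memory_info.total / (1024**3) == 16.0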
diff --git a/rapids_cli/tests/test_nvlink.py b/rapids_cli/tests/test_nvlink.py
index 4deb0dc..142d2e9 100644
--- a/rapids_cli/tests/test_nvlink.py
+++ b/rapids_cli/tests/test_nvlink.py
@@ -16,21 +16,19 @@
 )
 def test_check_nvlink_status_success(verbose, expected):
     """2 GPUs, all NVLinks active — verbose controls whether a summary string is returned."""
-    import pynvml
-
-    mock_handle = MagicMock()
+    from cuda.bindings import nvml
 
     # Simulate a V100 with 6 NVLink slots; link_id >= 6 is out of range.
     def mock_link_state(handle, link_id):
         if link_id >= 6:
-            raise pynvml.NVMLError_InvalidArgument
-        return pynvml.NVML_FEATURE_ENABLED
+            raise nvml.InvalidArgumentError(2)
+        return nvml.EnableState.FEATURE_ENABLED
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=mock_link_state),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_nvlink_state", side_effect=mock_link_state),
     ):
         result = check_nvlink_status(verbose=verbose)
         assert result == expected
@@ -39,8 +37,8 @@ def mock_link_state(handle, link_id):
 
 def test_check_nvlink_status_single_gpu():
     """Single GPU — NVLink is not applicable, check skips early."""
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=1),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=1),
     ):
         result = check_nvlink_status(verbose=False)
         assert result is False
@@ -48,9 +46,9 @@ def test_check_nvlink_status_single_gpu():
 
 def test_check_nvlink_status_no_gpu():
-    """nvmlInit fails — no GPUs installed."""
-    import pynvml
+    """NVML init fails — no GPUs installed."""
+    from cuda.bindings import nvml
 
-    with patch("pynvml.nvmlInit", side_effect=pynvml.NVMLError(1)):
+    with patch("cuda.bindings.nvml.init_v2", side_effect=nvml.NvmlError(1)):
         with pytest.raises(
             ValueError, match="GPU not found. Please ensure GPUs are installed."
         ):
@@ -59,15 +57,14 @@ def test_check_nvlink_status_no_gpu():
 
 def test_check_nvlink_status_not_supported():
     """NVLink is not supported on this system — check skips silently like single-GPU case."""
-    import pynvml
+    from cuda.bindings import nvml
 
-    mock_handle = MagicMock()
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
         patch(
-            "pynvml.nvmlDeviceGetNvLinkState", side_effect=pynvml.NVMLError_NotSupported
+            "cuda.bindings.nvml.device_get_nvlink_state", side_effect=nvml.NotSupportedError(3)
         ),
     ):
         result = check_nvlink_status(verbose=False)
@@ -76,21 +73,19 @@ def mock_link_state(handle, link_id):
 
 def test_check_nvlink_status_link_inactive():
     """A supported link is inactive — check fails and reports which GPU and link."""
-    import pynvml
-
-    mock_handle = MagicMock()
+    from cuda.bindings import nvml
 
     # Simulate a V100 with 6 NVLink slots, all inactive.
    def mock_link_state(handle, link_id):
         if link_id >= 6:
-            raise pynvml.NVMLError_InvalidArgument
-        return pynvml.NVML_FEATURE_DISABLED
+            raise nvml.InvalidArgumentError(2)
+        return nvml.EnableState.FEATURE_DISABLED
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=mock_link_state),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_nvlink_state", side_effect=mock_link_state),
     ):
         with pytest.raises(ValueError, match="NVLink inactive on:"):
             check_nvlink_status(verbose=False)
@@ -98,23 +93,21 @@ def mock_link_state(handle, link_id):
 
 def test_check_nvlink_status_partial_failure():
     """Some links active, some inactive — all failures are reported in a single error."""
-    import pynvml
-
-    mock_handle = MagicMock()
+    from cuda.bindings import nvml
 
     # Simulate a V100 with 6 NVLink slots: link 0 active, link 1 inactive, rest active.
     def mock_link_state(handle, link_id):
         if link_id >= 6:
-            raise pynvml.NVMLError_InvalidArgument
+            raise nvml.InvalidArgumentError(2)
         if link_id == 1:
-            return pynvml.NVML_FEATURE_DISABLED
-        return pynvml.NVML_FEATURE_ENABLED
+            return nvml.EnableState.FEATURE_DISABLED
+        return nvml.EnableState.FEATURE_ENABLED
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=mock_link_state),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_nvlink_state", side_effect=mock_link_state),
     ):
         with pytest.raises(ValueError, match="NVLink inactive on:") as exc_info:
             check_nvlink_status(verbose=False)
@@ -125,21 +118,19 @@ def mock_link_state(handle, link_id):
 
 def test_check_nvlink_status_invalid_argument():
-    """NVMLError_InvalidArgument stops link iteration early — check succeeds for valid links."""
-    import pynvml
-
-    mock_handle = MagicMock()
+    """InvalidArgumentError stops link iteration early — check succeeds for valid links."""
+    from cuda.bindings import nvml
 
     # Simulate an A100 with 12 NVLink slots; link_id >= 12 is out of range.
     def mock_link_state(handle, link_id):
         if link_id >= 12:
-            raise pynvml.NVMLError_InvalidArgument
-        return pynvml.NVML_FEATURE_ENABLED
+            raise nvml.InvalidArgumentError(2)
+        return nvml.EnableState.FEATURE_ENABLED
 
     with (
-        patch("pynvml.nvmlInit"),
-        patch("pynvml.nvmlDeviceGetCount", return_value=2),
-        patch("pynvml.nvmlDeviceGetHandleByIndex", return_value=mock_handle),
-        patch("pynvml.nvmlDeviceGetNvLinkState", side_effect=mock_link_state),
+        patch("cuda.bindings.nvml.init_v2"),
+        patch("cuda.bindings.nvml.device_get_count_v2", return_value=2),
+        patch("cuda.bindings.nvml.device_get_handle_by_index_v2", return_value=0xffffffff),
+        patch("cuda.bindings.nvml.device_get_nvlink_state", side_effect=mock_link_state),
     ):
         result = check_nvlink_status(verbose=True)
         assert result == "All NVLinks active across 2 GPUs"