From 036374ac89bc85477a1a5a46ceeb2b1dfd9c9284 Mon Sep 17 00:00:00 2001 From: James Le Houx Date: Tue, 21 Apr 2026 11:22:25 +0000 Subject: [PATCH 1/4] fix wheel builds: drop redundant CMake option, patch AMReX for CUDA 12 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CPU wheel error ("gmake: *** No rule to make target '_core'") traced to the SKBUILD_CMAKE_ARGS env var interfering with scikit-build-core's cmake.args merge. The OPENIMPALA_ENABLE_TINY_PROFILE option was redundant anyway — when AMReX is built with AMReX_TINY_PROFILE=ON, it sets AMREX_TINY_PROFILE in its installed AMReX_Config.H header, which every file including AMReX.H picks up automatically. Removed the option and the env var; kept the AMReX-side build flag. GPU wheel error ("CUDA::nvToolsExt target not found") is AMReX 25.03 vs. CUDA 12 — libnvToolsExt was removed in CUDA 12 in favour of NVTX3 (header-only). Patch AMReX 25.03's CMake to use CUDA::nvtx3 instead, applied via sed before configure. CMake 3.25+ (we have 3.28) exposes CUDA::nvtx3 from CUDAToolkit, so this is drop-in. Cache keys bumped (CPU v5, GPU nvtx3-v4) to force a fresh dep rebuild. 
https://claude.ai/code/session_011dJ5Bwq4Tnr8wxH597XJFf --- .github/workflows/pypi-wheels-cpu.yml | 3 +-- .github/workflows/pypi-wheels-gpu.yml | 4 ++-- CMakeLists.txt | 19 ++++++------------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/.github/workflows/pypi-wheels-cpu.yml b/.github/workflows/pypi-wheels-cpu.yml index fc03342..1463411 100644 --- a/.github/workflows/pypi-wheels-cpu.yml +++ b/.github/workflows/pypi-wheels-cpu.yml @@ -41,7 +41,7 @@ jobs: uses: actions/cache@v4 with: path: .cibw-deps-cache - key: cibw-deps-manylinux_2_28-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-tinyprof-v4 + key: cibw-deps-manylinux_2_28-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-tinyprof-v5 - name: Build wheels run: python -m cibuildwheel --output-dir wheelhouse @@ -123,7 +123,6 @@ jobs: CMAKE_PREFIX_PATH="/usr/local" CMAKE_GENERATOR="Unix Makefiles" SETUPTOOLS_SCM_PRETEND_VERSION="${{ steps.version.outputs.version }}" - SKBUILD_CMAKE_ARGS="-DOPENIMPALA_ENABLE_TINY_PROFILE=ON" CIBW_REPAIR_WHEEL_COMMAND_LINUX: > auditwheel repair -w {dest_dir} {wheel} diff --git a/.github/workflows/pypi-wheels-gpu.yml b/.github/workflows/pypi-wheels-gpu.yml index f9a58de..928b91e 100644 --- a/.github/workflows/pypi-wheels-gpu.yml +++ b/.github/workflows/pypi-wheels-gpu.yml @@ -45,7 +45,7 @@ jobs: uses: actions/cache@v4 with: path: .cibw-deps-cache - key: cibw-deps-gpu-cuda12.6-manylinux_2_34-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-gcc13-tinyprof-v3 + key: cibw-deps-gpu-cuda12.6-manylinux_2_34-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-gcc13-nvtx3-v4 - name: Build GPU wheels run: python -m cibuildwheel --output-dir wheelhouse @@ -117,6 +117,7 @@ jobs: make install && cd ../.. 
&& git clone --depth 1 --branch 25.03 https://github.com/AMReX-Codes/amrex.git /tmp/amrex && + sed -i 's|CUDA::nvToolsExt|CUDA::nvtx3|g' /tmp/amrex/Tools/CMake/AMReXParallelBackends.cmake && cmake -S /tmp/amrex -B /tmp/amrex/build -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_BUILD_TYPE=Release @@ -157,7 +158,6 @@ jobs: CMAKE_PREFIX_PATH="/usr/local" CMAKE_GENERATOR="Unix Makefiles" CMAKE_ARGS="-DGPU_BACKEND=CUDA '-DCMAKE_CUDA_ARCHITECTURES=60;70;75;80;86;89;90' -DCMAKE_CUDA_HOST_COMPILER=/opt/rh/gcc-toolset-13/root/usr/bin/g++" - SKBUILD_CMAKE_ARGS="-DOPENIMPALA_ENABLE_TINY_PROFILE=ON" SETUPTOOLS_SCM_PRETEND_VERSION="${{ steps.version.outputs.version }}" # Vendor libraries but exclude host-specific MPI, OpenMP, Fortran runtime, diff --git a/CMakeLists.txt b/CMakeLists.txt index 9144b35..04da7e0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -161,19 +161,12 @@ endif() # ============================================================================== # TinyProfiler # ============================================================================== -# AMReX's TinyProfiler emits a function-level timing table at AMReX::Finalize(). -# Useful for diagnosing C++ hotspots from the profiling notebook (§7). -# -# This option assumes AMReX was itself built with AMReX_TINY_PROFILE=ON — if not, -# BL_PROFILE regions compile away and no table is emitted. The wheel CI builds -# AMReX with this flag when OPENIMPALA_ENABLE_TINY_PROFILE=ON. -option(OPENIMPALA_ENABLE_TINY_PROFILE - "Enable AMReX TinyProfiler instrumentation (BL_PROFILE regions)" OFF) -if(OPENIMPALA_ENABLE_TINY_PROFILE) - add_compile_definitions(AMREX_TINY_PROFILE) - message(STATUS "AMReX TinyProfiler: ENABLED (AMReX must also be built with " - "AMReX_TINY_PROFILE=ON for tables to appear)") -endif() +# AMReX emits a function-level BL_PROFILE timing table at AMReX::Finalize() when +# AMReX itself is built with -DAMReX_TINY_PROFILE=ON. 
That flag is exported via +# the AMReX::amrex target's INTERFACE_COMPILE_DEFINITIONS, so every target that +# links against AMReX::amrex picks up AMREX_TINY_PROFILE automatically — no +# OpenImpala-side option is needed. The wheel CI sets -DAMReX_TINY_PROFILE=ON +# when building AMReX from source. # ============================================================================== # Library targets From 72e4d6440a2840b5ef843ee025630ccaea568a67 Mon Sep 17 00:00:00 2001 From: James Le Houx Date: Tue, 21 Apr 2026 11:26:02 +0000 Subject: [PATCH 2/4] publish GPU wheels to PyPI instead of GitHub Releases Now that openimpala-cuda has been granted the 320 MiB per-file PyPI limit, the GPU wheels fit and can be installed via `pip install openimpala-cuda` like any other package. Mirror the CPU workflow's publish job: use the pypi trusted-publisher flow (environment: pypi, id-token: write) via pypa/gh-action-pypi-publish. Gate on github.event_name == 'release' so workflow_dispatch runs still produce artifacts for manual inspection without touching the index. https://claude.ai/code/session_011dJ5Bwq4Tnr8wxH597XJFf --- .github/workflows/pypi-wheels-gpu.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pypi-wheels-gpu.yml b/.github/workflows/pypi-wheels-gpu.yml index 928b91e..5f9f4cf 100644 --- a/.github/workflows/pypi-wheels-gpu.yml +++ b/.github/workflows/pypi-wheels-gpu.yml @@ -200,12 +200,16 @@ jobs: name: cibw-wheels-gpu path: ./wheelhouse/*.whl - upload_to_github_release: - name: Upload GPU wheels to GitHub Release + publish_to_pypi: + name: Publish GPU wheels to PyPI (openimpala-cuda) needs: build_gpu_wheels runs-on: ubuntu-latest + # Only publish on release; workflow_dispatch leaves the artifact for manual + # inspection without touching PyPI. 
+ if: github.event_name == 'release' + environment: pypi permissions: - contents: write + id-token: write steps: - name: Download wheel artifacts @@ -214,12 +218,7 @@ jobs: name: cibw-wheels-gpu path: dist/ - - name: Upload wheels to GitHub Release - if: github.event_name == 'release' - uses: softprops/action-gh-release@v2 + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 with: - files: dist/*.whl - - - name: List wheels (workflow_dispatch — no release to upload to) - if: github.event_name == 'workflow_dispatch' - run: ls -lh dist/ + skip-existing: true From 109f24ee7c9d1a381d5b02ec369ddb3251641aa7 Mon Sep 17 00:00:00 2001 From: James Le Houx Date: Tue, 21 Apr 2026 11:30:15 +0000 Subject: [PATCH 3/4] docs: drop GitHub-Releases find-links workaround for openimpala-cuda MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that openimpala-cuda is published to PyPI (previous commit switched the GPU wheel workflow), the install collapses from pip install openimpala-cuda --find-links \ https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 \ nvidia-cuda-runtime-cu12 nvidia-cublas-cu12 nvidia-cusparse-cu12 \ nvidia-curand-cu12 down to plain pip install openimpala-cuda The nvidia-*-cu12 packages were only needed because the --find-links index didn't carry them; PyPI's resolver will pull whatever the wheel actually declares. 
Updates every call site that showed the old incantation: - README.md, docs/getting-started.md, docs/user-guide/gpu.md — advanced install sections - paper.md — corrects "via GitHub Releases" wording for the JOSS draft - notebooks/visualization_yt.ipynb — §0 install cell - tutorials/02_digital_twin.ipynb — install cell - tutorials/04_multiphase_and_fields.ipynb — install cell - tutorials/07_hpc_scaling.ipynb — §6 install cell Also fixes a malformed .sif wget URL in docs/getting-started.md (a stray concatenation of expanded_assets/v4.0.6 with the filename) by switching to a vX.Y.Z placeholder to match the pattern already used in tutorial 7. https://claude.ai/code/session_011dJ5Bwq4Tnr8wxH597XJFf --- README.md | 6 ++++-- docs/getting-started.md | 15 ++++++++------- docs/user-guide/gpu.md | 5 ++--- notebooks/visualization_yt.ipynb | 21 ++------------------- paper.md | 2 +- tutorials/02_digital_twin.ipynb | 2 +- tutorials/04_multiphase_and_fields.ipynb | 2 +- tutorials/07_hpc_scaling.ipynb | 2 +- 8 files changed, 20 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index c2eb9d3..66a58b4 100644 --- a/README.md +++ b/README.md @@ -145,10 +145,12 @@ If CuPy is not available, OpenImpala falls back to SciPy on the CPU. CUDA support: ```bash -pip install openimpala-cuda --find-links \ - https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 +pip install openimpala-cuda ``` +The `openimpala-cuda` wheel requires a working NVIDIA CUDA 12 runtime (driver ++ toolkit). On Colab, Kaggle, and most cluster nodes this is already present. 
+ To install with optional dependencies: ```bash diff --git a/docs/getting-started.md b/docs/getting-started.md index 644d5e6..e1688f4 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -29,12 +29,13 @@ For HPC clusters that need the compiled C++ HYPRE solvers, a separate package is available: ```bash -pip install openimpala-cuda --find-links \ - https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 +pip install openimpala-cuda ``` This package bundles AMReX + HYPRE compiled with CUDA and is a drop-in -replacement for the pure-Python `openimpala` package. +replacement for the pure-Python `openimpala` package. It requires a working +NVIDIA CUDA 12 runtime (driver + toolkit) on the host, which is already +present on Colab, Kaggle, and most GPU cluster nodes. ### Container (HPC) @@ -42,14 +43,14 @@ For HPC clusters, download the pre-built Apptainer/Singularity container from [GitHub Releases](https://github.com/BASE-Laboratory/OpenImpala/releases): ```bash -# Download the latest .sif file -wget https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6openimpala-v4.0.0.sif +# Download the latest .sif file (replace vX.Y.Z with the release tag) +wget https://github.com/BASE-Laboratory/OpenImpala/releases/download/vX.Y.Z/openimpala-vX.Y.Z.sif # Run interactively -apptainer shell openimpala-v4.0.0.sif +apptainer shell openimpala-vX.Y.Z.sif # Run a simulation -apptainer exec openimpala-v4.0.0.sif /opt/OpenImpala/build/Diffusion3d inputs +apptainer exec openimpala-vX.Y.Z.sif /opt/OpenImpala/build/Diffusion3d inputs ``` ### From source (developers) diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md index 95274a3..525633e 100644 --- a/docs/user-guide/gpu.md +++ b/docs/user-guide/gpu.md @@ -60,11 +60,10 @@ with oi.Session(): For HPC clusters that need the compiled C++ HYPRE linear solvers with native CUDA support (AMReX + HYPRE compiled with CUDA), a separate package is -available: +available on PyPI: 
```bash -pip install openimpala-cuda --find-links \ - https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 +pip install openimpala-cuda ``` The `openimpala-cuda` package is a drop-in replacement for `openimpala` and diff --git a/notebooks/visualization_yt.ipynb b/notebooks/visualization_yt.ipynb index 7034eb6..bbfb89c 100644 --- a/notebooks/visualization_yt.ipynb +++ b/notebooks/visualization_yt.ipynb @@ -35,24 +35,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "import subprocess, sys\n", - "\n", - "def _has_gpu():\n", - " try:\n", - " subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n", - " return True\n", - " except (FileNotFoundError, subprocess.CalledProcessError):\n", - " return False\n", - "\n", - "_extras = \"yt matplotlib porespy\"\n", - "if _has_gpu():\n", - " print(\"GPU detected — installing openimpala-cuda\")\n", - " !pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/latest nvidia-cuda-runtime-cu12 nvidia-cublas-cu12 nvidia-cusparse-cu12 nvidia-curand-cu12 {_extras}\n", - "else:\n", - " print(\"No GPU detected — installing openimpala (CPU)\")\n", - " !pip install -q openimpala {_extras}" - ] + "source": "import subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_extras = \"yt matplotlib porespy\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda {_extras}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_extras}" }, { "cell_type": "markdown", @@ -413,4 +396,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/paper.md b/paper.md index 947135e..7aec22c 100644 --- a/paper.md +++ b/paper.md @@ -83,7 +83,7 @@ with oi.Session(): print(f"Tortuosity: 
{result.tortuosity:.4f}") ``` -A pure-Python package is distributed via PyPI (`pip install openimpala`) with automatic GPU acceleration via CuPy when available, and compiled CUDA GPU wheels with HYPRE solvers are available via GitHub Releases (`pip install openimpala-cuda`) for HPC deployments. Interactive tutorial notebooks are provided for Google Colab, covering workflows from basic tortuosity computation to digital twin parameterisation with PyBaMM. API reference documentation, installation guides, and interactive tutorial notebooks are available at https://base-laboratory.github.io/OpenImpala/ +A pure-Python package is distributed via PyPI (`pip install openimpala`) with automatic GPU acceleration via CuPy when available, and compiled CUDA GPU wheels with HYPRE solvers are also distributed via PyPI (`pip install openimpala-cuda`) for HPC deployments. Interactive tutorial notebooks are provided for Google Colab, covering workflows from basic tortuosity computation to digital twin parameterisation with PyBaMM. API reference documentation, installation guides, and interactive tutorial notebooks are available at https://base-laboratory.github.io/OpenImpala/ ## Testing and Quality Assurance diff --git a/tutorials/02_digital_twin.ipynb b/tutorials/02_digital_twin.ipynb index 8942498..2d8691b 100644 --- a/tutorials/02_digital_twin.ipynb +++ b/tutorials/02_digital_twin.ipynb @@ -10,7 +10,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Install OpenImpala and dependencies.\n# On GPU runtimes (T4, A100, etc.) 
we install the CUDA-accelerated wheel;\n# on CPU-only runtimes we fall back to the pure-Python/CPU package.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"pybamm bpx tifffile matplotlib yt\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/latest nvidia-cuda-runtime-cu12 nvidia-cublas-cu12 nvidia-cusparse-cu12 nvidia-curand-cu12 {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" + "source": "# Install OpenImpala and dependencies.\n# On GPU runtimes (T4, A100, etc.) we install the CUDA-accelerated wheel;\n# on CPU-only runtimes we fall back to the pure-Python/CPU package.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"pybamm bpx tifffile matplotlib yt\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" }, { "cell_type": "code", diff --git a/tutorials/04_multiphase_and_fields.ipynb b/tutorials/04_multiphase_and_fields.ipynb index 8131c07..f664066 100644 --- a/tutorials/04_multiphase_and_fields.ipynb +++ b/tutorials/04_multiphase_and_fields.ipynb @@ -10,7 +10,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Install OpenImpala (compiled C++ backend needed for low-level API in this tutorial).\n# Auto-detect GPU vs CPU runtime.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], 
stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"porespy yt matplotlib\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/latest nvidia-cuda-runtime-cu12 nvidia-cublas-cu12 nvidia-cusparse-cu12 nvidia-curand-cu12 {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" + "source": "# Install OpenImpala (compiled C++ backend needed for low-level API in this tutorial).\n# Auto-detect GPU vs CPU runtime.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"porespy yt matplotlib\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" }, { "cell_type": "code", diff --git a/tutorials/07_hpc_scaling.ipynb b/tutorials/07_hpc_scaling.ipynb index bd0b783..e4c879e 100644 --- a/tutorials/07_hpc_scaling.ipynb +++ b/tutorials/07_hpc_scaling.ipynb @@ -242,7 +242,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Install OpenImpala (compiled C++ backend needed for HPC features in this tutorial).\n# Auto-detect GPU vs CPU runtime.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"porespy matplotlib\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/latest 
nvidia-cuda-runtime-cu12 nvidia-cublas-cu12 nvidia-cusparse-cu12 nvidia-curand-cu12 {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" + "source": "# Install OpenImpala (compiled C++ backend needed for HPC features in this tutorial).\n# Auto-detect GPU vs CPU runtime.\nimport subprocess, sys\n\ndef _has_gpu():\n try:\n subprocess.check_output([\"nvidia-smi\"], stderr=subprocess.DEVNULL)\n return True\n except (FileNotFoundError, subprocess.CalledProcessError):\n return False\n\n_common = \"porespy matplotlib\"\nif _has_gpu():\n print(\"GPU detected — installing openimpala-cuda\")\n !pip install -q openimpala-cuda {_common}\nelse:\n print(\"No GPU detected — installing openimpala (CPU)\")\n !pip install -q openimpala {_common}" }, { "cell_type": "code", From 4e606f46341bfd447332c7c98c3b22b8c3a9004a Mon Sep 17 00:00:00 2001 From: James Le Houx Date: Tue, 21 Apr 2026 11:32:55 +0000 Subject: [PATCH 4/4] declare CUDA runtime deps for openimpala-cuda wheel MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit auditwheel repair --exclude drops libcudart / libcublas / libcusparse / libcurand / libnvJitLink from the openimpala-cuda wheel payload, which means the wheel only works on machines that already have the CUDA 12 toolkit installed — driver-only Colab/Kaggle runtimes have the libraries, but a bare Python venv on an NVIDIA workstation does not. Declare the nvidia-*-cu12 PyPI packages as runtime deps so pip pulls them automatically. Keep them commented out in pyproject.toml with clear markers so the CPU wheel (which uses the same file) doesn't grow a 1-2 GB dep tree. The GPU workflow's existing sed step already rewrites `name = "openimpala"` to `"openimpala-cuda"`; a second sed uncomments the `#"nvidia-..."` lines in the same pass. 
Verified with python3 -m tomllib that both variants produce valid TOML and the expected dependency lists: CPU: ['numpy', 'scipy>=1.7'] GPU: ['numpy', 'scipy>=1.7', 'nvidia-cuda-runtime-cu12', 'nvidia-cublas-cu12', 'nvidia-cusparse-cu12', 'nvidia-curand-cu12', 'nvidia-nvjitlink-cu12'] https://claude.ai/code/session_011dJ5Bwq4Tnr8wxH597XJFf --- .github/workflows/pypi-wheels-gpu.yml | 9 +++++++-- pyproject.toml | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-wheels-gpu.yml b/.github/workflows/pypi-wheels-gpu.yml index 5f9f4cf..e7f1ad3 100644 --- a/.github/workflows/pypi-wheels-gpu.yml +++ b/.github/workflows/pypi-wheels-gpu.yml @@ -139,11 +139,16 @@ jobs: tar czf /project/.cibw-deps-cache/deps.tar.gz /usr/local ; fi - # Rename the package to openimpala-cuda for the GPU wheel. + # Rename the package to openimpala-cuda and uncomment the nvidia-*-cu12 + # runtime deps (kept commented in pyproject.toml so they don't pollute + # the CPU wheel). auditwheel --exclude drops the CUDA .so's from the + # wheel payload, so without these PyPI deps the wheel breaks on any + # machine that doesn't already have the CUDA toolkit installed. # The import name stays 'openimpala' — only the PyPI distribution name changes. CIBW_BEFORE_BUILD: > pip install "cmake>=3.28,<4" && - sed -i 's/^name = "openimpala"/name = "openimpala-cuda"/' /project/pyproject.toml + sed -i 's/^name = "openimpala"/name = "openimpala-cuda"/' /project/pyproject.toml && + sed -i 's/^ #"nvidia-/ "nvidia-/' /project/pyproject.toml # Point to MPI, CUDA, and our compiled GPU dependencies. CIBW_ENVIRONMENT_LINUX: > diff --git a/pyproject.toml b/pyproject.toml index 324c4bc..8ad4105 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,17 @@ requires-python = ">=3.8" dependencies = [ "numpy", "scipy>=1.7", + # GPU-only deps: kept commented out so the CPU wheel stays lean. 
+ # The openimpala-cuda build in .github/workflows/pypi-wheels-gpu.yml + # uncomments these via sed before scikit-build-core reads this file. + # Do not reformat the leading #"nvidia- prefix on these lines — the sed rule keys off it; the cuda-deps markers below just delimit the block for human readers. + # cuda-deps-start + #"nvidia-cuda-runtime-cu12", + #"nvidia-cublas-cu12", + #"nvidia-cusparse-cu12", + #"nvidia-curand-cu12", + #"nvidia-nvjitlink-cu12", + # cuda-deps-end ] [project.optional-dependencies]