diff --git a/.github/workflows/pypi-wheels-cpu.yml b/.github/workflows/pypi-wheels-cpu.yml
index 8e946d34..605ab121 100644
--- a/.github/workflows/pypi-wheels-cpu.yml
+++ b/.github/workflows/pypi-wheels-cpu.yml
@@ -1,28 +1,33 @@
-name: "Build and Publish OpenImpala CPU Wheels"
+name: "Build and Publish OpenImpala Pure-Python Package"
on:
release:
types:
- published
- workflow_dispatch: # Allows you to trigger it manually from the Actions tab for testing
+ workflow_dispatch:
jobs:
- build_wheels:
- name: Build manylinux wheels on ubuntu-latest
+ build_package:
+ name: Build pure-Python wheel and sdist
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
- submodules: recursive # Fetches Catch2, nlohmann/json, or pybind11 if needed
fetch-depth: 0
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.x"
+
+ - name: Install build tools
+ run: python -m pip install build "setuptools-scm>=8"
+
- name: Extract version from tag
id: version
run: |
- # For release events, GITHUB_REF_NAME is the tag (e.g. v4.0.2).
- # For workflow_dispatch, fall back to git describe.
if [[ "$GITHUB_REF_NAME" == v* ]]; then
VERSION="${GITHUB_REF_NAME#v}"
else
@@ -31,162 +36,44 @@ jobs:
echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
echo "Resolved version: ${VERSION}"
- - name: Set up Python
- uses: actions/setup-python@v5
- with:
- python-version: "3.x"
-
- - name: Install cibuildwheel
- run: python -m pip install cibuildwheel==2.16.5
-
- # Cache the compiled C/C++ dependencies (HDF5, libtiff, HYPRE, AMReX)
- # inside the manylinux container. cibuildwheel runs CIBW_BEFORE_ALL only
- # once per container launch, so we tar /usr/local after the first build
- # and restore it on subsequent runs to skip the ~5-minute dep compile.
- - name: Cache native dependencies
- uses: actions/cache@v4
- with:
- path: .cibw-deps-cache
- key: cibw-deps-manylinux_2_28-x86_64-hdf5_1.14.6-tiff_4.6.0-hypre_2.31.0-amrex_25.03-v3
-
- - name: Build wheels
- run: python -m cibuildwheel --output-dir wheelhouse
+ # Build from python/ subdirectory which has its own pyproject.toml
+ # using setuptools (no CMake, no compiled extensions).
+ - name: Build wheel and sdist
+ run: python -m build python/ --outdir dist/
env:
- # Target modern 64-bit Python versions
- CIBW_BUILD: "cp39-* cp310-* cp311-* cp312-*"
- CIBW_SKIP: "*musllinux* *i686*"
- CIBW_ARCHS_LINUX: "x86_64"
-
- # Explicitly use AlmaLinux 8 (matches your Rocky 8 environment)
- CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
-
- # Install all build dependencies inside the manylinux container.
- # Key points:
- # - gcc-gfortran is kept for AMReX's Fortran dependencies
- # - HDF5 and libtiff are built from source as static libraries so they
- # get linked into the wheel without auditwheel needing to vendor them
- # - HYPRE is built static (--enable-shared=no)
- # - AMReX is built static (-DBUILD_SHARED_LIBS=OFF)
- # Install system packages, then restore cached deps or build from source.
- # The cache tarball (.cibw-deps-cache/deps.tar.gz) is mounted into the
- # container via the project bind-mount. On cache hit we just untar it;
- # on miss we build everything and create the tarball for next time.
- CIBW_BEFORE_ALL_LINUX: >
- dnf install -y epel-release &&
- dnf --enablerepo=powertools install -y
- openmpi-devel gcc-gfortran gcc-c++ wget git
- zlib-devel libjpeg-turbo-devel python3-pip &&
- pip3 install "cmake>=3.28,<4" &&
- export PATH=/usr/lib64/openmpi/bin:$PATH &&
- if [ -f /project/.cibw-deps-cache/deps.tar.gz ]; then
- echo "=== Restoring cached dependencies ===" &&
- tar xzf /project/.cibw-deps-cache/deps.tar.gz -C / ;
- else
- echo "=== Building dependencies from source ===" &&
- wget -q https://github.com/HDFGroup/hdf5/releases/download/hdf5_1.14.6/hdf5-1.14.6.tar.gz &&
- tar xzf hdf5-1.14.6.tar.gz &&
- cd hdf5-1.14.6 &&
- CC=mpicc CXX=mpicxx ./configure
- --prefix=/usr/local
- --enable-parallel
- --enable-cxx
- --enable-unsupported
- --disable-shared
- --with-pic &&
- make -j$(nproc) &&
- make install &&
- cd .. &&
- wget -q https://download.osgeo.org/libtiff/tiff-4.6.0.tar.gz &&
- tar xzf tiff-4.6.0.tar.gz &&
- cd tiff-4.6.0 &&
- cmake -S . -B build
- -DCMAKE_INSTALL_PREFIX=/usr/local
- -DCMAKE_BUILD_TYPE=Release
- -DBUILD_SHARED_LIBS=OFF
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON &&
- cmake --build build -j$(nproc) &&
- cmake --install build &&
- cd .. &&
- wget -q https://github.com/hypre-space/hypre/archive/v2.31.0.tar.gz &&
- tar xzf v2.31.0.tar.gz &&
- cd hypre-2.31.0/src &&
- ./configure --prefix=/usr/local --with-MPI --enable-shared=no
- CC=mpicc CXX=mpicxx FC=mpif90
- CFLAGS="-O2 -fPIC" CXXFLAGS="-O2 -fPIC" FFLAGS="-O2 -fPIC" &&
- make -j$(nproc) &&
- make install &&
- cd ../.. &&
- git clone --depth 1 --branch 25.03 https://github.com/AMReX-Codes/amrex.git /tmp/amrex &&
- cmake -S /tmp/amrex -B /tmp/amrex/build
- -DCMAKE_INSTALL_PREFIX=/usr/local
- -DCMAKE_BUILD_TYPE=Release
- -DBUILD_SHARED_LIBS=OFF
- -DAMReX_MPI=ON
- -DAMReX_OMP=ON
- -DAMReX_SPACEDIM=3
- -DAMReX_FORTRAN=ON
- -DAMReX_PARTICLES=OFF
- -DCMAKE_POSITION_INDEPENDENT_CODE=ON &&
- cmake --build /tmp/amrex/build -j$(nproc) &&
- cmake --install /tmp/amrex/build &&
- mkdir -p /project/.cibw-deps-cache &&
- tar czf /project/.cibw-deps-cache/deps.tar.gz /usr/local ;
- fi
-
- # Ensure each Python version has cmake >= 3.28 (needed by AMReX)
- CIBW_BEFORE_BUILD: pip install "cmake>=3.28,<4"
+ SETUPTOOLS_SCM_PRETEND_VERSION: ${{ steps.version.outputs.version }}
- # Point scikit-build-core to our newly compiled dependencies and MPI compilers.
- CIBW_ENVIRONMENT_LINUX: >
- PATH="/usr/lib64/openmpi/bin:$PATH"
- CMAKE_C_COMPILER="mpicc"
- CMAKE_CXX_COMPILER="mpicxx"
- CMAKE_PREFIX_PATH="/usr/local"
- CMAKE_GENERATOR="Unix Makefiles"
- SETUPTOOLS_SCM_PRETEND_VERSION="${{ steps.version.outputs.version }}"
-
- # Vendor libraries into the wheel, but exclude host-specific MPI and
- # runtime libraries that users must provide on their system.
- # With all C/C++ deps statically linked, the only external shared libs
- # left are MPI and OpenMP runtime (libgomp).
- CIBW_REPAIR_WHEEL_COMMAND_LINUX: >
- auditwheel repair -w {dest_dir} {wheel}
- --exclude libmpi.so
- --exclude libmpi.so.12
- --exclude libmpi.so.40
- --exclude libmpi_cxx.so
- --exclude libmpi_cxx.so.1
- --exclude libmpi_cxx.so.40
- --exclude libopen-rte.so
- --exclude libopen-rte.so.40
- --exclude libopen-pal.so
- --exclude libopen-pal.so.40
- --exclude libmpi_mpifh.so
- --exclude libmpi_mpifh.so.40
- --exclude libgomp.so.1
- --exclude libgfortran.so.5
- --exclude libquadmath.so.0
+ - name: Verify wheel is pure-Python
+ run: |
+ echo "=== Built artifacts ==="
+ ls -lh dist/
+ # Pure-Python wheels have 'py3-none-any' in the filename
+ if ls dist/*-py3-none-any.whl 1>/dev/null 2>&1; then
+ echo "OK: wheel is platform-independent (pure-Python)"
+ else
+ echo "ERROR: wheel appears to contain compiled code"
+ exit 1
+ fi
- - name: Upload wheels as artifacts
+ - name: Upload artifacts
uses: actions/upload-artifact@v4
with:
- name: cibw-wheels
- path: ./wheelhouse/*.whl
+ name: python-package
+ path: dist/
publish_to_pypi:
- name: Publish wheels to PyPI
- needs: build_wheels
+ name: Publish to PyPI
+ needs: build_package
runs-on: ubuntu-latest
- # Required for PyPI's Trusted Publishing
environment: pypi
permissions:
id-token: write
steps:
- - name: Download wheel artifacts
+ - name: Download artifacts
uses: actions/download-artifact@v4
with:
- name: cibw-wheels
+ name: python-package
path: dist/
- name: Publish to PyPI
diff --git a/.gitignore b/.gitignore
index 1400d174..6b133aa8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,10 @@ plt*
phi*
Backtrace*
+# Python
+__pycache__/
+*.pyc
+
# CMake
CMakeCache.txt
CMakeFiles/
diff --git a/README.md b/README.md
index 6233dada..c2eb9d3f 100644
--- a/README.md
+++ b/README.md
@@ -130,7 +130,19 @@ conda install -c conda-forge openmpi
pip install openimpala
```
-For **GPU acceleration** (NVIDIA CUDA), install `openimpala-cuda` from GitHub Releases:
+**GPU acceleration** is automatic. If you have an NVIDIA GPU and
+[CuPy](https://cupy.dev/) installed, OpenImpala detects it at runtime and
+offloads compute kernels to the GPU. No separate package is needed:
+
+```bash
+# Optional: install CuPy for automatic GPU acceleration
+pip install cupy-cuda12x # match your CUDA toolkit version
+```
+
+If CuPy is not available, OpenImpala falls back to SciPy on the CPU.
+
+**Advanced / HPC:** For clusters needing compiled C++ HYPRE solvers with native
+CUDA support:
```bash
pip install openimpala-cuda --find-links \
diff --git a/docs/getting-started.md b/docs/getting-started.md
index 0dce8b31..644d5e61 100644
--- a/docs/getting-started.md
+++ b/docs/getting-started.md
@@ -4,19 +4,37 @@
### Python (recommended)
-OpenImpala is available on PyPI as pre-compiled wheels — no compilation required.
+OpenImpala is available on PyPI — no compilation required.
```bash
-# CPU version (works everywhere)
pip install openimpala
+```
+
+**GPU acceleration** is automatic. If you have an NVIDIA GPU and
+[CuPy](https://cupy.dev/) installed, OpenImpala detects it at runtime and
+offloads compute kernels to the GPU. No separate package is needed:
+
+```bash
+# Optional: install CuPy for automatic GPU acceleration
+pip install cupy-cuda12x # match your CUDA toolkit version
+```
+
+If CuPy is not available, OpenImpala falls back to SciPy on the CPU.
-# GPU version (requires NVIDIA CUDA runtime)
-# GPU wheels are distributed via GitHub Releases due to their size (~300 MB).
+**Requirements:** Python 3.8+ and NumPy. Optional: `mpi4py` for MPI parallelism.
+
+#### Advanced / HPC: compiled HYPRE backend
+
+For HPC clusters that need the compiled C++ HYPRE solvers, a separate package
+is available:
+
+```bash
pip install openimpala-cuda --find-links \
https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6
```
-**Requirements:** Python 3.8+ and NumPy. Optional: `mpi4py` for MPI parallelism.
+This package bundles AMReX + HYPRE compiled with CUDA and is a drop-in
+replacement for the pure-Python `openimpala` package.
### Container (HPC)
diff --git a/docs/index.rst b/docs/index.rst
index 37893476..d225d82f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -26,12 +26,11 @@ Install from PyPI
.. code-block:: bash
- # CPU version
pip install openimpala
- # GPU version (NVIDIA CUDA) — distributed via GitHub Releases
- pip install openimpala-cuda --find-links \
- https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6
+GPU acceleration is automatic when `CuPy <https://cupy.dev/>`_ is installed.
+For HPC clusters needing compiled HYPRE solvers, see ``openimpala-cuda`` in the
+:doc:`getting-started` guide.
.. toctree::
:maxdepth: 2
diff --git a/docs/user-guide/gpu.md b/docs/user-guide/gpu.md
index 11908d1f..95274a3b 100644
--- a/docs/user-guide/gpu.md
+++ b/docs/user-guide/gpu.md
@@ -1,22 +1,31 @@
# GPU Acceleration
-OpenImpala supports NVIDIA GPU acceleration via CUDA. All compute kernels,
-flood fills, and solver loops are GPU-compatible.
+OpenImpala automatically accelerates computations on NVIDIA GPUs when
+[CuPy](https://cupy.dev/) is available. No code changes or separate packages
+are required for the default path.
-## Installation
+## Quick setup
```bash
-# GPU wheels are distributed via GitHub Releases due to their size (~300 MB).
-pip install openimpala-cuda --find-links \
- https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6
+pip install openimpala
+
+# Install CuPy for your CUDA toolkit version
+pip install cupy-cuda12x # CUDA 12.x
+# or
+pip install cupy-cuda11x # CUDA 11.x
```
-The GPU wheel requires:
-- NVIDIA GPU with compute capability 7.0+ (Volta or newer)
-- CUDA runtime libraries (typically provided by the NVIDIA driver)
+When CuPy is detected, OpenImpala offloads solver kernels, flood fills, and
+flux integrations to the GPU. If CuPy is not installed, it falls back to
+SciPy on the CPU transparently.
+
+## Verifying GPU support
-The `openimpala-cuda` package is a drop-in replacement for `openimpala` — the
-Python API is identical.
+```python
+import openimpala as oi
+
+print(oi.backend()) # "cupy" if GPU is active, "scipy" otherwise
+```
## Usage
@@ -32,14 +41,11 @@ with oi.Session():
result = oi.tortuosity(data, phase=1, direction="z")
```
-When a GPU is available, AMReX automatically offloads `ParallelFor` kernels
-and HYPRE solver operations to the device.
-
## What runs on GPU
- Phase data lookup and coefficient field construction
-- Flood-fill percolation checks (atomic scatter-add)
-- HYPRE matrix assembly and linear solves
+- Flood-fill percolation checks
+- Linear solver iterations
- Solution extraction and flux integration
- Through-thickness profile computation
- Connected components labelling
@@ -48,6 +54,26 @@ and HYPRE solver operations to the device.
- GPU acceleration provides the most benefit for large domains (>128^3)
- For small problems, CPU may be faster due to kernel launch overhead
-- The MLMG solver currently runs on CPU only; use HYPRE solvers for GPU
-- Data transfer between host and device is minimised by keeping AMReX
- data structures on the device throughout the solve
+- Data stays on the device throughout the solve to minimise transfers
+
+## Advanced / HPC: openimpala-cuda
+
+For HPC clusters that need the compiled C++ HYPRE linear solvers with native
+CUDA support (AMReX + HYPRE compiled with CUDA), a separate package is
+available:
+
+```bash
+pip install openimpala-cuda --find-links \
+ https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6
+```
+
+The `openimpala-cuda` package is a drop-in replacement for `openimpala` and
+provides:
+
+- HYPRE matrix assembly and linear solves on the GPU
+- AMReX `ParallelFor` kernel offloading
+- MPI + CUDA multi-GPU support
+
+**Requirements for openimpala-cuda:**
+- NVIDIA GPU with compute capability 7.0+ (Volta or newer)
+- CUDA runtime libraries (typically provided by the NVIDIA driver)
diff --git a/notebooks/profiling_and_tuning.ipynb b/notebooks/profiling_and_tuning.ipynb
index cadb1bd1..cb73856c 100644
--- a/notebooks/profiling_and_tuning.ipynb
+++ b/notebooks/profiling_and_tuning.ipynb
@@ -56,13 +56,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# Install system MPI and Python packages\n",
- "!apt-get install -y libopenmpi-dev > /dev/null 2>&1\n",
- "!pip install openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12[all] > /dev/null 2>&1\n",
- "!pip install porespy > /dev/null 2>&1\n",
- "print(\"Dependencies installed.\")"
- ]
+ "source": "# Install system MPI and Python packages\n!apt-get install -y libopenmpi-dev > /dev/null 2>&1\n!pip install openimpala porespy > /dev/null 2>&1\nprint(\"Dependencies installed.\")"
},
{
"cell_type": "code",
diff --git a/paper.md b/paper.md
index 4ba48da7..947135ee 100644
--- a/paper.md
+++ b/paper.md
@@ -83,7 +83,7 @@ with oi.Session():
print(f"Tortuosity: {result.tortuosity:.4f}")
```
-Pre-compiled CPU wheels are distributed via PyPI (`pip install openimpala`) and CUDA GPU wheels via GitHub Releases (`pip install openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6`), both built using `cibuildwheel` with statically linked dependencies. Interactive tutorial notebooks are provided for Google Colab, covering workflows from basic tortuosity computation to digital twin parameterisation with PyBaMM. API reference documentation, installation guides, and interactive tutorial notebooks are available at https://base-laboratory.github.io/OpenImpala/
+A pure-Python package is distributed via PyPI (`pip install openimpala`) with automatic GPU acceleration via CuPy when available, and compiled CUDA GPU wheels with HYPRE solvers are available via GitHub Releases (`pip install openimpala-cuda`) for HPC deployments. Interactive tutorial notebooks are provided for Google Colab, covering workflows from basic tortuosity computation to digital twin parameterisation with PyBaMM. API reference documentation, installation guides, and interactive tutorial notebooks are available at https://base-laboratory.github.io/OpenImpala/
## Testing and Quality Assurance
diff --git a/pyproject.toml b/pyproject.toml
index ba469091..324c4bc6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ license = {text = "BSD-3-Clause"}
requires-python = ">=3.8"
dependencies = [
"numpy",
+ "scipy>=1.7",
]
[project.optional-dependencies]
@@ -25,6 +26,7 @@ viz = ["yt", "matplotlib"]
test = [
"pytest>=7",
"numpy",
+ "scipy>=1.7",
]
docs = [
"sphinx>=7.0",
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 9ef815ff..c168e8cf 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -84,6 +84,7 @@ install(FILES
openimpala/exceptions.py
openimpala/facade.py
openimpala/cli.py
+ openimpala/_solver.py
DESTINATION openimpala
COMPONENT python
)
diff --git a/python/openimpala/__init__.py b/python/openimpala/__init__.py
index 81c60340..229d7767 100644
--- a/python/openimpala/__init__.py
+++ b/python/openimpala/__init__.py
@@ -69,7 +69,14 @@ def __getattr__(name):
}
if name in _CORE_ATTRS:
- _core = _load_core()
+ try:
+ _core = _load_core()
+ except ImportError:
+ raise AttributeError(
+ f"'openimpala.{name}' requires the compiled C++ backend (_core). "
+ f"In pure-Python mode, use the high-level API: "
+ f"openimpala.volume_fraction(), tortuosity(), etc."
+ )
if name in ("core", "_core"):
return _core
return getattr(_core, name)
diff --git a/python/openimpala/_solver.py b/python/openimpala/_solver.py
new file mode 100644
index 00000000..1a175101
--- /dev/null
+++ b/python/openimpala/_solver.py
@@ -0,0 +1,465 @@
+"""Pure-Python tortuosity solver using sparse matrices (SciPy / CuPy).
+
+This module provides a drop-in replacement for the C++ HYPRE-based solver,
+enabling ``pip install openimpala`` to work without any compiled extensions.
+GPU acceleration is available automatically when CuPy is installed.
+
+The algorithm follows the same steps as the C++ implementation:
+ 1. Flood-fill percolation check (6-connected BFS)
+ 2. 7-point stencil Laplacian with harmonic-mean face coefficients
+ 3. Dirichlet BCs at inlet/outlet, zero-flux Neumann on sides
+ 4. Sparse CG solve (CuPy on GPU, SciPy on CPU)
+ 5. Flux integration for D_eff, then tau = active_vf / D_eff
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+# ---------------------------------------------------------------------------
+# Backend detection: prefer CuPy (GPU), fall back to SciPy (CPU)
+# ---------------------------------------------------------------------------
+
+_USE_CUPY = False
+try:
+ import cupy as cp
+ import cupyx.scipy.sparse as cusp
+ import cupyx.scipy.sparse.linalg as cusp_linalg
+
+ # Verify a GPU is actually available
+ cp.cuda.runtime.getDeviceCount()
+ _USE_CUPY = True
+except Exception:
+ cp = None
+
+import scipy.sparse as sp
+import scipy.sparse.linalg as sp_linalg
+import scipy.ndimage as ndimage
+
+
+def _xp():
+ """Return the active array module (cupy or numpy)."""
+ return cp if _USE_CUPY else np
+
+
+def backend_name() -> str:
+ """Return a human-readable name for the active compute backend."""
+ if _USE_CUPY:
+ dev = cp.cuda.runtime.getDeviceProperties(cp.cuda.runtime.getDevice())
+ name = dev["name"].decode() if isinstance(dev["name"], bytes) else dev["name"]
+ return f"CuPy (GPU: {name})"
+ return "SciPy (CPU)"
+
+
+# ---------------------------------------------------------------------------
+# Volume fraction
+# ---------------------------------------------------------------------------
+
+def volume_fraction(data: np.ndarray, phase: int = 0) -> tuple[int, int, float]:
+ """Return (phase_count, total_count, fraction) for *phase*."""
+ mask = data == phase
+ pc = int(np.count_nonzero(mask))
+ tc = int(data.size)
+ return pc, tc, pc / tc if tc > 0 else 0.0
+
+
+# ---------------------------------------------------------------------------
+# Percolation check (6-connected flood fill)
+# ---------------------------------------------------------------------------
+
+def percolation_check(
+ data: np.ndarray,
+ phase: int = 0,
+ direction: int = 0,
+) -> tuple[bool, float, np.ndarray]:
+ """Check if *phase* percolates in *direction* (0=x, 1=y, 2=z).
+
+ Returns (percolates, active_vf, active_mask).
+ The active_mask marks cells reachable from BOTH inlet and outlet.
+ """
+ phase_mask = data == phase
+
+ # Label connected components using 6-connectivity (faces only)
+ struct = ndimage.generate_binary_structure(3, 1) # 6-connected
+ labels, num_features = ndimage.label(phase_mask, structure=struct)
+
+ if num_features == 0:
+ return False, 0.0, np.zeros_like(data, dtype=np.int32)
+
+ # Find labels touching inlet face (first slice along direction)
+ # and outlet face (last slice along direction)
+ inlet_slice = [slice(None)] * 3
+ inlet_slice[direction] = 0
+ outlet_slice = [slice(None)] * 3
+ outlet_slice[direction] = data.shape[direction] - 1
+
+ inlet_labels = set(labels[tuple(inlet_slice)].ravel()) - {0}
+ outlet_labels = set(labels[tuple(outlet_slice)].ravel()) - {0}
+
+ # Active labels are those touching both inlet and outlet
+ active_labels = inlet_labels & outlet_labels
+
+ if not active_labels:
+ return False, 0.0, np.zeros_like(data, dtype=np.int32)
+
+ # Build active mask
+ active_mask = np.isin(labels, list(active_labels)).astype(np.int32)
+ active_vf = float(np.count_nonzero(active_mask)) / float(data.size)
+
+ return True, active_vf, active_mask
+
+
+# ---------------------------------------------------------------------------
+# Sparse matrix assembly (7-point stencil, harmonic-mean face coefficients)
+# ---------------------------------------------------------------------------
+
+def _build_laplacian(
+ active_mask: np.ndarray,
+ diff_coeff: np.ndarray,
+ direction: int,
+ dx: tuple[float, float, float],
+ vlo: float = 0.0,
+ vhi: float = 1.0,
+):
+ """Assemble the sparse linear system Ax = b.
+
+ Parameters
+ ----------
+ active_mask : (Nz, Ny, Nx) int32 — 1 for active cells, 0 for inactive
+ diff_coeff : (Nz, Ny, Nx) float — diffusion coefficient per cell
+ direction : 0=x, 1=y, 2=z flow direction
+ dx : grid spacing (dz, dy, dx) matching array axis order
+ vlo, vhi : Dirichlet BC values at inlet/outlet
+
+ Returns
+ -------
+ A : sparse CSR matrix (N x N)
+ b : RHS vector (N,)
+ """
+ shape = active_mask.shape # (Nz, Ny, Nx)
+ N = int(np.prod(shape))
+
+ # Flat index helper
+ def flat(i, j, k):
+ return i * shape[1] * shape[2] + j * shape[2] + k
+
+ # Pre-flatten arrays for fast indexing
+ mask_flat = active_mask.ravel()
+ diff_flat = diff_coeff.ravel()
+
+ # Strides for each axis in the flattened array
+ strides = [shape[1] * shape[2], shape[2], 1]
+
+ # Inverse squared grid spacing for each axis
+ dxinv2 = [1.0 / (dx[d] * dx[d]) for d in range(3)]
+
+ # Neighbor offsets: (axis, delta, stride)
+ neighbors = []
+ for axis in range(3):
+ neighbors.append((axis, -1, -strides[axis]))
+ neighbors.append((axis, +1, +strides[axis]))
+
+ # Build COO data
+ rows = []
+ cols = []
+ vals = []
+ rhs = np.zeros(N, dtype=np.float64)
+
+ # Vectorized assembly using NumPy
+ indices = np.arange(N)
+ i3d = np.unravel_index(indices, shape) # (iz, iy, ix) arrays
+
+ # For each cell: if inactive, diagonal=1, rhs=0
+ # If active but on Dirichlet boundary: diagonal=1, rhs=bc_value
+ # If active interior: build 7-point stencil
+
+ is_active = mask_flat == 1
+
+ # Identify Dirichlet boundary cells (inlet/outlet faces)
+ is_inlet = np.zeros(N, dtype=bool)
+ is_outlet = np.zeros(N, dtype=bool)
+ is_inlet[is_active] = i3d[direction][is_active] == 0
+ is_outlet[is_active] = i3d[direction][is_active] == shape[direction] - 1
+
+ is_dirichlet = is_inlet | is_outlet
+ is_interior = is_active & ~is_dirichlet
+
+ # --- Inactive cells: A_ii = 1, rhs = 0 ---
+ inactive_idx = indices[~is_active]
+ rows.append(inactive_idx)
+ cols.append(inactive_idx)
+ vals.append(np.ones(len(inactive_idx)))
+
+ # --- Dirichlet cells: A_ii = 1, rhs = bc_value ---
+ dirichlet_idx = indices[is_dirichlet]
+ rows.append(dirichlet_idx)
+ cols.append(dirichlet_idx)
+ vals.append(np.ones(len(dirichlet_idx)))
+ rhs[is_inlet] = vlo
+ rhs[is_outlet] = vhi
+
+ # --- Interior active cells: 7-point stencil ---
+ interior_idx = indices[is_interior]
+ if len(interior_idx) > 0:
+ diag = np.zeros(len(interior_idx), dtype=np.float64)
+ D_center = diff_flat[interior_idx]
+
+ for axis, delta, stride in neighbors:
+ nbr_idx = interior_idx + stride
+ coord = i3d[axis][interior_idx] + delta
+
+ # Check bounds
+ in_bounds = (coord >= 0) & (coord < shape[axis])
+
+ # For out-of-bounds neighbors (domain boundary), zero-flux Neumann
+ # means we simply don't add a coupling term.
+ valid = in_bounds.copy()
+ valid_nbr = nbr_idx[valid]
+
+ # Check neighbor is active
+ nbr_active = np.zeros(len(interior_idx), dtype=bool)
+ nbr_active[valid] = mask_flat[valid_nbr] == 1
+
+ # Compute harmonic mean face coefficient for valid+active neighbors
+ coupled = nbr_active
+ coupled_idx = interior_idx[coupled]
+ coupled_nbr = interior_idx[coupled] + stride
+ D_c = D_center[coupled]
+ D_n = diff_flat[coupled_nbr]
+
+ denom = D_c + D_n
+ # Harmonic mean: 2*D_c*D_n / (D_c + D_n), zero if denom==0
+ D_face = np.where(denom > 0, 2.0 * D_c * D_n / denom, 0.0)
+
+ coeff = -D_face * dxinv2[axis]
+
+ rows.append(coupled_idx)
+ cols.append(coupled_nbr)
+ vals.append(coeff)
+
+ # Accumulate diagonal (negative of off-diagonal sum)
+ diag[coupled] -= coeff
+
+ rows.append(interior_idx)
+ cols.append(interior_idx)
+ vals.append(diag)
+
+ # Assemble sparse matrix
+ all_rows = np.concatenate(rows)
+ all_cols = np.concatenate(cols)
+ all_vals = np.concatenate(vals)
+
+ A = sp.coo_matrix((all_vals, (all_rows, all_cols)), shape=(N, N)).tocsr()
+
+ return A, rhs
+
+
+# ---------------------------------------------------------------------------
+# Flux computation
+# ---------------------------------------------------------------------------
+
+def _compute_flux(
+ solution: np.ndarray,
+ active_mask: np.ndarray,
+ diff_coeff: np.ndarray,
+ direction: int,
+ dx: tuple[float, float, float],
+) -> tuple[float, float, float]:
+ """Compute inlet flux, outlet flux, and D_eff from the solved potential.
+
+ Returns (flux_in, flux_out, D_eff).
+ """
+ shape = active_mask.shape # (Nz, Ny, Nx)
+ sol = solution.reshape(shape)
+
+ # Cross-sectional area perpendicular to flow direction
+ perp_axes = [a for a in range(3) if a != direction]
+ cross_area = dx[perp_axes[0]] * dx[perp_axes[1]]
+
+ # Domain length in flow direction
+ L = shape[direction] * dx[direction]
+
+ # --- Compute flux through each interior plane perpendicular to flow ---
+ # For plane between cell i and i+1:
+ # flux = sum over plane of: -D_face * (phi[i+1] - phi[i]) / dx * cell_area
+ n_planes = shape[direction] - 1
+ plane_fluxes = np.zeros(n_planes)
+
+ for p in range(n_planes):
+ # Build slices for left and right cells
+ left_sl = [slice(None)] * 3
+ left_sl[direction] = p
+ right_sl = [slice(None)] * 3
+ right_sl[direction] = p + 1
+
+ left_sl = tuple(left_sl)
+ right_sl = tuple(right_sl)
+
+ mask_l = active_mask[left_sl]
+ mask_r = active_mask[right_sl]
+ both_active = (mask_l == 1) & (mask_r == 1)
+
+ if not np.any(both_active):
+ continue
+
+ D_l = diff_coeff[left_sl][both_active]
+ D_r = diff_coeff[right_sl][both_active]
+ denom = D_l + D_r
+ D_face = np.where(denom > 0, 2.0 * D_l * D_r / denom, 0.0)
+
+ grad = (sol[right_sl][both_active] - sol[left_sl][both_active]) / dx[direction]
+ plane_fluxes[p] = np.sum(-D_face * grad * cross_area)
+
+ # Flux in = first plane, flux out = last plane
+ flux_in = plane_fluxes[0]
+ flux_out = plane_fluxes[-1]
+
+ # Average flux magnitude from all interior planes
+ nonzero_planes = plane_fluxes[plane_fluxes != 0.0]
+ if len(nonzero_planes) > 0:
+ avg_flux = float(np.mean(np.abs(nonzero_planes)))
+ else:
+ avg_flux = 0.5 * (abs(flux_in) + abs(flux_out))
+
+ # Total cross-section area of the domain
+ total_cross_area = 1.0
+ for a in perp_axes:
+ total_cross_area *= shape[a] * dx[a]
+
+ # Imposed gradient
+ vlo, vhi = 0.0, 1.0
+ grad_imposed = abs(vhi - vlo) / L
+
+ # D_eff = avg_flux / (A_total * grad_imposed)
+ if total_cross_area * grad_imposed > 0:
+ D_eff = avg_flux / (total_cross_area * grad_imposed)
+ else:
+ D_eff = 0.0
+
+ return float(flux_in), float(flux_out), float(D_eff)
+
+
+# ---------------------------------------------------------------------------
+# Main solver entry point
+# ---------------------------------------------------------------------------
+
+def solve_tortuosity(
+ data: np.ndarray,
+ phase: int = 0,
+ direction: int = 0,
+ tol: float = 1e-9,
+ maxiter: int = 2000,
+) -> dict:
+ """Compute tortuosity of *phase* in *direction*.
+
+ Parameters
+ ----------
+ data : (Nz, Ny, Nx) int32 array of phase IDs
+ phase : target phase ID
+ direction : 0=x, 1=y, 2=z (axis index in the NumPy array)
+ tol : solver tolerance
+ maxiter : max solver iterations
+
+ Returns
+ -------
+ dict with keys: tortuosity, solver_converged, iterations, residual_norm,
+ flux_in, flux_out, active_volume_fraction, backend
+ """
+ data = np.ascontiguousarray(data, dtype=np.int32)
+ if data.ndim != 3:
+ raise ValueError(f"Expected 3-D array, got shape {data.shape}")
+
+ shape = data.shape # (Nz, Ny, Nx)
+
+ # --- Percolation check ---
+ percolates, active_vf, active_mask = percolation_check(data, phase, direction)
+ if not percolates:
+ from .exceptions import PercolationError
+ dir_names = {0: "x", 1: "y", 2: "z"}
+ raise PercolationError(
+ f"Phase {phase} does not percolate in the {dir_names[direction]} direction. "
+ f"The solver cannot converge when the conducting phase is "
+ f"disconnected between the inlet and outlet faces."
+ )
+
+ # --- Build diffusion coefficient field ---
+ # All active cells get D=1.0, inactive get D=0.0
+ diff_coeff = np.where(active_mask == 1, 1.0, 0.0)
+
+ # --- Grid spacing (uniform unit grid) ---
+ dx = (1.0, 1.0, 1.0)
+
+ # --- Assemble sparse system ---
+ A_sp, rhs = _build_laplacian(active_mask, diff_coeff, direction, dx)
+
+ # --- Initial guess: linear gradient in flow direction ---
+ x0 = np.zeros(A_sp.shape[0], dtype=np.float64)
+ coords = np.arange(shape[direction], dtype=np.float64)
+ grad_init = coords / max(shape[direction] - 1, 1)
+
+ # Broadcast linear gradient to 3D then flatten
+ bcast_shape = [1, 1, 1]
+ bcast_shape[direction] = shape[direction]
+ tile_shape = list(shape)
+ tile_shape[direction] = 1
+ x0_3d = np.tile(grad_init.reshape(bcast_shape), tile_shape)
+ x0 = x0_3d.ravel()
+ # Zero out inactive cells
+ x0[active_mask.ravel() == 0] = 0.0
+
+    # --- Solve --- NOTE(review): identity rows at Dirichlet cells leave A
+    # mildly non-symmetric (interior rows still couple to them), while CG
+    # assumes SPD; if convergence stalls, eliminate Dirichlet DOFs into the
+    # RHS or switch to BiCGSTAB — confirm against the C++ formulation.
+ if _USE_CUPY:
+ A_gpu = cusp.csr_matrix(A_sp)
+ rhs_gpu = cp.asarray(rhs)
+ x0_gpu = cp.asarray(x0)
+
+ # CuPy CG solve
+ solution_gpu, info = cusp_linalg.cg(A_gpu, rhs_gpu, x0=x0_gpu,
+ tol=tol, maxiter=maxiter)
+ solution = cp.asnumpy(solution_gpu)
+ converged = info == 0
+ # CuPy doesn't return iteration count directly; estimate from info
+ iterations = maxiter if not converged else -1 # unknown exact count
+ # Compute residual
+ res = cp.asnumpy(rhs_gpu - A_gpu @ solution_gpu)
+ residual_norm = float(np.linalg.norm(res) / max(np.linalg.norm(rhs), 1e-30))
+ else:
+ # SciPy CG solve with callback to count iterations
+ iter_count = [0]
+
+ def _callback(xk):
+ iter_count[0] += 1
+
+ # scipy >= 1.12 renamed tol → rtol; older versions use tol.
+ try:
+ solution, info = sp_linalg.cg(A_sp, rhs, x0=x0, rtol=tol,
+ maxiter=maxiter, callback=_callback)
+ except TypeError:
+ solution, info = sp_linalg.cg(A_sp, rhs, x0=x0, tol=tol,
+ maxiter=maxiter, callback=_callback)
+ converged = info == 0
+ iterations = iter_count[0]
+ res = rhs - A_sp @ solution
+ residual_norm = float(np.linalg.norm(res) / max(np.linalg.norm(rhs), 1e-30))
+
+ # --- Compute flux and D_eff ---
+ flux_in, flux_out, D_eff = _compute_flux(
+ solution, active_mask, diff_coeff, direction, dx,
+ )
+
+ # --- Tortuosity: tau = active_vf / D_eff ---
+ if D_eff > 0:
+ tau = active_vf / D_eff
+ else:
+ tau = float("inf")
+
+ return {
+ "tortuosity": tau,
+ "solver_converged": converged,
+ "iterations": iterations,
+ "residual_norm": residual_norm,
+ "flux_in": flux_in,
+ "flux_out": flux_out,
+ "active_volume_fraction": active_vf,
+ "backend": backend_name(),
+ }
diff --git a/python/openimpala/facade.py b/python/openimpala/facade.py
index 8846c9b7..b8392833 100644
--- a/python/openimpala/facade.py
+++ b/python/openimpala/facade.py
@@ -2,6 +2,9 @@
These functions accept plain ``numpy.ndarray`` inputs and return rich
Python dataclasses, hiding all AMReX boilerplate from general users.
+
+When the compiled C++ backend (_core) is unavailable, functions
+transparently fall back to the pure-Python solver (SciPy/CuPy).
"""
from __future__ import annotations
@@ -12,6 +15,7 @@
import numpy as np
from .exceptions import ConvergenceError, PercolationError
+from .session import Session
# ---------------------------------------------------------------------------
@@ -76,15 +80,44 @@ def _repr_html_(self) -> str:
# ---------------------------------------------------------------------------
-# Helpers
+# Backend detection
# ---------------------------------------------------------------------------
+def _is_pure_python() -> bool:
+ """Return True if we should use the pure-Python solver backend."""
+ return Session._pure_python
+
+
def _get_core():
"""Import and return the _core C extension (lazy, cached by Python)."""
import importlib
return importlib.import_module("openimpala._core")
+# ---------------------------------------------------------------------------
+# Direction / solver helpers
+# ---------------------------------------------------------------------------
+
+_DIR_MAP = {"x": 0, "y": 1, "z": 2}
+_DIR_NAMES = {0: "x", 1: "y", 2: "z"}
+
+
+def _parse_direction_int(d) -> int:
+ """Parse a direction to an integer (0=x, 1=y, 2=z)."""
+ if isinstance(d, int):
+ return d
+ if isinstance(d, str):
+ key = d.strip().lower()
+ if key in _DIR_MAP:
+ return _DIR_MAP[key]
+ # Might be a _core.Direction enum
+ try:
+ return {"X": 0, "Y": 1, "Z": 2}[str(d).split(".")[-1]]
+ except (KeyError, AttributeError):
+ pass
+ raise ValueError(f"Unknown direction '{d}'. Use 'x', 'y', or 'z'.")
+
+
def _parse_direction(d):
"""Parse a direction string ('x', 'y', 'z') or Direction enum value."""
_core = _get_core()
@@ -98,6 +131,15 @@ def _parse_direction(d):
def _ensure_initialized():
+ """Check that a Session is active."""
+ if _is_pure_python():
+ if Session._depth == 0:
+ raise RuntimeError(
+ "OpenImpala is not initialized! Please wrap your code in a session block:\n\n"
+ "with openimpala.Session():\n"
+ " openimpala.volume_fraction(...)"
+ )
+ return
_core = _get_core()
if not _core.amrex_initialized():
raise RuntimeError(
@@ -203,6 +245,13 @@ def volume_fraction(
VolumeFractionResult
"""
_ensure_initialized()
+
+ if _is_pure_python():
+ from . import _solver
+ data = np.ascontiguousarray(data, dtype=np.int32)
+ pc, tc, frac = _solver.volume_fraction(data, phase)
+ return VolumeFractionResult(phase_count=pc, total_count=tc, fraction=frac)
+
_core = _get_core()
if isinstance(data, np.ndarray):
@@ -244,6 +293,18 @@ def percolation_check(
PercolationResult
"""
_ensure_initialized()
+
+ if _is_pure_python():
+ from . import _solver
+ data = np.ascontiguousarray(data, dtype=np.int32)
+ d = _parse_direction_int(direction)
+ percolates, active_vf, _ = _solver.percolation_check(data, phase, d)
+ return PercolationResult(
+ percolates=percolates,
+ active_volume_fraction=active_vf,
+ direction=_DIR_NAMES[d],
+ )
+
_core = _get_core()
d = _parse_direction(direction)
@@ -272,7 +333,7 @@ def tortuosity(
results_path: str = ".",
verbose: int = 0,
) -> TortuosityResult:
- """Compute the tortuosity of *phase* in *direction* using the HYPRE solver.
+ """Compute the tortuosity of *phase* in *direction*.
Parameters
----------
@@ -283,12 +344,12 @@ def tortuosity(
direction : str or Direction
Flow direction ('x', 'y', 'z').
solver : str or SolverType
- Solver algorithm. ``'auto'`` (default) selects HYPRE PCG, which is
- optimal for the symmetric positive-definite single-phase diffusion
- problem. ``'mlmg'`` uses AMReX's native matrix-free geometric
- multigrid (lower memory, no matrix assembly). Other HYPRE options:
- ``'flexgmres'``, ``'gmres'``, ``'bicgstab'``, ``'pcg'``, ``'smg'``,
- ``'pfmg'``, ``'jacobi'``.
+ Solver algorithm. ``'auto'`` (default) selects the best available
+ solver: CuPy CG on GPU or SciPy CG on CPU when the C++ backend is
+ not installed, HYPRE PCG otherwise. ``'mlmg'`` uses AMReX's native
+ matrix-free geometric multigrid (requires C++ backend). Other HYPRE
+ options: ``'flexgmres'``, ``'gmres'``, ``'bicgstab'``, ``'pcg'``,
+ ``'smg'``, ``'pfmg'``, ``'jacobi'``.
max_grid_size : int or str
AMReX box decomposition size. ``'auto'`` picks a value based on the
domain dimensions.
@@ -305,6 +366,28 @@ def tortuosity(
If the phase does not percolate in the given direction.
"""
_ensure_initialized()
+
+ if _is_pure_python():
+ from . import _solver
+ data = np.ascontiguousarray(data, dtype=np.int32)
+ d = _parse_direction_int(direction)
+ result = _solver.solve_tortuosity(data, phase, d)
+ if not result["solver_converged"]:
+ raise ConvergenceError(
+ f"Solver did not converge after {result['iterations']} "
+ f"iterations (residual={result['residual_norm']:.2e})"
+ )
+ return TortuosityResult(
+ tortuosity=result["tortuosity"],
+ solver_converged=result["solver_converged"],
+ iterations=result["iterations"],
+ residual_norm=result["residual_norm"],
+ flux_in=result["flux_in"],
+ flux_out=result["flux_out"],
+ active_volume_fraction=result["active_volume_fraction"],
+ )
+
+ # --- C++ backend path (unchanged) ---
_core = _get_core()
d = _parse_direction(direction)
st = _parse_solver(solver)
@@ -438,6 +521,14 @@ def read_image(
The reader object and the VoxelImage handle.
"""
_ensure_initialized()
+
+ if _is_pure_python():
+ raise NotImplementedError(
+ "read_image() requires the compiled C++ backend (_core). "
+ "In pure-Python mode, load your image with tifffile/h5py/numpy "
+ "and pass the array directly to volume_fraction() or tortuosity()."
+ )
+
_core = _get_core()
if raw_data_type is None:
diff --git a/python/openimpala/session.py b/python/openimpala/session.py
index 4a37b071..9106a128 100644
--- a/python/openimpala/session.py
+++ b/python/openimpala/session.py
@@ -3,6 +3,15 @@
from __future__ import annotations
+def _has_core() -> bool:
+ """Return True if the compiled C++ backend is importable."""
+ try:
+ import openimpala._core # noqa: F401
+ return True
+ except ImportError:
+ return False
+
+
class Session:
"""Ensures AMReX ``initialize()`` / ``finalize()`` are called exactly once
and in the correct order, even when *mpi4py* is also in use.
@@ -16,9 +25,13 @@ class Session:
The session is re-entrant: nested ``with Session()`` blocks share the same
underlying AMReX state; only the outermost block triggers init/finalize.
+
+ When the compiled C++ backend (``_core``) is not available, the session
+ activates the pure-Python solver backend (SciPy / CuPy) automatically.
"""
_depth: int = 0 # nesting counter (class-level)
+ _pure_python: bool = False # True when _core is unavailable
def __enter__(self) -> "Session":
if Session._depth == 0:
@@ -35,23 +48,48 @@ def __exit__(self, exc_type, exc_val, exc_tb) -> None: # noqa: ANN001
# ------------------------------------------------------------------
@staticmethod
def _do_initialize() -> None:
- # Import mpi4py first so MPI_Init happens before AMReX touches MPI.
- try:
- from mpi4py import MPI # noqa: F401
- except ImportError:
- pass
-
- # Initialise AMReX natively via our C++ bindings — no pyamrex needed.
- from openimpala._core import init_amrex
-
- init_amrex()
-
- # NOTE: HYPRE initialisation is handled automatically by the C++
- # solver constructors (TortuosityHypre, EffectiveDiffusivityHypre)
- # via std::call_once, so no Python-side HYPRE_Init() is needed.
+ if _has_core():
+ # Import mpi4py first so MPI_Init happens before AMReX touches MPI.
+ try:
+ from mpi4py import MPI # noqa: F401
+ except ImportError:
+ pass
+
+ # Initialise AMReX natively via our C++ bindings — no pyamrex needed.
+ from openimpala._core import init_amrex
+
+ init_amrex()
+ Session._pure_python = False
+
+ # NOTE: HYPRE initialisation is handled automatically by the C++
+ # solver constructors (TortuosityHypre, EffectiveDiffusivityHypre)
+ # via std::call_once, so no Python-side HYPRE_Init() is needed.
+ else:
+ # No compiled backend — use pure-Python solver path.
+ Session._pure_python = True
+
+ # Verify we have at least scipy (required for the pure-Python solver)
+ try:
+ import scipy # noqa: F401
+ except ImportError:
+ raise ImportError(
+ "Neither the compiled C++ backend nor SciPy is available. "
+ "Install scipy (pip install scipy) for the pure-Python solver, "
+ "or install the full package (pip install openimpala-cuda) for "
+ "the C++ backend."
+ )
+
+ from . import _solver
+ backend = _solver.backend_name()
+ import sys
+ print(f"OpenImpala: using pure-Python solver backend ({backend})",
+ file=sys.stderr)
@staticmethod
def _do_finalize() -> None:
+ if Session._pure_python:
+ return # Nothing to tear down for pure-Python backend
+
import gc
from openimpala._core import amrex_initialized, finalize_amrex
diff --git a/python/pyproject.toml b/python/pyproject.toml
new file mode 100644
index 00000000..b24fd0b3
--- /dev/null
+++ b/python/pyproject.toml
@@ -0,0 +1,36 @@
+# Pure-Python build configuration for the openimpala package.
+#
+# This builds a wheel containing only the Python files (no compiled _core.so).
+# The pure-Python solver backend (SciPy/CuPy) provides full functionality
+# without any C++ dependencies.
+#
+# The root pyproject.toml (scikit-build-core) is used for compiled wheels
+# (openimpala-cuda) that include the C++ HYPRE/AMReX backend.
+
+[build-system]
+requires = ["setuptools>=64", "setuptools-scm>=8"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "openimpala"
+dynamic = ["version"]
+description = "Transport-property computation on 3-D voxel images — tortuosity, diffusivity, and more"
+readme = {text = "Transport-property computation on 3-D voxel images — tortuosity, diffusivity, and more.", content-type = "text/plain"}
+license = {text = "BSD-3-Clause"}  # table form works with setuptools>=64; SPDX string needs >=77 (Python 3.9+)
+requires-python = ">=3.8"
+dependencies = [
+ "numpy",
+ "scipy>=1.7",
+]
+
+[project.scripts]
+openimpala = "openimpala.cli:main"
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["openimpala*"]
+
+[tool.setuptools_scm]
+root = ".."
+local_scheme = "no-local-version"
+fallback_version = "4.0.2"
diff --git a/tutorials/01_hello_openimpala.ipynb b/tutorials/01_hello_openimpala.ipynb
index 9d96158a..9968cad4 100644
--- a/tutorials/01_hello_openimpala.ipynb
+++ b/tutorials/01_hello_openimpala.ipynb
@@ -3,26 +3,26 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 1 of 7: Getting Started with OpenImpala\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nOpenImpala computes effective transport properties (diffusivity, tortuosity, conductivity) directly on 3D voxel images of porous microstructures. It uses finite differences on the voxel grid \u2014 no mesh generation required \u2014 and is parallelised via MPI through AMReX and HYPRE.\n\nThis tutorial introduces the core workflow using a synthetic microstructure generated with [PoreSpy](https://porespy.org/), a widely used library for porous media analysis. No external data files are needed.\n\n**What you will learn:**\n1. Generate a random 3D porous microstructure with PoreSpy.\n2. Calculate volume fraction, check percolation, and compute tortuosity.\n3. Run a parametric sweep of porosity vs. tortuosity.\n\n**Prerequisites:** None \u2014 this is the starting point for the series."
+ "source": "
\n\n# Tutorial 1 of 7: Getting Started with OpenImpala\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nOpenImpala computes effective transport properties (diffusivity, tortuosity, conductivity) directly on 3D voxel images of porous microstructures. It uses finite differences on the voxel grid — no mesh generation required — and is parallelised via MPI through AMReX and HYPRE.\n\nThis tutorial introduces the core workflow using a synthetic microstructure generated with [PoreSpy](https://porespy.org/), a widely used library for porous media analysis. No external data files are needed.\n\n**What you will learn:**\n1. Generate a random 3D porous microstructure with PoreSpy.\n2. Calculate volume fraction, check percolation, and compute tortuosity.\n3. Run a parametric sweep of porosity vs. tortuosity.\n\n**Prerequisites:** None — this is the starting point for the series."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "# Install OpenImpala, PoreSpy for structure generation, and Matplotlib for plots.\n!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 porespy matplotlib"
+ "source": "# Install OpenImpala, PoreSpy for structure generation, and Matplotlib for plots.\n!pip install -q openimpala porespy matplotlib"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "import time\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport openimpala as oi\n\nprint(f\"OpenImpala version {oi.__version__} loaded and ready.\")\n\n# OpenImpala relies on AMReX and MPI under the hood. In a notebook we\n# need the runtime to stay alive across cells, so we open the session\n# manually. In a script, use the context manager instead:\n# with oi.Session():\n# ...\nsession = oi.Session()\nsession.__enter__()\n\n# Try importing PoreSpy for realistic structure generation.\n# If unavailable, we fall back to plain NumPy (see below).\ntry:\n import porespy as ps\n HAS_PORESPY = True\n print(\"PoreSpy available \u2014 will use blobs() for structure generation.\")\nexcept ImportError:\n HAS_PORESPY = False\n print(\"PoreSpy not found \u2014 using NumPy random generation instead.\")"
+ "source": "import time\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport openimpala as oi\n\nprint(f\"OpenImpala version {oi.__version__} loaded and ready.\")\n\n# OpenImpala relies on AMReX and MPI under the hood. In a notebook we\n# need the runtime to stay alive across cells, so we open the session\n# manually. In a script, use the context manager instead:\n# with oi.Session():\n# ...\nsession = oi.Session()\nsession.__enter__()\n\n# Try importing PoreSpy for realistic structure generation.\n# If unavailable, we fall back to plain NumPy (see below).\ntry:\n import porespy as ps\n HAS_PORESPY = True\n print(\"PoreSpy available — will use blobs() for structure generation.\")\nexcept ImportError:\n HAS_PORESPY = False\n print(\"PoreSpy not found — using NumPy random generation instead.\")"
},
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 1. Generating a Synthetic Microstructure\n\nIf PoreSpy is available, its `blobs` generator creates random two-phase structures that resemble real porous media (e.g. electrode microstructures from X-ray tomography). Otherwise we fall back to `np.random.choice`, which produces a simpler random structure but requires no extra dependencies.\n\nWe generate a $100^3$ voxel grid (~1 million voxels) with a target porosity of 0.40:\n- **Phase 0** \u2014 solid matrix\n- **Phase 1** \u2014 void / pore space\n\n**Phase convention:** OpenImpala labels each voxel with an integer phase ID. The `phase` parameter in functions like `oi.tortuosity(data, phase=1)` tells the solver which phase to treat as the transport medium. The default is `phase=0`, so **always pass `phase=` explicitly** to avoid silently computing the tortuosity of the wrong phase. There is no \"correct\" phase numbering \u2014 it depends on how your image was segmented."
+ "source": "## 1. Generating a Synthetic Microstructure\n\nIf PoreSpy is available, its `blobs` generator creates random two-phase structures that resemble real porous media (e.g. electrode microstructures from X-ray tomography). Otherwise we fall back to `np.random.choice`, which produces a simpler random structure but requires no extra dependencies.\n\nWe generate a $100^3$ voxel grid (~1 million voxels) with a target porosity of 0.40:\n- **Phase 0** — solid matrix\n- **Phase 1** — void / pore space\n\n**Phase convention:** OpenImpala labels each voxel with an integer phase ID. The `phase` parameter in functions like `oi.tortuosity(data, phase=1)` tells the solver which phase to treat as the transport medium. The default is `phase=0`, so **always pass `phase=` explicitly** to avoid silently computing the tortuosity of the wrong phase. There is no \"correct\" phase numbering — it depends on how your image was segmented."
},
{
"cell_type": "code",
@@ -58,7 +58,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 3. Parametric Sweep: Tortuosity vs. Porosity\n\nBecause OpenImpala operates directly on NumPy arrays, scripting parametric studies is straightforward. Below we sweep over a range of porosities. At low porosity the pore network may become disconnected and the phase will not percolate \u2014 in that case, tortuosity is undefined and we skip that point."
+ "source": "## 3. Parametric Sweep: Tortuosity vs. Porosity\n\nBecause OpenImpala operates directly on NumPy arrays, scripting parametric studies is straightforward. Below we sweep over a range of porosities. At low porosity the pore network may become disconnected and the phase will not percolate — in that case, tortuosity is undefined and we skip that point."
},
{
"cell_type": "code",
@@ -69,13 +69,13 @@
},
{
"cell_type": "markdown",
- "source": "## 4. Loading Your Own Data\n\nThe examples above used synthetic microstructures. When you have a real segmented 3D image (from X-ray CT, FIB-SEM, etc.), the workflow is the same \u2014 load it into a NumPy array, then pass it to OpenImpala.\n\n```python\n# TIFF stack (most common format from tomography)\nimport tifffile\ndata = tifffile.imread(\"my_scan.tif\").astype(np.int32)\nresult = oi.tortuosity(data, phase=1, direction=\"z\")\n\n# HDF5 dataset\nimport h5py\nwith h5py.File(\"my_scan.hdf5\", \"r\") as f:\n data = f[\"/exchange/data\"][:].astype(np.int32)\n\n# RAW binary (flat file, you must know the dimensions)\ndata = np.fromfile(\"my_scan.raw\", dtype=np.uint8).reshape((Nz, Ny, Nx))\ndata = (data > 128).astype(np.int32) # threshold to binary\n```\n\n**Key points:**\n- The array must be `int32` with integer phase labels (0, 1, 2, ...).\n- If your image is greyscale, **threshold it first** to produce a segmented phase map.\n- The array shape convention is **(Z, Y, X)** \u2014 the first axis is the stack direction.\n- For very large files that don't fit in RAM, use OpenImpala's built-in parallel C++ readers via `oi.read_image()` \u2014 see [Tutorial 7](07_hpc_scaling.ipynb).\n\n[Tutorial 2](02_digital_twin.ipynb) demonstrates a full worked example with a real TIFF stack.",
+ "source": "## 4. Loading Your Own Data\n\nThe examples above used synthetic microstructures. When you have a real segmented 3D image (from X-ray CT, FIB-SEM, etc.), the workflow is the same — load it into a NumPy array, then pass it to OpenImpala.\n\n```python\n# TIFF stack (most common format from tomography)\nimport tifffile\ndata = tifffile.imread(\"my_scan.tif\").astype(np.int32)\nresult = oi.tortuosity(data, phase=1, direction=\"z\")\n\n# HDF5 dataset\nimport h5py\nwith h5py.File(\"my_scan.hdf5\", \"r\") as f:\n data = f[\"/exchange/data\"][:].astype(np.int32)\n\n# RAW binary (flat file, you must know the dimensions)\ndata = np.fromfile(\"my_scan.raw\", dtype=np.uint8).reshape((Nz, Ny, Nx))\ndata = (data > 128).astype(np.int32) # threshold to binary\n```\n\n**Key points:**\n- The array must be `int32` with integer phase labels (0, 1, 2, ...).\n- If your image is greyscale, **threshold it first** to produce a segmented phase map.\n- The array shape convention is **(Z, Y, X)** — the first axis is the stack direction.\n- For very large files that don't fit in RAM, use OpenImpala's built-in parallel C++ readers via `oi.read_image()` — see [Tutorial 7](07_hpc_scaling.ipynb).\n\n[Tutorial 2](02_digital_twin.ipynb) demonstrates a full worked example with a real TIFF stack.",
"metadata": {}
},
{
"cell_type": "markdown",
"metadata": {},
- "source": "## Next Steps\n\nIn this tutorial we generated a microstructure, solved for its transport properties, and ran a parametric sweep \u2014 all from a single Python session, with no mesh generation or input files needed.\n\n**Up next in the series:**\n- [Tutorial 2: From 3D Image to Device Model](02_digital_twin.ipynb) \u2014 Load a real CT scan, measure anisotropy, and export parameters to PyBaMM.\n- [Tutorial 3: REV and Uncertainty](03_rev_and_uncertainty.ipynb) \u2014 Determine whether your sample volume is statistically representative.\n- [Tutorial 4: Effective Diffusivity and Field Visualisation](04_multiphase_and_fields.ipynb) \u2014 Use the cell-problem solver and visualise internal fields.\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) \u2014 Generate training data for machine learning models.\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) \u2014 Use OpenImpala inside an optimisation loop.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) \u2014 Deploy on clusters with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **AMReX:** W. Zhang et al., *AMReX: a framework for block-structured adaptive mesh refinement*, J. Open Source Software 4(37), 1370 (2019). [doi:10.21105/joss.01370](https://doi.org/10.21105/joss.01370)\n3. **HYPRE:** R. D. Falgout & U. M. Yang, *hypre: A library of high performance preconditioners*, Computational Science \u2014 ICCS 2002, LNCS 2331, pp. 632\u2013641 (2002). [doi:10.1007/3-540-47789-6_66](https://doi.org/10.1007/3-540-47789-6_66)\n4. **PoreSpy:** J. T. Gostick et al., *PoreSpy: A Python toolkit for quantitative analysis of porous media images*, J. Open Source Software 4(37), 1296 (2019). [doi:10.21105/joss.01296](https://doi.org/10.21105/joss.01296)\n5. 
**Tortuosity in porous media:** B. Tjaden et al., *On the origin and application of the Bruggeman correlation for analysing transport phenomena in electrochemical systems*, Current Opinion in Chemical Engineering 12, 44\u201351 (2016). [doi:10.1016/j.coche.2016.02.006](https://doi.org/10.1016/j.coche.2016.02.006)"
+ "source": "## Next Steps\n\nIn this tutorial we generated a microstructure, solved for its transport properties, and ran a parametric sweep — all from a single Python session, with no mesh generation or input files needed.\n\n**Up next in the series:**\n- [Tutorial 2: From 3D Image to Device Model](02_digital_twin.ipynb) — Load a real CT scan, measure anisotropy, and export parameters to PyBaMM.\n- [Tutorial 3: REV and Uncertainty](03_rev_and_uncertainty.ipynb) — Determine whether your sample volume is statistically representative.\n- [Tutorial 4: Effective Diffusivity and Field Visualisation](04_multiphase_and_fields.ipynb) — Use the cell-problem solver and visualise internal fields.\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) — Generate training data for machine learning models.\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) — Use OpenImpala inside an optimisation loop.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) — Deploy on clusters with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **AMReX:** W. Zhang et al., *AMReX: a framework for block-structured adaptive mesh refinement*, J. Open Source Software 4(37), 1370 (2019). [doi:10.21105/joss.01370](https://doi.org/10.21105/joss.01370)\n3. **HYPRE:** R. D. Falgout & U. M. Yang, *hypre: A library of high performance preconditioners*, Computational Science — ICCS 2002, LNCS 2331, pp. 632–641 (2002). [doi:10.1007/3-540-47789-6_66](https://doi.org/10.1007/3-540-47789-6_66)\n4. **PoreSpy:** J. T. Gostick et al., *PoreSpy: A Python toolkit for quantitative analysis of porous media images*, J. Open Source Software 4(37), 1296 (2019). [doi:10.21105/joss.01296](https://doi.org/10.21105/joss.01296)\n5. **Tortuosity in porous media:** B. 
Tjaden et al., *On the origin and application of the Bruggeman correlation for analysing transport phenomena in electrochemical systems*, Current Opinion in Chemical Engineering 12, 44–51 (2016). [doi:10.1016/j.coche.2016.02.006](https://doi.org/10.1016/j.coche.2016.02.006)"
}
],
"metadata": {
@@ -91,4 +91,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/02_digital_twin.ipynb b/tutorials/02_digital_twin.ipynb
index e4f41a3b..5b51c438 100644
--- a/tutorials/02_digital_twin.ipynb
+++ b/tutorials/02_digital_twin.ipynb
@@ -3,17 +3,14 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 2 of 7: From 3D Image to Device Model\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nA common workflow in electrochemical research is to characterise a 3D microstructure from X-ray tomography and feed the resulting transport parameters into a continuum device model such as [PyBaMM](https://pybamm.org/). This tutorial walks through that pipeline end-to-end.\n\n**What you will learn:**\n1. Load a real 3D TIFF image stack.\n2. Compute directional tortuosity (X, Y, Z) to quantify anisotropy.\n3. Visualise the 3D diffusion field using the C++ core API and `yt`.\n4. Export results to the BPX (Battery Parameter eXchange) JSON format.\n5. Import those parameters into PyBaMM and run a discharge simulation.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) \u2014 basic OpenImpala workflow (volume fraction, percolation, tortuosity)."
+ "source": "
\n\n# Tutorial 2 of 7: From 3D Image to Device Model\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nA common workflow in electrochemical research is to characterise a 3D microstructure from X-ray tomography and feed the resulting transport parameters into a continuum device model such as [PyBaMM](https://pybamm.org/). This tutorial walks through that pipeline end-to-end.\n\n**What you will learn:**\n1. Load a real 3D TIFF image stack.\n2. Compute directional tortuosity (X, Y, Z) to quantify anisotropy.\n3. Visualise the 3D diffusion field using the C++ core API and `yt`.\n4. Export results to the BPX (Battery Parameter eXchange) JSON format.\n5. Import those parameters into PyBaMM and run a discharge simulation.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) — basic OpenImpala workflow (volume fraction, percolation, tortuosity)."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# Install OpenImpala, PyBaMM, and visualization utilities\n",
- "!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 pybamm bpx tifffile matplotlib yt"
- ]
+ "source": "# Install OpenImpala, PyBaMM, and visualization utilities\n!pip install -q openimpala pybamm bpx tifffile matplotlib yt"
},
{
"cell_type": "code",
@@ -171,7 +168,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## Next Steps\n\nThis tutorial demonstrated the full pipeline from a 3D tomography image to a device-level simulation, with BPX as the interchange format.\n\n**Continue the series:**\n- [Tutorial 3: REV and Uncertainty](03_rev_and_uncertainty.ipynb) \u2014 How large does your sample need to be for statistically representative results?\n- [Tutorial 4: Effective Diffusivity and Field Visualisation](04_multiphase_and_fields.ipynb) \u2014 Extend to the cell-problem solver and multi-phase composites.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) \u2014 Analyse full-resolution synchrotron datasets with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **PyBaMM:** M. Sulzer et al., *Python Battery Mathematical Modelling (PyBaMM)*, J. Open Research Software 9(1), 14 (2021). [doi:10.5334/jors.309](https://doi.org/10.5334/jors.309)\n3. **BPX standard:** *Battery Parameter eXchange (BPX)*, an open standard for lithium-ion battery parameterisation. [GitHub](https://github.com/pybop-team/BPX)\n4. **Electrode tortuosity & anisotropy:** M. Ebner et al., *Tortuosity anisotropy in lithium-ion battery electrodes*, Advanced Energy Materials 4(5), 1301278 (2014). [doi:10.1002/aenm.201301278](https://doi.org/10.1002/aenm.201301278)\n5. **yt visualisation:** M. J. Turk et al., *yt: A multi-code analysis toolkit for astrophysical simulation data*, Astrophys. J. Suppl. 192, 9 (2011). [doi:10.1088/0067-0049/192/1/9](https://doi.org/10.1088/0067-0049/192/1/9)\n6. **Digital twin concept for batteries:** A. A. Franco et al., *Boosting rechargeable batteries R&D by multiscale modeling: myth or reality?*, Chemical Reviews 119(7), 4569\u20134627 (2019). [doi:10.1021/acs.chemrev.8b00239](https://doi.org/10.1021/acs.chemrev.8b00239)"
+ "source": "## Next Steps\n\nThis tutorial demonstrated the full pipeline from a 3D tomography image to a device-level simulation, with BPX as the interchange format.\n\n**Continue the series:**\n- [Tutorial 3: REV and Uncertainty](03_rev_and_uncertainty.ipynb) — How large does your sample need to be for statistically representative results?\n- [Tutorial 4: Effective Diffusivity and Field Visualisation](04_multiphase_and_fields.ipynb) — Extend to the cell-problem solver and multi-phase composites.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) — Analyse full-resolution synchrotron datasets with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **PyBaMM:** M. Sulzer et al., *Python Battery Mathematical Modelling (PyBaMM)*, J. Open Research Software 9(1), 14 (2021). [doi:10.5334/jors.309](https://doi.org/10.5334/jors.309)\n3. **BPX standard:** *Battery Parameter eXchange (BPX)*, an open standard for lithium-ion battery parameterisation. [GitHub](https://github.com/pybop-team/BPX)\n4. **Electrode tortuosity & anisotropy:** M. Ebner et al., *Tortuosity anisotropy in lithium-ion battery electrodes*, Advanced Energy Materials 4(5), 1301278 (2014). [doi:10.1002/aenm.201301278](https://doi.org/10.1002/aenm.201301278)\n5. **yt visualisation:** M. J. Turk et al., *yt: A multi-code analysis toolkit for astrophysical simulation data*, Astrophys. J. Suppl. 192, 9 (2011). [doi:10.1088/0067-0049/192/1/9](https://doi.org/10.1088/0067-0049/192/1/9)\n6. **Digital twin concept for batteries:** A. A. Franco et al., *Boosting rechargeable batteries R&D by multiscale modeling: myth or reality?*, Chemical Reviews 119(7), 4569–4627 (2019). [doi:10.1021/acs.chemrev.8b00239](https://doi.org/10.1021/acs.chemrev.8b00239)"
}
],
"metadata": {
@@ -187,4 +184,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/03_rev_and_uncertainty.ipynb b/tutorials/03_rev_and_uncertainty.ipynb
index 508c883f..a1b17617 100644
--- a/tutorials/03_rev_and_uncertainty.ipynb
+++ b/tutorials/03_rev_and_uncertainty.ipynb
@@ -10,7 +10,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "# Install OpenImpala and utilities\n!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 tifffile matplotlib scipy"
+ "source": "# Install OpenImpala and utilities\n!pip install -q openimpala tifffile matplotlib"
},
{
"cell_type": "code",
diff --git a/tutorials/04_multiphase_and_fields.ipynb b/tutorials/04_multiphase_and_fields.ipynb
index ea239b37..0e4c463e 100644
--- a/tutorials/04_multiphase_and_fields.ipynb
+++ b/tutorials/04_multiphase_and_fields.ipynb
@@ -3,14 +3,14 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 4 of 7: Multi-Phase Transport and Field Visualisation\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nOpenImpala provides two approaches to computing transport properties:\n\n1. **Tortuosity solver** (`TortuosityHypre`) \u2014 Solves $\\nabla \\cdot (D\\,\\nabla\\varphi) = 0$ with Dirichlet boundary conditions and computes $\\tau$ from the resulting flux. This is what the high-level `oi.tortuosity()` function uses.\n2. **Effective diffusivity solver** (`EffectiveDiffusivityHypre`) \u2014 Solves the periodic cell problem $\\nabla \\cdot (D\\,\\nabla\\chi) = -\\nabla \\cdot (D\\,\\hat{e})$ to compute the effective diffusivity tensor via homogenisation.\n\nBoth give consistent results on binary structures, but the cell-problem approach generalises naturally to multi-phase composites with spatially varying $D(\\mathbf{x})$.\n\nThis tutorial covers both the effective diffusivity solver *and* multi-phase transport \u2014 the ability to assign different diffusion coefficients to different material phases, which is essential for real composite materials like battery electrodes with a carbon-binder domain (CBD).\n\n**What you will learn:**\n1. Use the `openimpala.core` module directly (lower-level than `oi.tortuosity()`).\n2. Solve the effective diffusivity cell problem on a binary structure.\n3. Visualise the corrector field using AMReX plotfiles and `yt`.\n4. Build and run a multi-phase transport problem with analytically known results.\n5. Construct a 3-phase battery electrode geometry and configure per-phase diffusivities.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the high-level API. [Tutorial 2](02_digital_twin.ipynb) introduced `openimpala.core` and `yt` visualisation \u2014 we build on that here."
+ "source": "
\n\n# Tutorial 4 of 7: Multi-Phase Transport and Field Visualisation\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nOpenImpala provides two approaches to computing transport properties:\n\n1. **Tortuosity solver** (`TortuosityHypre`) — Solves $\\nabla \\cdot (D\\,\\nabla\\varphi) = 0$ with Dirichlet boundary conditions and computes $\\tau$ from the resulting flux. This is what the high-level `oi.tortuosity()` function uses.\n2. **Effective diffusivity solver** (`EffectiveDiffusivityHypre`) — Solves the periodic cell problem $\\nabla \\cdot (D\\,\\nabla\\chi) = -\\nabla \\cdot (D\\,\\hat{e})$ to compute the effective diffusivity tensor via homogenisation.\n\nBoth give consistent results on binary structures, but the cell-problem approach generalises naturally to multi-phase composites with spatially varying $D(\\mathbf{x})$.\n\nThis tutorial covers both the effective diffusivity solver *and* multi-phase transport — the ability to assign different diffusion coefficients to different material phases, which is essential for real composite materials like battery electrodes with a carbon-binder domain (CBD).\n\n**What you will learn:**\n1. Use the `openimpala.core` module directly (lower-level than `oi.tortuosity()`).\n2. Solve the effective diffusivity cell problem on a binary structure.\n3. Visualise the corrector field using AMReX plotfiles and `yt`.\n4. Build and run a multi-phase transport problem with analytically known results.\n5. Construct a 3-phase battery electrode geometry and configure per-phase diffusivities.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the high-level API. [Tutorial 2](02_digital_twin.ipynb) introduced `openimpala.core` and `yt` visualisation — we build on that here."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "# Install OpenImpala, PoreSpy, yt (for AMReX plotfile visualisation), and Matplotlib.\n!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 porespy yt matplotlib"
+ "source": "# Install OpenImpala, PoreSpy, yt (for AMReX plotfile visualisation), and Matplotlib.\n!pip install -q openimpala porespy yt matplotlib"
},
{
"cell_type": "code",
@@ -34,7 +34,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 2. Solving with the Core API\n\nThe high-level `oi.tortuosity()` function wraps several steps (VoxelImage creation, percolation check, volume fraction, solver construction). Here we perform those steps explicitly using `openimpala.core`, which gives more control \u2014 for example, enabling plotfile output.\n\nWe solve the **effective diffusivity cell problem** in the Z-direction with `write_plotfile=True` so we can visualise the corrector field afterwards."
+ "source": "## 2. Solving with the Core API\n\nThe high-level `oi.tortuosity()` function wraps several steps (VoxelImage creation, percolation check, volume fraction, solver construction). Here we perform those steps explicitly using `openimpala.core`, which gives more control — for example, enabling plotfile output.\n\nWe solve the **effective diffusivity cell problem** in the Z-direction with `write_plotfile=True` so we can visualise the corrector field afterwards."
},
{
"cell_type": "code",
@@ -46,30 +46,30 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 3. Visualising the Corrector Field\n\nThe effective diffusivity solver writes the corrector field $\\chi_z$ to an AMReX plotfile. We load it with [yt](https://yt-project.org/) and plot a slice. Regions where the field deviates strongly from zero indicate where the microstructure forces the diffusion flux to deviate from the imposed direction \u2014 this is the physical origin of tortuosity."
+ "source": "## 3. Visualising the Corrector Field\n\nThe effective diffusivity solver writes the corrector field $\\chi_z$ to an AMReX plotfile. We load it with [yt](https://yt-project.org/) and plot a slice. Regions where the field deviates strongly from zero indicate where the microstructure forces the diffusion flux to deviate from the imposed direction — this is the physical origin of tortuosity."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "# Find the generated plotfile directory\nplotfile_dirs = [d for d in glob.glob(f\"{out_dir}/*\") if os.path.isdir(d)]\nif plotfile_dirs:\n latest_plotfile = sorted(plotfile_dirs)[-1]\n print(f\"Loading plotfile: {latest_plotfile}\")\n\n yt.funcs.mylog.setLevel(40) # Suppress verbose yt logging\n ds = yt.load(latest_plotfile)\n\n # List available fields to find the corrector field name\n field_list = [f for f in ds.field_list if f[0] == \"boxlib\"]\n print(f\"Available fields: {field_list}\")\n\n # Plot a slice through the Y-midplane\n field_name = field_list[0] # Use the first available boxlib field\n slc = yt.SlicePlot(ds, normal=\"y\", fields=field_name)\n slc.set_log(field_name, False)\n slc.set_cmap(field_name, \"RdBu_r\")\n slc.annotate_title(f\"Corrector Field (Z-direction)\")\n slc.show()\nelse:\n print(\"No plotfile found \u2014 check that write_plotfile=True was set.\")"
+ "source": "# Find the generated plotfile directory\nplotfile_dirs = [d for d in glob.glob(f\"{out_dir}/*\") if os.path.isdir(d)]\nif plotfile_dirs:\n latest_plotfile = sorted(plotfile_dirs)[-1]\n print(f\"Loading plotfile: {latest_plotfile}\")\n\n yt.funcs.mylog.setLevel(40) # Suppress verbose yt logging\n ds = yt.load(latest_plotfile)\n\n # List available fields to find the corrector field name\n field_list = [f for f in ds.field_list if f[0] == \"boxlib\"]\n print(f\"Available fields: {field_list}\")\n\n # Plot a slice through the Y-midplane\n field_name = field_list[0] # Use the first available boxlib field\n slc = yt.SlicePlot(ds, normal=\"y\", fields=field_name)\n slc.set_log(field_name, False)\n slc.set_cmap(field_name, \"RdBu_r\")\n slc.annotate_title(f\"Corrector Field (Z-direction)\")\n slc.show()\nelse:\n print(\"No plotfile found — check that write_plotfile=True was set.\")"
},
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 4. Multi-Phase Transport: Worked Example\n\nReal microstructures often contain more than two phases. A lithium-ion battery cathode, for example, has:\n- **Pore** ($D = D_\\text{bulk}$) \u2014 the electrolyte-filled void\n- **Carbon-binder domain (CBD)** ($D \\approx 0.1\\,D_\\text{bulk}$) \u2014 partially conducting\n- **Active material** ($D = 0$) \u2014 impermeable solid\n\nOpenImpala's multi-phase mode assigns a different diffusion coefficient to each phase via the `tortuosity.active_phases` and `tortuosity.phase_diffusivities` parameters. The HYPRE matrix fill then uses the harmonic mean of adjacent cell coefficients at each face:\n\n$$D_\\text{face} = \\frac{2\\,D_\\text{left}\\,D_\\text{right}}{D_\\text{left} + D_\\text{right}}$$\n\nThis is physically correct (series resistance across an interface) and ensures that a zero-diffusivity phase creates an impermeable boundary.\n\n### Analytical Benchmarks\n\nWe validate multi-phase transport using two classic composite geometries with known analytical solutions on a discrete $N$-cell grid:\n\n**Series layers** (layers perpendicular to flow) \u2014 the Reuss / harmonic bound:\n$$\\tau_\\text{series} = \\frac{(N-1)(D_0 + D_1)}{2N \\cdot D_0 \\cdot D_1}$$\n\n**Parallel layers** (layers parallel to flow) \u2014 the Voigt / arithmetic bound:\n$$\\tau_\\text{parallel} = \\frac{2(N-1)}{N(D_0 + D_1)}$$\n\nThese provide exact targets for validating the multi-phase solver."
+ "source": "## 4. Multi-Phase Transport: Worked Example\n\nReal microstructures often contain more than two phases. A lithium-ion battery cathode, for example, has:\n- **Pore** ($D = D_\\text{bulk}$) — the electrolyte-filled void\n- **Carbon-binder domain (CBD)** ($D \\approx 0.1\\,D_\\text{bulk}$) — partially conducting\n- **Active material** ($D = 0$) — impermeable solid\n\nOpenImpala's multi-phase mode assigns a different diffusion coefficient to each phase via the `tortuosity.active_phases` and `tortuosity.phase_diffusivities` parameters. The HYPRE matrix fill then uses the harmonic mean of adjacent cell coefficients at each face:\n\n$$D_\\text{face} = \\frac{2\\,D_\\text{left}\\,D_\\text{right}}{D_\\text{left} + D_\\text{right}}$$\n\nThis is physically correct (series resistance across an interface) and ensures that a zero-diffusivity phase creates an impermeable boundary.\n\n### Analytical Benchmarks\n\nWe validate multi-phase transport using two classic composite geometries with known analytical solutions on a discrete $N$-cell grid:\n\n**Series layers** (layers perpendicular to flow) — the Reuss / harmonic bound:\n$$\\tau_\\text{series} = \\frac{(N-1)(D_0 + D_1)}{2N \\cdot D_0 \\cdot D_1}$$\n\n**Parallel layers** (layers parallel to flow) — the Voigt / arithmetic bound:\n$$\\tau_\\text{parallel} = \\frac{2(N-1)}{N(D_0 + D_1)}$$\n\nThese provide exact targets for validating the multi-phase solver."
},
{
"cell_type": "code",
- "source": "# --- Build synthetic multi-phase structures ---\n# Alternating layers of phase 0 (D=1.0) and phase 1 (D=0.5)\n\nN_mp = 32\nD0, D1 = 1.0, 0.5\n\n# Series: layers perpendicular to flow (X-direction)\nseries = np.zeros((N_mp, N_mp, N_mp), dtype=np.int32)\nfor i in range(N_mp):\n series[:, :, i] = i % 2 # alternating layers along X\n\n# Parallel: layers parallel to flow (Y-direction layering, X flow)\nparallel = np.zeros((N_mp, N_mp, N_mp), dtype=np.int32)\nfor j in range(N_mp):\n parallel[:, j, :] = j % 2 # alternating layers along Y\n\n# Visualise both geometries\nfig, axes = plt.subplots(1, 2, figsize=(10, 4), dpi=120)\ncmap = plt.cm.colors.ListedColormap(['#4ECDC4', '#FF6B6B'])\n\nax = axes[0]\nax.imshow(series[N_mp//2, :, :], cmap=cmap, interpolation='nearest')\nax.set_title(\"Series layers\\n(perpendicular to X flow)\")\nax.set_xlabel(\"X (flow direction) \u2192\")\nax.set_ylabel(\"Y\")\nax.set_aspect('equal')\n\nax = axes[1]\nax.imshow(parallel[N_mp//2, :, :], cmap=cmap, interpolation='nearest')\nax.set_title(\"Parallel layers\\n(parallel to X flow)\")\nax.set_xlabel(\"X (flow direction) \u2192\")\nax.set_ylabel(\"Y\")\nax.set_aspect('equal')\n\n# Legend\nfrom matplotlib.patches import Patch\nlegend_elements = [Patch(facecolor='#4ECDC4', label=f'Phase 0 (D={D0})'),\n Patch(facecolor='#FF6B6B', label=f'Phase 1 (D={D1})')]\nfig.legend(handles=legend_elements, loc='lower center', ncol=2, fontsize=10,\n bbox_to_anchor=(0.5, -0.02))\nplt.tight_layout()\nplt.show()\n\nprint(f\"Grid size: {N_mp}^3\")\nprint(f\"Phase diffusivities: D0={D0}, D1={D1}\")",
+ "source": "# --- Build synthetic multi-phase structures ---\n# Alternating layers of phase 0 (D=1.0) and phase 1 (D=0.5)\n\nN_mp = 32\nD0, D1 = 1.0, 0.5\n\n# Series: layers perpendicular to flow (X-direction)\nseries = np.zeros((N_mp, N_mp, N_mp), dtype=np.int32)\nfor i in range(N_mp):\n series[:, :, i] = i % 2 # alternating layers along X\n\n# Parallel: layers parallel to flow (Y-direction layering, X flow)\nparallel = np.zeros((N_mp, N_mp, N_mp), dtype=np.int32)\nfor j in range(N_mp):\n parallel[:, j, :] = j % 2 # alternating layers along Y\n\n# Visualise both geometries\nfig, axes = plt.subplots(1, 2, figsize=(10, 4), dpi=120)\ncmap = plt.cm.colors.ListedColormap(['#4ECDC4', '#FF6B6B'])\n\nax = axes[0]\nax.imshow(series[N_mp//2, :, :], cmap=cmap, interpolation='nearest')\nax.set_title(\"Series layers\\n(perpendicular to X flow)\")\nax.set_xlabel(\"X (flow direction) →\")\nax.set_ylabel(\"Y\")\nax.set_aspect('equal')\n\nax = axes[1]\nax.imshow(parallel[N_mp//2, :, :], cmap=cmap, interpolation='nearest')\nax.set_title(\"Parallel layers\\n(parallel to X flow)\")\nax.set_xlabel(\"X (flow direction) →\")\nax.set_ylabel(\"Y\")\nax.set_aspect('equal')\n\n# Legend\nfrom matplotlib.patches import Patch\nlegend_elements = [Patch(facecolor='#4ECDC4', label=f'Phase 0 (D={D0})'),\n Patch(facecolor='#FF6B6B', label=f'Phase 1 (D={D1})')]\nfig.legend(handles=legend_elements, loc='lower center', ncol=2, fontsize=10,\n bbox_to_anchor=(0.5, -0.02))\nplt.tight_layout()\nplt.show()\n\nprint(f\"Grid size: {N_mp}^3\")\nprint(f\"Phase diffusivities: D0={D0}, D1={D1}\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
- "source": "### Running the Multi-Phase Solver\n\nMulti-phase transport is configured via AMReX's `ParmParse` parameter database. When using the C++ executable (or the `openimpala.core` API), you specify which phases are active and their diffusion coefficients in an **inputs file**:\n\n```\ntortuosity.active_phases = 0 1\ntortuosity.phase_diffusivities = 1.0 0.5\n```\n\nBelow we generate inputs files for both the series and parallel geometries, write the phase data as a RAW binary file, and run the solver. This is the same workflow you would use on an HPC cluster \u2014 the only difference is the `mpirun` prefix.",
+ "source": "### Running the Multi-Phase Solver\n\nMulti-phase transport is configured via AMReX's `ParmParse` parameter database. When using the C++ executable (or the `openimpala.core` API), you specify which phases are active and their diffusion coefficients in an **inputs file**:\n\n```\ntortuosity.active_phases = 0 1\ntortuosity.phase_diffusivities = 1.0 0.5\n```\n\nBelow we generate inputs files for both the series and parallel geometries, write the phase data as a RAW binary file, and run the solver. This is the same workflow you would use on an HPC cluster — the only difference is the `mpirun` prefix.",
"metadata": {}
},
{
@@ -86,14 +86,14 @@
},
{
"cell_type": "code",
- "source": "# --- Visualise the bounds as a function of D1/D0 ratio ---\nratios = np.linspace(0.01, 1.0, 200)\ntau_uniform = (N_mp - 1) / N_mp\n\ntau_s = (N_mp - 1) * (1.0 + ratios) / (2 * N_mp * 1.0 * ratios)\ntau_p = 2 * (N_mp - 1) / (N_mp * (1.0 + ratios))\n\nfig, ax = plt.subplots(figsize=(8, 5), dpi=120)\nax.fill_between(ratios, tau_p, tau_s, alpha=0.15, color='#1f77b4',\n label='Feasible region')\nax.plot(ratios, tau_s, '-', color='#d62728', lw=2, label='Series (Reuss bound)')\nax.plot(ratios, tau_p, '-', color='#2ca02c', lw=2, label='Parallel (Voigt bound)')\nax.axhline(tau_uniform, color='gray', ls=':', lw=1, label=f'Uniform (tau={(N_mp-1)/N_mp:.4f})')\n\n# Mark the specific case D1/D0 = 0.5\nax.plot(D1/D0, tau_series_exact, 'o', color='#d62728', ms=10, zorder=5)\nax.plot(D1/D0, tau_parallel_exact, 'o', color='#2ca02c', ms=10, zorder=5)\nax.annotate(f'Series: tau={tau_series_exact:.4f}',\n xy=(D1/D0, tau_series_exact), xytext=(0.3, tau_series_exact + 0.15),\n arrowprops=dict(arrowstyle='->', color='#d62728'), fontsize=9, color='#d62728')\nax.annotate(f'Parallel: tau={tau_parallel_exact:.4f}',\n xy=(D1/D0, tau_parallel_exact), xytext=(0.3, tau_parallel_exact - 0.12),\n arrowprops=dict(arrowstyle='->', color='#2ca02c'), fontsize=9, color='#2ca02c')\n\nax.set_xlabel(r'$D_1 / D_0$', fontsize=12)\nax.set_ylabel(r'Tortuosity $\\tau$', fontsize=12)\nax.set_title(f'Reuss\u2013Voigt Bounds for Two-Phase Composite (N={N_mp})', fontweight='bold')\nax.legend(loc='upper right', fontsize=9)\nax.set_xlim(0, 1.05)\nax.set_ylim(0, max(tau_s) * 1.1)\nax.grid(True, alpha=0.3)\nax.spines['top'].set_visible(False)\nax.spines['right'].set_visible(False)\nplt.tight_layout()\nplt.show()\n\nprint(f\"\\nFor D1/D0 = {D1/D0}:\")\nprint(f\" Series bound: tau = {tau_series_exact:.6f}\")\nprint(f\" Parallel bound: tau = {tau_parallel_exact:.6f}\")\nprint(f\" Ratio (series/parallel): {tau_series_exact/tau_parallel_exact:.2f}x\")",
+ "source": "# --- Visualise the bounds as a function of D1/D0 ratio ---\nratios = np.linspace(0.01, 1.0, 200)\ntau_uniform = (N_mp - 1) / N_mp\n\ntau_s = (N_mp - 1) * (1.0 + ratios) / (2 * N_mp * 1.0 * ratios)\ntau_p = 2 * (N_mp - 1) / (N_mp * (1.0 + ratios))\n\nfig, ax = plt.subplots(figsize=(8, 5), dpi=120)\nax.fill_between(ratios, tau_p, tau_s, alpha=0.15, color='#1f77b4',\n label='Feasible region')\nax.plot(ratios, tau_s, '-', color='#d62728', lw=2, label='Series (Reuss bound)')\nax.plot(ratios, tau_p, '-', color='#2ca02c', lw=2, label='Parallel (Voigt bound)')\nax.axhline(tau_uniform, color='gray', ls=':', lw=1, label=f'Uniform (tau={(N_mp-1)/N_mp:.4f})')\n\n# Mark the specific case D1/D0 = 0.5\nax.plot(D1/D0, tau_series_exact, 'o', color='#d62728', ms=10, zorder=5)\nax.plot(D1/D0, tau_parallel_exact, 'o', color='#2ca02c', ms=10, zorder=5)\nax.annotate(f'Series: tau={tau_series_exact:.4f}',\n xy=(D1/D0, tau_series_exact), xytext=(0.3, tau_series_exact + 0.15),\n arrowprops=dict(arrowstyle='->', color='#d62728'), fontsize=9, color='#d62728')\nax.annotate(f'Parallel: tau={tau_parallel_exact:.4f}',\n xy=(D1/D0, tau_parallel_exact), xytext=(0.3, tau_parallel_exact - 0.12),\n arrowprops=dict(arrowstyle='->', color='#2ca02c'), fontsize=9, color='#2ca02c')\n\nax.set_xlabel(r'$D_1 / D_0$', fontsize=12)\nax.set_ylabel(r'Tortuosity $\\tau$', fontsize=12)\nax.set_title(f'Reuss–Voigt Bounds for Two-Phase Composite (N={N_mp})', fontweight='bold')\nax.legend(loc='upper right', fontsize=9)\nax.set_xlim(0, 1.05)\nax.set_ylim(0, max(tau_s) * 1.1)\nax.grid(True, alpha=0.3)\nax.spines['top'].set_visible(False)\nax.spines['right'].set_visible(False)\nplt.tight_layout()\nplt.show()\n\nprint(f\"\\nFor D1/D0 = {D1/D0}:\")\nprint(f\" Series bound: tau = {tau_series_exact:.6f}\")\nprint(f\" Parallel bound: tau = {tau_parallel_exact:.6f}\")\nprint(f\" Ratio (series/parallel): {tau_series_exact/tau_parallel_exact:.2f}x\")",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
- "source": "### Extension: Three-Phase Battery Electrode\n\nA realistic battery electrode has three phases. Below we construct a synthetic 3-phase microstructure and write the corresponding inputs file. The key insight is that phases with $D = 0$ are automatically excluded from the linear system \u2014 their matrix rows become $A_{ii} = 1$, $\\text{rhs}_i = 0$, decoupling them completely.\n\nThis means you can model:\n- **Pore** (phase 0): $D = 1.0$ \u2014 full electrolyte diffusivity\n- **CBD** (phase 1): $D = 0.1$ \u2014 reduced diffusivity through carbon-binder\n- **Active material** (phase 2): $D = 0$ \u2014 impermeable solid particle",
+ "source": "### Extension: Three-Phase Battery Electrode\n\nA realistic battery electrode has three phases. Below we construct a synthetic 3-phase microstructure and write the corresponding inputs file. The key insight is that phases with $D = 0$ are automatically excluded from the linear system — their matrix rows become $A_{ii} = 1$, $\\text{rhs}_i = 0$, decoupling them completely.\n\nThis means you can model:\n- **Pore** (phase 0): $D = 1.0$ — full electrolyte diffusivity\n- **CBD** (phase 1): $D = 0.1$ — reduced diffusivity through carbon-binder\n- **Active material** (phase 2): $D = 0$ — impermeable solid particle",
"metadata": {}
},
{
@@ -105,7 +105,7 @@
},
{
"cell_type": "markdown",
- "source": "## Next Steps\n\nThis tutorial demonstrated the effective diffusivity cell-problem solver, field visualisation, and multi-phase transport with analytically validated benchmarks. The key points:\n\n- **Effective diffusivity** via homogenisation gives the full $D_\\text{eff}$ tensor, not just scalar tortuosity.\n- **Multi-phase transport** assigns per-phase diffusion coefficients via `tortuosity.active_phases` and `tortuosity.phase_diffusivities` in the inputs file.\n- **Analytical validation** against Reuss (series) and Voigt (parallel) bounds confirms the harmonic mean face coefficient implementation.\n- **Three-phase structures** (pore/CBD/solid) are handled naturally \u2014 phases with $D=0$ are decoupled automatically.\n\nFor the test inputs used in OpenImpala's CI/CD regression suite, see `tests/inputs/tMultiPhaseTransport*.inputs`.\n\n**Continue the series:**\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) \u2014 Generate labelled datasets for machine learning.\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) \u2014 Use OpenImpala as a cost-function evaluator in an optimisation loop.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) \u2014 Run on larger datasets with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Homogenisation theory:** S. Torquato, *Random Heterogeneous Materials: Microstructure and Macroscopic Properties*, Springer (2002). Chapter 17 covers the cell problem for effective conductivity.\n3. **Reuss and Voigt bounds:** W. Voigt, *Lehrbuch der Kristallphysik*, Teubner (1928); A. Reuss, *Berechnung der Fliessgrenze von Mischkristallen*, Z. Angew. Math. Mech. 9, 49\u201358 (1929). 
The harmonic (Reuss) and arithmetic (Voigt) means give the tightest possible bounds without geometric information.\n4. **Effective diffusivity in electrodes:** L. Holzer et al., *Microstructure\u2013property relationships in a gas diffusion layer (GDL) for Polymer Electrolyte Fuel Cells, Part I: effect of compression and anisotropy of dry GDL*, Electrochimica Acta 227, 419\u2013434 (2017). [doi:10.1016/j.electacta.2017.01.030](https://doi.org/10.1016/j.electacta.2017.01.030)\n5. **Carbon-binder domain modelling:** F. L. E. Usseglio-Viretta et al., *Resolving the discrepancy in tortuosity factor estimation for Li-ion battery electrodes through micro-macro modeling and experiment*, J. Electrochem. Soc. 165(14), A3403 (2018). [doi:10.1149/2.0731814jes](https://doi.org/10.1149/2.0731814jes)\n6. **yt for AMReX data:** M. J. Turk et al., *yt: A multi-code analysis toolkit for astrophysical simulation data*, Astrophys. J. Suppl. 192, 9 (2011). [doi:10.1088/0067-0049/192/1/9](https://doi.org/10.1088/0067-0049/192/1/9)",
+ "source": "## Next Steps\n\nThis tutorial demonstrated the effective diffusivity cell-problem solver, field visualisation, and multi-phase transport with analytically validated benchmarks. The key points:\n\n- **Effective diffusivity** via homogenisation gives the full $D_\\text{eff}$ tensor, not just scalar tortuosity.\n- **Multi-phase transport** assigns per-phase diffusion coefficients via `tortuosity.active_phases` and `tortuosity.phase_diffusivities` in the inputs file.\n- **Analytical validation** against Reuss (series) and Voigt (parallel) bounds confirms the harmonic mean face coefficient implementation.\n- **Three-phase structures** (pore/CBD/solid) are handled naturally — phases with $D=0$ are decoupled automatically.\n\nFor the test inputs used in OpenImpala's CI/CD regression suite, see `tests/inputs/tMultiPhaseTransport*.inputs`.\n\n**Continue the series:**\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) — Generate labelled datasets for machine learning.\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) — Use OpenImpala as a cost-function evaluator in an optimisation loop.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) — Run on larger datasets with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Homogenisation theory:** S. Torquato, *Random Heterogeneous Materials: Microstructure and Macroscopic Properties*, Springer (2002). Chapter 17 covers the cell problem for effective conductivity.\n3. **Reuss and Voigt bounds:** W. Voigt, *Lehrbuch der Kristallphysik*, Teubner (1928); A. Reuss, *Berechnung der Fliessgrenze von Mischkristallen*, Z. Angew. Math. Mech. 9, 49–58 (1929). 
The harmonic (Reuss) and arithmetic (Voigt) means give the tightest possible bounds without geometric information.\n4. **Effective diffusivity in electrodes:** L. Holzer et al., *Microstructure–property relationships in a gas diffusion layer (GDL) for Polymer Electrolyte Fuel Cells, Part I: effect of compression and anisotropy of dry GDL*, Electrochimica Acta 227, 419–434 (2017). [doi:10.1016/j.electacta.2017.01.030](https://doi.org/10.1016/j.electacta.2017.01.030)\n5. **Carbon-binder domain modelling:** F. L. E. Usseglio-Viretta et al., *Resolving the discrepancy in tortuosity factor estimation for Li-ion battery electrodes through micro-macro modeling and experiment*, J. Electrochem. Soc. 165(14), A3403 (2018). [doi:10.1149/2.0731814jes](https://doi.org/10.1149/2.0731814jes)\n6. **yt for AMReX data:** M. J. Turk et al., *yt: A multi-code analysis toolkit for astrophysical simulation data*, Astrophys. J. Suppl. 192, 9 (2011). [doi:10.1088/0067-0049/192/1/9](https://doi.org/10.1088/0067-0049/192/1/9)",
"metadata": {}
}
],
@@ -122,4 +122,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/05_surrogate_modelling.ipynb b/tutorials/05_surrogate_modelling.ipynb
index 29c27410..628d5066 100644
--- a/tutorials/05_surrogate_modelling.ipynb
+++ b/tutorials/05_surrogate_modelling.ipynb
@@ -3,17 +3,14 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 5 of 7: Surrogate Modelling with OpenImpala-Generated Data\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nA surrogate model approximates an expensive computation \u2014 in our case, a 3D PDE solve \u2014 with a fast statistical or machine learning model trained on labelled examples. The bottleneck is always the labelled data: you need many (structure, property) pairs, each requiring a full physics solve.\n\n**This is where OpenImpala's HPC capabilities become a decisive advantage.** Because OpenImpala scales across hundreds of MPI ranks (see [Tutorial 7](07_hpc_scaling.ipynb)), you can generate labelled datasets that would be infeasible with serial solvers:\n\n| Campaign size | Serial solver (1 core) | OpenImpala (128 cores) |\n|--------------|----------------------|----------------------|\n| 200 samples @ $32^3$ | ~30 min | ~15 sec |\n| 2,000 samples @ $64^3$ | ~days | ~1 hour |\n| 10,000 samples @ $128^3$ | infeasible | overnight batch job |\n\nThe ML model itself (Random Forest, neural network, etc.) is generic \u2014 any solver could provide the labels. **OpenImpala's USP is the throughput**: the ability to churn through thousands of full 3D PDE solves fast enough to make large surrogate campaigns practical. On a cluster, you can parallelise both across samples (trivially, via job arrays) and within each sample (via MPI domain decomposition).\n\nThis tutorial demonstrates the workflow at small scale (200 samples on $32^3$ grids, runnable on Colab). The same script scales to production campaigns by increasing `N_SAMPLES`, `SIZE`, and launching with `mpirun`.\n\n**What you will learn:**\n1. Generate randomised microstructures with PoreSpy and label them with OpenImpala.\n2. Explore the structure-property relationships in the dataset.\n3. Train a Random Forest regressor and evaluate it on held-out data.\n4. 
Use feature importance to identify which geometric descriptors control tortuosity.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the OpenImpala API. [Tutorial 3](03_rev_and_uncertainty.ipynb) motivates the sampling strategy. Basic familiarity with scikit-learn is helpful."
+ "source": "
\n\n# Tutorial 5 of 7: Surrogate Modelling with OpenImpala-Generated Data\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nA surrogate model approximates an expensive computation — in our case, a 3D PDE solve — with a fast statistical or machine learning model trained on labelled examples. The bottleneck is always the labelled data: you need many (structure, property) pairs, each requiring a full physics solve.\n\n**This is where OpenImpala's HPC capabilities become a decisive advantage.** Because OpenImpala scales across hundreds of MPI ranks (see [Tutorial 7](07_hpc_scaling.ipynb)), you can generate labelled datasets that would be infeasible with serial solvers:\n\n| Campaign size | Serial solver (1 core) | OpenImpala (128 cores) |\n|--------------|----------------------|----------------------|\n| 200 samples @ $32^3$ | ~30 min | ~15 sec |\n| 2,000 samples @ $64^3$ | ~days | ~1 hour |\n| 10,000 samples @ $128^3$ | infeasible | overnight batch job |\n\nThe ML model itself (Random Forest, neural network, etc.) is generic — any solver could provide the labels. **OpenImpala's USP is the throughput**: the ability to churn through thousands of full 3D PDE solves fast enough to make large surrogate campaigns practical. On a cluster, you can parallelise both across samples (trivially, via job arrays) and within each sample (via MPI domain decomposition).\n\nThis tutorial demonstrates the workflow at small scale (200 samples on $32^3$ grids, runnable on Colab). The same script scales to production campaigns by increasing `N_SAMPLES`, `SIZE`, and launching with `mpirun`.\n\n**What you will learn:**\n1. Generate randomised microstructures with PoreSpy and label them with OpenImpala.\n2. Explore the structure-property relationships in the dataset.\n3. Train a Random Forest regressor and evaluate it on held-out data.\n4. 
Use feature importance to identify which geometric descriptors control tortuosity.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the OpenImpala API. [Tutorial 3](03_rev_and_uncertainty.ipynb) motivates the sampling strategy. Basic familiarity with scikit-learn is helpful."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# Install OpenImpala and ML libraries\n",
- "!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 porespy scikit-learn matplotlib"
- ]
+ "source": "# Install OpenImpala and ML libraries\n!pip install -q openimpala porespy scikit-learn matplotlib"
},
{
"cell_type": "code",
@@ -27,7 +24,7 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "## 1. Generating the Labelled Dataset\n\nFor each sample we:\n1. Generate a random structure with PoreSpy (`blobs`), varying both the target porosity and the characteristic feature size (`blobiness`).\n2. Compute three morphological features that are easy to measure on any segmented image:\n - **Porosity** ($\\varepsilon$) \u2014 volume fraction of the pore phase.\n - **Specific surface area** ($S_v$) \u2014 interfacial area per unit volume (units: 1/voxel).\n - **Hydraulic diameter estimate** ($d_h = 4\\varepsilon / S_v$) \u2014 a classical proxy for mean pore size. It introduces a nonlinear combination of $\\varepsilon$ and $S_v$ that captures how \"open\" the pore space is.\n3. Use OpenImpala to check percolation and, if the phase is connected, solve for the ground-truth tortuosity."
+ "source": "## 1. Generating the Labelled Dataset\n\nFor each sample we:\n1. Generate a random structure with PoreSpy (`blobs`), varying both the target porosity and the characteristic feature size (`blobiness`).\n2. Compute three morphological features that are easy to measure on any segmented image:\n - **Porosity** ($\\varepsilon$) — volume fraction of the pore phase.\n - **Specific surface area** ($S_v$) — interfacial area per unit volume (units: 1/voxel).\n - **Hydraulic diameter estimate** ($d_h = 4\\varepsilon / S_v$) — a classical proxy for mean pore size. It introduces a nonlinear combination of $\\varepsilon$ and $S_v$ that captures how \"open\" the pore space is.\n3. Use OpenImpala to check percolation and, if the phase is connected, solve for the ground-truth tortuosity."
},
{
"cell_type": "code",
@@ -43,19 +40,19 @@
},
{
"cell_type": "code",
- "source": "fig, axes = plt.subplots(1, 3, figsize=(14, 4), dpi=120)\ncolors = ['#1f77b4', '#ff7f0e', '#2ca02c']\n\nfor ax, col, name, c in zip(axes, range(3), feature_names, colors):\n ax.scatter(X_features[:, col], y_target, s=15, alpha=0.6, color=c, edgecolor='none')\n ax.set_xlabel(name)\n ax.set_ylabel(r\"Tortuosity ($\\tau$)\")\n ax.grid(True, alpha=0.3)\n ax.spines['top'].set_visible(False)\n ax.spines['right'].set_visible(False)\n\n # Pearson correlation\n corr = np.corrcoef(X_features[:, col], y_target)[0, 1]\n ax.set_title(f\"{name} (r = {corr:.2f})\")\n\nplt.suptitle(f\"Feature\u2013Tortuosity Relationships (N = {len(y_target)})\", fontweight='bold', y=1.02)\nplt.tight_layout()\nplt.show()",
+ "source": "fig, axes = plt.subplots(1, 3, figsize=(14, 4), dpi=120)\ncolors = ['#1f77b4', '#ff7f0e', '#2ca02c']\n\nfor ax, col, name, c in zip(axes, range(3), feature_names, colors):\n ax.scatter(X_features[:, col], y_target, s=15, alpha=0.6, color=c, edgecolor='none')\n ax.set_xlabel(name)\n ax.set_ylabel(r\"Tortuosity ($\\tau$)\")\n ax.grid(True, alpha=0.3)\n ax.spines['top'].set_visible(False)\n ax.spines['right'].set_visible(False)\n\n # Pearson correlation\n corr = np.corrcoef(X_features[:, col], y_target)[0, 1]\n ax.set_title(f\"{name} (r = {corr:.2f})\")\n\nplt.suptitle(f\"Feature–Tortuosity Relationships (N = {len(y_target)})\", fontweight='bold', y=1.02)\nplt.tight_layout()\nplt.show()",
"metadata": {},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
- "source": "## 3. Training and Evaluating the Surrogate\n\nWe hold out 20% of the data for testing. A Random Forest with 100 trees is a reasonable default \u2014 it handles nonlinear relationships, requires minimal tuning, and provides built-in feature importance estimates.",
+ "source": "## 3. Training and Evaluating the Surrogate\n\nWe hold out 20% of the data for testing. A Random Forest with 100 trees is a reasonable default — it handles nonlinear relationships, requires minimal tuning, and provides built-in feature importance estimates.",
"metadata": {}
},
{
"cell_type": "code",
- "source": "X_train, X_test, y_train, y_test = train_test_split(\n X_features, y_target, test_size=0.2, random_state=42\n)\n\nmodel = RandomForestRegressor(n_estimators=100, random_state=42)\nmodel.fit(X_train, y_train)\n\ny_pred = model.predict(X_test)\nr2 = r2_score(y_test, y_pred)\nmae = mean_absolute_error(y_test, y_pred)\n\nprint(f\"Test set size: {len(y_test)} samples\")\nprint(f\"R\u00b2 score: {r2:.3f}\")\nprint(f\"MAE: {mae:.4f}\")\n\n# --- Evaluation plots ---\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), dpi=120)\n\n# Left: True vs. Predicted\nax1.scatter(y_test, y_pred, color='#1f77b4', s=40, alpha=0.7, edgecolor='black', linewidth=0.5)\nlims = [min(y_test.min(), y_pred.min()) * 0.95, max(y_test.max(), y_pred.max()) * 1.05]\nax1.plot(lims, lims, 'r--', lw=2, label=\"Perfect prediction\")\nax1.set_xlim(lims)\nax1.set_ylim(lims)\nax1.set_xlabel(\"True Tortuosity (OpenImpala)\")\nax1.set_ylabel(\"Predicted Tortuosity (Random Forest)\")\nax1.set_title(f\"True vs. Predicted (R\u00b2 = {r2:.2f})\")\nax1.set_aspect('equal')\nax1.legend()\nax1.grid(True, alpha=0.3)\nax1.spines['top'].set_visible(False)\nax1.spines['right'].set_visible(False)\n\n# Right: Residuals\nresiduals = y_test - y_pred\nax2.scatter(y_test, residuals, color='#ff7f0e', s=40, alpha=0.7, edgecolor='black', linewidth=0.5)\nax2.axhline(0, color='r', linestyle='--', lw=2)\nax2.set_xlabel(\"True Tortuosity\")\nax2.set_ylabel(\"Residual (True - Predicted)\")\nax2.set_title(f\"Residuals (MAE = {mae:.3f})\")\nax2.grid(True, alpha=0.3)\nax2.spines['top'].set_visible(False)\nax2.spines['right'].set_visible(False)\n\nplt.tight_layout()\nplt.show()",
+ "source": "X_train, X_test, y_train, y_test = train_test_split(\n X_features, y_target, test_size=0.2, random_state=42\n)\n\nmodel = RandomForestRegressor(n_estimators=100, random_state=42)\nmodel.fit(X_train, y_train)\n\ny_pred = model.predict(X_test)\nr2 = r2_score(y_test, y_pred)\nmae = mean_absolute_error(y_test, y_pred)\n\nprint(f\"Test set size: {len(y_test)} samples\")\nprint(f\"R² score: {r2:.3f}\")\nprint(f\"MAE: {mae:.4f}\")\n\n# --- Evaluation plots ---\nfig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5), dpi=120)\n\n# Left: True vs. Predicted\nax1.scatter(y_test, y_pred, color='#1f77b4', s=40, alpha=0.7, edgecolor='black', linewidth=0.5)\nlims = [min(y_test.min(), y_pred.min()) * 0.95, max(y_test.max(), y_pred.max()) * 1.05]\nax1.plot(lims, lims, 'r--', lw=2, label=\"Perfect prediction\")\nax1.set_xlim(lims)\nax1.set_ylim(lims)\nax1.set_xlabel(\"True Tortuosity (OpenImpala)\")\nax1.set_ylabel(\"Predicted Tortuosity (Random Forest)\")\nax1.set_title(f\"True vs. Predicted (R² = {r2:.2f})\")\nax1.set_aspect('equal')\nax1.legend()\nax1.grid(True, alpha=0.3)\nax1.spines['top'].set_visible(False)\nax1.spines['right'].set_visible(False)\n\n# Right: Residuals\nresiduals = y_test - y_pred\nax2.scatter(y_test, residuals, color='#ff7f0e', s=40, alpha=0.7, edgecolor='black', linewidth=0.5)\nax2.axhline(0, color='r', linestyle='--', lw=2)\nax2.set_xlabel(\"True Tortuosity\")\nax2.set_ylabel(\"Residual (True - Predicted)\")\nax2.set_title(f\"Residuals (MAE = {mae:.3f})\")\nax2.grid(True, alpha=0.3)\nax2.spines['top'].set_visible(False)\nax2.spines['right'].set_visible(False)\n\nplt.tight_layout()\nplt.show()",
"metadata": {},
"execution_count": null,
"outputs": []
@@ -75,7 +72,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## Scaling Up: From Colab to HPC\n\nThis tutorial ran 200 samples on $32^3$ grids \u2014 a toy dataset to demonstrate the workflow. For a production surrogate, you need larger datasets on larger domains. This is where OpenImpala's exascale architecture pays off.\n\n### Parallelisation Strategy\n\nThere are two levels of parallelism you can exploit:\n\n1. **Within each sample** \u2014 `mpirun -np 32 python label_one_sample.py` uses 32 cores to solve one structure faster. Essential for large domains ($128^3$+).\n2. **Across samples** \u2014 SLURM job arrays run many independent solves simultaneously. This is embarrassingly parallel and scales linearly with cluster size.\n\n```bash\n#!/bin/bash\n#SBATCH --job-name=surrogate_campaign\n#SBATCH --array=0-999 # 1000 independent samples\n#SBATCH --ntasks-per-node=16\n#SBATCH --time=00:30:00\n\n# Each array task generates and solves one structure\nmpirun -np $SLURM_NTASKS python generate_and_label.py \\\n --sample-id $SLURM_ARRAY_TASK_ID \\\n --size 128 \\\n --output-dir ./dataset/\n```\n\nWith 1000 array tasks on a 64-node cluster, you can label 1000 structures at $128^3$ resolution in a single batch submission \u2014 a dataset that would take weeks with a serial solver.\n\n### What to do with a bigger dataset\n\n- **Increase sample count** (500-5000+) for better generalisation.\n- **Use richer features** \u2014 two-point correlation functions, chord-length distributions, Minkowski functionals, or learned CNN embeddings capture geometry more completely than porosity and surface area alone.\n- **Use larger domains** ($64^3$-$128^3$) to reduce finite-size noise (see [Tutorial 3](03_rev_and_uncertainty.ipynb) on REV).\n- **Train on the full D_eff tensor** \u2014 OpenImpala can solve all three directions, giving you $D_{xx}$, $D_{yy}$, $D_{zz}$ per sample for anisotropy-aware surrogates.\n\n**Continue the series:**\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) \u2014 Put OpenImpala inside 
a design loop to optimise microstructure geometry.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) \u2014 Run production-scale campaigns on a cluster with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Surrogate models for porous media:** S. Kench & S. J. Cooper, *Generating three-dimensional structures from a two-dimensional slice with generative adversarial network-based dimensionality reduction*, Nature Machine Intelligence 3, 299-305 (2021). [doi:10.1038/s42256-021-00322-1](https://doi.org/10.1038/s42256-021-00322-1)\n3. **ML for structure-property linkages:** R. Cang et al., *Microstructure representation and reconstruction of heterogeneous materials via deep belief network for computational material design*, J. Mech. Design 139(7), 071404 (2017). [doi:10.1115/1.4036649](https://doi.org/10.1115/1.4036649)\n4. **Random Forests:** L. Breiman, *Random Forests*, Machine Learning 45(1), 5-32 (2001). [doi:10.1023/A:1010933404324](https://doi.org/10.1023/A:1010933404324)\n5. **PoreSpy morphological descriptors:** J. T. Gostick et al., *PoreSpy: A Python toolkit for quantitative analysis of porous media images*, J. Open Source Software 4(37), 1296 (2019). [doi:10.21105/joss.01296](https://doi.org/10.21105/joss.01296)\n6. **Feature importance interpretation:** C. Molnar, *Interpretable Machine Learning*, 2nd ed. (2022). Available at [christophm.github.io/interpretable-ml-book](https://christophm.github.io/interpretable-ml-book/)"
+ "source": "## Scaling Up: From Colab to HPC\n\nThis tutorial ran 200 samples on $32^3$ grids — a toy dataset to demonstrate the workflow. For a production surrogate, you need larger datasets on larger domains. This is where OpenImpala's exascale architecture pays off.\n\n### Parallelisation Strategy\n\nThere are two levels of parallelism you can exploit:\n\n1. **Within each sample** — `mpirun -np 32 python label_one_sample.py` uses 32 cores to solve one structure faster. Essential for large domains ($128^3$+).\n2. **Across samples** — SLURM job arrays run many independent solves simultaneously. This is embarrassingly parallel and scales linearly with cluster size.\n\n```bash\n#!/bin/bash\n#SBATCH --job-name=surrogate_campaign\n#SBATCH --array=0-999 # 1000 independent samples\n#SBATCH --ntasks-per-node=16\n#SBATCH --time=00:30:00\n\n# Each array task generates and solves one structure\nmpirun -np $SLURM_NTASKS python generate_and_label.py \\\n --sample-id $SLURM_ARRAY_TASK_ID \\\n --size 128 \\\n --output-dir ./dataset/\n```\n\nWith 1000 array tasks on a 64-node cluster, you can label 1000 structures at $128^3$ resolution in a single batch submission — a dataset that would take weeks with a serial solver.\n\n### What to do with a bigger dataset\n\n- **Increase sample count** (500-5000+) for better generalisation.\n- **Use richer features** — two-point correlation functions, chord-length distributions, Minkowski functionals, or learned CNN embeddings capture geometry more completely than porosity and surface area alone.\n- **Use larger domains** ($64^3$-$128^3$) to reduce finite-size noise (see [Tutorial 3](03_rev_and_uncertainty.ipynb) on REV).\n- **Train on the full D_eff tensor** — OpenImpala can solve all three directions, giving you $D_{xx}$, $D_{yy}$, $D_{zz}$ per sample for anisotropy-aware surrogates.\n\n**Continue the series:**\n- [Tutorial 6: Topology Optimisation](06_topology_optimisation.ipynb) — Put OpenImpala inside a design loop to optimise 
microstructure geometry.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) — Run production-scale campaigns on a cluster with MPI.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Surrogate models for porous media:** S. Kench & S. J. Cooper, *Generating three-dimensional structures from a two-dimensional slice with generative adversarial network-based dimensionality reduction*, Nature Machine Intelligence 3, 299-305 (2021). [doi:10.1038/s42256-021-00322-1](https://doi.org/10.1038/s42256-021-00322-1)\n3. **ML for structure-property linkages:** R. Cang et al., *Microstructure representation and reconstruction of heterogeneous materials via deep belief network for computational material design*, J. Mech. Design 139(7), 071404 (2017). [doi:10.1115/1.4036649](https://doi.org/10.1115/1.4036649)\n4. **Random Forests:** L. Breiman, *Random Forests*, Machine Learning 45(1), 5-32 (2001). [doi:10.1023/A:1010933404324](https://doi.org/10.1023/A:1010933404324)\n5. **PoreSpy morphological descriptors:** J. T. Gostick et al., *PoreSpy: A Python toolkit for quantitative analysis of porous media images*, J. Open Source Software 4(37), 1296 (2019). [doi:10.21105/joss.01296](https://doi.org/10.21105/joss.01296)\n6. **Feature importance interpretation:** C. Molnar, *Interpretable Machine Learning*, 2nd ed. (2022). Available at [christophm.github.io/interpretable-ml-book](https://christophm.github.io/interpretable-ml-book/)"
}
],
"metadata": {
@@ -91,4 +88,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/06_topology_optimisation.ipynb b/tutorials/06_topology_optimisation.ipynb
index bec96372..b668c30f 100644
--- a/tutorials/06_topology_optimisation.ipynb
+++ b/tutorials/06_topology_optimisation.ipynb
@@ -3,14 +3,14 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 6 of 7: Topology Optimisation\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nCan we rearrange voxels to minimise tortuosity at a fixed porosity? This tutorial uses OpenImpala as the cost-function evaluator inside two different optimisation strategies and compares their results.\n\n**Why OpenImpala for optimisation?** Each iteration requires a full 3D PDE solve to evaluate the objective function. A typical optimisation campaign needs hundreds to thousands of evaluations. With a serial solver, this is painfully slow \u2014 a 100-iteration loop on a $64^3$ grid might take hours. OpenImpala's MPI-parallel solver makes each evaluation fast enough that meaningful optimisation becomes practical:\n\n| Grid size | Evaluations | Serial solver | OpenImpala (32 cores) |\n|-----------|------------|--------------|----------------------|\n| $24^3$ | 100 | ~10 min | ~30 sec |\n| $64^3$ | 500 | ~12 hours | ~20 min |\n| $128^3$ | 1000 | days | ~3 hours |\n\nOn HPC, you can also run **population-based** optimisers (genetic algorithms, particle swarm) where each member of the population is evaluated independently \u2014 scaling linearly with cluster size via job arrays or MPI task farming.\n\nBoth algorithms below use the same mutation operator \u2014 swap a random solid voxel with a random pore voxel, so porosity stays exactly fixed \u2014 but differ in their acceptance rule:\n\n- **Greedy hill-climbing**: accept a swap only if it improves (lowers) the tortuosity. Simple, but gets trapped in local optima.\n- **Simulated annealing (SA)**: sometimes accept worsening swaps early on (controlled by a \"temperature\" that cools over time), allowing the search to escape local optima before converging.\n\nBy running both on the same starting structure, we can see the practical difference.\n\n**What you will learn:**\n1. Use OpenImpala as a physics evaluator inside an optimisation loop.\n2. 
Implement greedy and simulated annealing acceptance rules.\n3. Compare convergence behaviour and final structures.\n4. Analyse what the optimiser found relative to the theoretical bound.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the core API. [Tutorial 5](05_surrogate_modelling.ipynb) introduced the idea of using OpenImpala in automated pipelines \u2014 here we close the loop with an optimiser."
+ "source": "
\n\n# Tutorial 6 of 7: Topology Optimisation\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nCan we rearrange voxels to minimise tortuosity at a fixed porosity? This tutorial uses OpenImpala as the cost-function evaluator inside two different optimisation strategies and compares their results.\n\n**Why OpenImpala for optimisation?** Each iteration requires a full 3D PDE solve to evaluate the objective function. A typical optimisation campaign needs hundreds to thousands of evaluations. With a serial solver, this is painfully slow — a 100-iteration loop on a $64^3$ grid might take hours. OpenImpala's MPI-parallel solver makes each evaluation fast enough that meaningful optimisation becomes practical:\n\n| Grid size | Evaluations | Serial solver | OpenImpala (32 cores) |\n|-----------|------------|--------------|----------------------|\n| $24^3$ | 100 | ~10 min | ~30 sec |\n| $64^3$ | 500 | ~12 hours | ~20 min |\n| $128^3$ | 1000 | days | ~3 hours |\n\nOn HPC, you can also run **population-based** optimisers (genetic algorithms, particle swarm) where each member of the population is evaluated independently — scaling linearly with cluster size via job arrays or MPI task farming.\n\nBoth algorithms below use the same mutation operator — swap a random solid voxel with a random pore voxel, so porosity stays exactly fixed — but differ in their acceptance rule:\n\n- **Greedy hill-climbing**: accept a swap only if it improves (lowers) the tortuosity. Simple, but gets trapped in local optima.\n- **Simulated annealing (SA)**: sometimes accept worsening swaps early on (controlled by a \"temperature\" that cools over time), allowing the search to escape local optima before converging.\n\nBy running both on the same starting structure, we can see the practical difference.\n\n**What you will learn:**\n1. Use OpenImpala as a physics evaluator inside an optimisation loop.\n2. Implement greedy and simulated annealing acceptance rules.\n3. 
Compare convergence behaviour and final structures.\n4. Analyse what the optimiser found relative to the theoretical bound.\n\n**Prerequisites:** [Tutorial 1](01_hello_openimpala.ipynb) for the core API. [Tutorial 5](05_surrogate_modelling.ipynb) introduced the idea of using OpenImpala in automated pipelines — here we close the loop with an optimiser."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 matplotlib"
+ "source": "!pip install -q openimpala matplotlib"
},
{
"cell_type": "code",
@@ -36,7 +36,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 1. Convergence Comparison\n\nThe plot below shows both the \"current state\" tortuosity (which can go up for SA) and the \"best found so far\" for each method. Notice that SA's current state wanders upward early on \u2014 it is intentionally exploring worse configurations \u2014 but its best-so-far can ultimately surpass the greedy result by escaping local optima."
+ "source": "## 1. Convergence Comparison\n\nThe plot below shows both the \"current state\" tortuosity (which can go up for SA) and the \"best found so far\" for each method. Notice that SA's current state wanders upward early on — it is intentionally exploring worse configurations — but its best-so-far can ultimately surpass the greedy result by escaping local optima."
},
{
"cell_type": "code",
@@ -71,7 +71,7 @@
},
{
"cell_type": "markdown",
- "source": "## Scaling Up: From Toy Demo to Production Optimisation\n\nThis tutorial is a **toy demonstration** \u2014 100 iterations on a $24^3$ grid with single-voxel swaps. But the same pattern scales directly to production runs on HPC, where OpenImpala's throughput transforms what is feasible:\n\n### Scaling the inner loop (faster evaluations)\n\nLaunch the same script with MPI to speed up each PDE solve:\n```bash\nmpirun -np 64 python topology_opt.py --size 128 --iterations 5000\n```\n\n### Scaling the outer loop (more parallel searches)\n\nRun many independent SA trajectories simultaneously via SLURM job arrays, then pick the best:\n```bash\n#SBATCH --array=0-31 # 32 independent SA runs with different random seeds\nmpirun -np 16 python topology_opt.py --seed $SLURM_ARRAY_TASK_ID\n```\n\nThis \"multi-start\" strategy is embarrassingly parallel and particularly effective for SA, where different random seeds explore different regions of configuration space.\n\n### Production-grade improvements\n\n- **Increase iterations** (1,000-10,000+) and domain size ($64^3$-$128^3$) for meaningful results.\n- **Use smarter moves** \u2014 swap clusters of voxels, or use gradient-based sensitivity information to guide which voxels to swap.\n- **Tune SA parameters** \u2014 the initial temperature and cooling schedule strongly affect performance. 
Adaptive schemes (e.g., reheating when the acceptance rate drops too low) are common in practice.\n- **Optimise in multiple directions** \u2014 minimise the maximum tortuosity across X, Y, and Z for isotropic transport.\n- **Add constraints** \u2014 in battery electrode design, for example, you may need to maintain mechanical connectivity of the solid phase while optimising ionic transport through the pore phase.\n- **Combine with surrogates** \u2014 use the surrogate from [Tutorial 5](05_surrogate_modelling.ipynb) to pre-screen candidates cheaply, then validate the best ones with full OpenImpala solves.\n\n**Continue the series:**\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) \u2014 Train a fast surrogate to replace the PDE solve, enabling much larger optimisation campaigns.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) \u2014 Deploy production-scale optimisation campaigns on a cluster.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Topology optimisation for transport:** J. K. Guest & J. H. Prevost, *Topology optimization of creeping fluid flows using a Darcy-Stokes finite element*, Int. J. Numer. Methods Eng. 66(3), 461-484 (2006). [doi:10.1002/nme.1560](https://doi.org/10.1002/nme.1560)\n3. **Simulated annealing:** S. Kirkpatrick, C. D. Gelatt Jr., & M. P. Vecchi, *Optimization by simulated annealing*, Science 220(4598), 671-680 (1983). [doi:10.1126/science.220.4598.671](https://doi.org/10.1126/science.220.4598.671)\n4. **Electrode microstructure design:** G. M. Goldin et al., *Three-dimensional particle-resolved models of Li-ion batteries to assist the evaluation of empirical parameters in one-dimensional models*, Electrochimica Acta 64, 118-129 (2012). 
[doi:10.1016/j.electacta.2011.12.119](https://doi.org/10.1016/j.electacta.2011.12.119)\n5. **Voxel-based topology optimisation:** O. Sigmund, *A 99 line topology optimization code written in Matlab*, Structural and Multidisciplinary Optimization 21(2), 120-127 (2001). [doi:10.1007/s001580050176](https://doi.org/10.1007/s001580050176)\n6. **Hashin-Shtrikman bounds:** Z. Hashin & S. Shtrikman, *A variational approach to the theory of the effective magnetic permeability of multiphase materials*, J. Applied Physics 33(10), 3125-3131 (1962). [doi:10.1063/1.1728579](https://doi.org/10.1063/1.1728579)",
+ "source": "## Scaling Up: From Toy Demo to Production Optimisation\n\nThis tutorial is a **toy demonstration** — 100 iterations on a $24^3$ grid with single-voxel swaps. But the same pattern scales directly to production runs on HPC, where OpenImpala's throughput transforms what is feasible:\n\n### Scaling the inner loop (faster evaluations)\n\nLaunch the same script with MPI to speed up each PDE solve:\n```bash\nmpirun -np 64 python topology_opt.py --size 128 --iterations 5000\n```\n\n### Scaling the outer loop (more parallel searches)\n\nRun many independent SA trajectories simultaneously via SLURM job arrays, then pick the best:\n```bash\n#SBATCH --array=0-31 # 32 independent SA runs with different random seeds\nmpirun -np 16 python topology_opt.py --seed $SLURM_ARRAY_TASK_ID\n```\n\nThis \"multi-start\" strategy is embarrassingly parallel and particularly effective for SA, where different random seeds explore different regions of configuration space.\n\n### Production-grade improvements\n\n- **Increase iterations** (1,000-10,000+) and domain size ($64^3$-$128^3$) for meaningful results.\n- **Use smarter moves** — swap clusters of voxels, or use gradient-based sensitivity information to guide which voxels to swap.\n- **Tune SA parameters** — the initial temperature and cooling schedule strongly affect performance. 
Adaptive schemes (e.g., reheating when the acceptance rate drops too low) are common in practice.\n- **Optimise in multiple directions** — minimise the maximum tortuosity across X, Y, and Z for isotropic transport.\n- **Add constraints** — in battery electrode design, for example, you may need to maintain mechanical connectivity of the solid phase while optimising ionic transport through the pore phase.\n- **Combine with surrogates** — use the surrogate from [Tutorial 5](05_surrogate_modelling.ipynb) to pre-screen candidates cheaply, then validate the best ones with full OpenImpala solves.\n\n**Continue the series:**\n- [Tutorial 5: Surrogate Modelling](05_surrogate_modelling.ipynb) — Train a fast surrogate to replace the PDE solve, enabling much larger optimisation campaigns.\n- [Tutorial 7: Scaling to HPC](07_hpc_scaling.ipynb) — Deploy production-scale optimisation campaigns on a cluster.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **Topology optimisation for transport:** J. K. Guest & J. H. Prevost, *Topology optimization of creeping fluid flows using a Darcy-Stokes finite element*, Int. J. Numer. Methods Eng. 66(3), 461-484 (2006). [doi:10.1002/nme.1560](https://doi.org/10.1002/nme.1560)\n3. **Simulated annealing:** S. Kirkpatrick, C. D. Gelatt Jr., & M. P. Vecchi, *Optimization by simulated annealing*, Science 220(4598), 671-680 (1983). [doi:10.1126/science.220.4598.671](https://doi.org/10.1126/science.220.4598.671)\n4. **Electrode microstructure design:** G. M. Goldin et al., *Three-dimensional particle-resolved models of Li-ion batteries to assist the evaluation of empirical parameters in one-dimensional models*, Electrochimica Acta 64, 118-129 (2012). 
[doi:10.1016/j.electacta.2011.12.119](https://doi.org/10.1016/j.electacta.2011.12.119)\n5. **Voxel-based topology optimisation:** O. Sigmund, *A 99 line topology optimization code written in Matlab*, Structural and Multidisciplinary Optimization 21(2), 120-127 (2001). [doi:10.1007/s001580050176](https://doi.org/10.1007/s001580050176)\n6. **Hashin-Shtrikman bounds:** Z. Hashin & S. Shtrikman, *A variational approach to the theory of the effective magnetic permeability of multiphase materials*, J. Applied Physics 33(10), 3125-3131 (1962). [doi:10.1063/1.1728579](https://doi.org/10.1063/1.1728579)",
"metadata": {}
}
],
@@ -88,4 +88,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file
diff --git a/tutorials/07_hpc_scaling.ipynb b/tutorials/07_hpc_scaling.ipynb
index 2c40d7eb..7c3cb77f 100644
--- a/tutorials/07_hpc_scaling.ipynb
+++ b/tutorials/07_hpc_scaling.ipynb
@@ -3,7 +3,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "
\n\n# Tutorial 7 of 7: Scaling from Laptop to HPC\n\n*OpenImpala Tutorial Series \u2014 From first solve to HPC deployment*\n\n---\n\nThe previous tutorials all ran on a single machine. Real synchrotron datasets, however, can be $2000^3$ voxels or larger \u2014 too big to fit in a single node's memory, let alone solve interactively.\n\nOpenImpala is built on AMReX and MPI, so the same Python script you wrote in earlier tutorials can be launched with `mpirun` across many nodes. For very large files, OpenImpala's C++ readers can also bypass Python entirely, reading TIFF and HDF5 data in parallel directly from disk.\n\nThis tutorial combines **reference material** (MPI scripts, SLURM templates, reader tables) with **executable benchmarks** you can run right now \u2014 a problem-size scaling study and a solver comparison that produces real timing data.\n\n**What you will learn:**\n1. How OpenImpala distributes work across MPI ranks.\n2. The role of `max_grid_size` in domain decomposition.\n3. A complete MPI-ready Python script.\n4. How to use the C++ parallel readers for out-of-core I/O.\n5. A ready-to-use SLURM batch submission script.\n6. How solve time scales with problem size (executable benchmark).\n7. Which HYPRE solver to choose for your problem (executable comparison).\n\n**Prerequisites:** Familiarity with the OpenImpala Python API ([Tutorials 1-2](01_hello_openimpala.ipynb)) and access to a multi-core machine or HPC cluster. All earlier tutorials in the series can inform what scripts you deploy at scale."
+ "source": "
\n\n# Tutorial 7 of 7: Scaling from Laptop to HPC\n\n*OpenImpala Tutorial Series — From first solve to HPC deployment*\n\n---\n\nThe previous tutorials all ran on a single machine. Real synchrotron datasets, however, can be $2000^3$ voxels or larger — too big to fit in a single node's memory, let alone solve interactively.\n\nOpenImpala is built on AMReX and MPI, so the same Python script you wrote in earlier tutorials can be launched with `mpirun` across many nodes. For very large files, OpenImpala's C++ readers can also bypass Python entirely, reading TIFF and HDF5 data in parallel directly from disk.\n\nThis tutorial combines **reference material** (MPI scripts, SLURM templates, reader tables) with **executable benchmarks** you can run right now — a problem-size scaling study and a solver comparison that produces real timing data.\n\n**What you will learn:**\n1. How OpenImpala distributes work across MPI ranks.\n2. The role of `max_grid_size` in domain decomposition.\n3. A complete MPI-ready Python script.\n4. How to use the C++ parallel readers for out-of-core I/O.\n5. A ready-to-use SLURM batch submission script.\n6. How solve time scales with problem size (executable benchmark).\n7. Which HYPRE solver to choose for your problem (executable comparison).\n\n**Prerequisites:** Familiarity with the OpenImpala Python API ([Tutorials 1-2](01_hello_openimpala.ipynb)) and access to a multi-core machine or HPC cluster. All earlier tutorials in the series can inform what scripts you deploy at scale."
},
{
"cell_type": "markdown",
@@ -235,14 +235,14 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## 6. Executable Scaling Study: Problem Size vs. Wall Time\n\nEven on a single machine, we can measure how solve time scales with problem size. This is a **weak-scaling proxy** \u2014 we increase the domain while keeping one rank, which shows the computational complexity of the solver. On a real cluster with proportionally more MPI ranks, wall time would stay roughly constant (ideal weak scaling).\n\nThis cell actually runs and produces real timing data."
+ "source": "## 6. Executable Scaling Study: Problem Size vs. Wall Time\n\nEven on a single machine, we can measure how solve time scales with problem size. This is a **weak-scaling proxy** — we increase the domain while keeping one rank, which shows the computational complexity of the solver. On a real cluster with proportionally more MPI ranks, wall time would stay roughly constant (ideal weak scaling).\n\nThis cell actually runs and produces real timing data."
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": "!pip install -q openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12 porespy matplotlib"
+ "source": "!pip install -q openimpala porespy matplotlib"
},
{
"cell_type": "code",
@@ -266,7 +266,7 @@
{
"cell_type": "markdown",
"metadata": {},
- "source": "## Summary\n\n| Scenario | Approach |\n|----------|----------|\n| **Laptop / Colab** | `pip install openimpala-cuda --find-links https://github.com/BASE-Laboratory/OpenImpala/releases/expanded_assets/v4.0.6 nvidia-cuda-runtime-cu12 nvidia-curand-cu12 nvidia-cusparse-cu12 nvidia-cublas-cu12`, use NumPy arrays directly |\n| **Small cluster (1-16 cores)** | `mpirun -np 16 python script.py` with NumPy loading |\n| **Large cluster (16-128+ cores)** | `mpirun -np 128 python script.py` with `oi.read_image()` for parallel I/O |\n| **HPC without Python** | `mpirun -np 128 Diffusion ./inputs` (pure C++ application) |\n\n### Solver Quick Reference\n\n| Solver | Best For | Notes |\n|--------|----------|-------|\n| **FlexGMRES** | General use | Robust default, handles non-symmetric systems |\n| **PCG** | Symmetric problems | Fastest when applicable |\n| **SMG/PFMG** | Structured grids | Geometric multigrid, excellent on regular domains |\n| **BiCGSTAB** | Non-symmetric | Alternative to GMRES |\n\nThe API is the same at every scale \u2014 only the launch command changes. The scaling study and solver comparison in Sections 6-7 provide concrete data to guide your deployment decisions.\n\n**Back to the beginning:**\n- [Tutorial 1: Getting Started](01_hello_openimpala.ipynb) \u2014 Core workflow with synthetic microstructures.\n- [Tutorial 2: From 3D Image to Device Model](02_digital_twin.ipynb) \u2014 Load real CT scans and export to PyBaMM.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **AMReX:** W. Zhang et al., *AMReX: a framework for block-structured adaptive mesh refinement*, J. Open Source Software 4(37), 1370 (2019). [doi:10.21105/joss.01370](https://doi.org/10.21105/joss.01370)\n3. **AMReX scaling:** A. S. 
Almgren et al., *Block-structured adaptive mesh refinement \u2014 theory, implementation and application*, J. Comput. Physics 332, 1-28 (2017). [doi:10.1016/j.jcp.2016.12.073](https://doi.org/10.1016/j.jcp.2016.12.073)\n4. **HYPRE:** R. D. Falgout & U. M. Yang, *hypre: A library of high performance preconditioners*, Computational Science \u2014 ICCS 2002, LNCS 2331, pp. 632-641 (2002). [doi:10.1007/3-540-47789-6_66](https://doi.org/10.1007/3-540-47789-6_66)\n5. **Parallel HDF5:** The HDF Group, *HDF5 \u2014 Parallel I/O*, [hdfgroup.org](https://www.hdfgroup.org/solutions/hdf5/)\n6. **Apptainer/Singularity for HPC:** G. M. Kurtzer et al., *Singularity: Scientific containers for mobility of compute*, PLoS ONE 12(5), e0177459 (2017). [doi:10.1371/journal.pone.0177459](https://doi.org/10.1371/journal.pone.0177459)\n7. **MPI standard:** Message Passing Interface Forum, *MPI: A Message-Passing Interface Standard, Version 4.0* (2021). [mpi-forum.org](https://www.mpi-forum.org/docs/mpi-4.0/mpi40-report.pdf)"
+ "source": "## Summary\n\n| Scenario | Approach |\n|----------|----------|\n| **Laptop / Colab** | `pip install openimpala`, use NumPy arrays directly |\n| **Small cluster (1-16 cores)** | `mpirun -np 16 python script.py` with NumPy loading |\n| **Large cluster (16-128+ cores)** | `mpirun -np 128 python script.py` with `oi.read_image()` for parallel I/O |\n| **HPC without Python** | `mpirun -np 128 Diffusion ./inputs` (pure C++ application) |\n\n### Solver Quick Reference\n\n| Solver | Best For | Notes |\n|--------|----------|-------|\n| **FlexGMRES** | General use | Robust default, handles non-symmetric systems |\n| **PCG** | Symmetric problems | Fastest when applicable |\n| **SMG/PFMG** | Structured grids | Geometric multigrid, excellent on regular domains |\n| **BiCGSTAB** | Non-symmetric | Alternative to GMRES |\n\nThe API is the same at every scale — only the launch command changes. The scaling study and solver comparison in Sections 6-7 provide concrete data to guide your deployment decisions.\n\n**Back to the beginning:**\n- [Tutorial 1: Getting Started](01_hello_openimpala.ipynb) — Core workflow with synthetic microstructures.\n- [Tutorial 2: From 3D Image to Device Model](02_digital_twin.ipynb) — Load real CT scans and export to PyBaMM.\n\n---\n\n## References & Further Reading\n\n1. **OpenImpala:** S. Mayner, F. Ciucci, *OpenImpala: open-source computational framework for microstructural analysis of 3D tomography data*, SoftwareX (2024). [GitHub](https://github.com/BASE-Laboratory/OpenImpala)\n2. **AMReX:** W. Zhang et al., *AMReX: a framework for block-structured adaptive mesh refinement*, J. Open Source Software 4(37), 1370 (2019). [doi:10.21105/joss.01370](https://doi.org/10.21105/joss.01370)\n3. **AMReX scaling:** A. S. Almgren et al., *Block-structured adaptive mesh refinement — theory, implementation and application*, J. Comput. Physics 332, 1-28 (2017). [doi:10.1016/j.jcp.2016.12.073](https://doi.org/10.1016/j.jcp.2016.12.073)\n4. **HYPRE:** R. D. 
Falgout & U. M. Yang, *hypre: A library of high performance preconditioners*, Computational Science — ICCS 2002, LNCS 2331, pp. 632-641 (2002). [doi:10.1007/3-540-47789-6_66](https://doi.org/10.1007/3-540-47789-6_66)\n5. **Parallel HDF5:** The HDF Group, *HDF5 — Parallel I/O*, [hdfgroup.org](https://www.hdfgroup.org/solutions/hdf5/)\n6. **Apptainer/Singularity for HPC:** G. M. Kurtzer et al., *Singularity: Scientific containers for mobility of compute*, PLoS ONE 12(5), e0177459 (2017). [doi:10.1371/journal.pone.0177459](https://doi.org/10.1371/journal.pone.0177459)\n7. **MPI standard:** Message Passing Interface Forum, *MPI: A Message-Passing Interface Standard, Version 4.0* (2021). [mpi-forum.org](https://www.mpi-forum.org/docs/mpi-4.0/mpi40-report.pdf)"
}
],
"metadata": {
@@ -282,4 +282,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
-}
+}
\ No newline at end of file