diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..2796cca
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,18 @@
+.git
+.gitignore
+logs
+static
+data/wsi
+tmp
+**/__pycache__/
+*.pyc
+*.egg-info
+.env
+.venv
+.pytest_cache
+.idea
+.vscode
+# Local artifacts and model weights: weights are mounted at runtime, not baked in.
+data/inference_output
+model_zoo/**/*.ckpt
+model_zoo/**/*.pth
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9ec8383
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,3 @@
+* text=auto eol=lf
+*.sh text eol=lf
+Dockerfile text eol=lf
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
new file mode 100644
index 0000000..87e1dc5
--- /dev/null
+++ b/.github/workflows/docker.yml
@@ -0,0 +1,79 @@
+name: Build and publish Docker image
+
+on:
+  push:
+    branches: [main]
+    tags: ['v*']
+  pull_request:
+    paths:
+      - Dockerfile
+      - requirements.txt
+      - setup.py
+      - .dockerignore
+      - .github/workflows/docker.yml
+  workflow_dispatch:
+
+# Cancel superseded PR builds only; publishing runs (main/tags) always finish.
+concurrency:
+  group: docker-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
+jobs:
+  build-and-push:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set lowercase image name
+        id: img
+        run: echo "name=ghcr.io/${GITHUB_REPOSITORY_OWNER,,}/ectil-inference" >> "$GITHUB_OUTPUT"
+
+      - uses: docker/setup-buildx-action@v3
+
+      # Same-repo PRs get a publishable :pr-N tag so reviewers can `docker pull`
+      # the branch instead of building locally. Forked PRs stay build-only —
+      # they don't get a writable GITHUB_TOKEN anyway, and we don't want
+      # unreviewed fork code pushing tags to our registry.
+      # The decision is an Actions expression passed via env, not text spliced into bash.
+      - name: Decide whether to push
+        id: push
+        env:
+          ENABLED: >-
+            ${{ github.event_name != 'pull_request'
+            || github.event.pull_request.head.repo.full_name == github.repository }}
+        run: echo "enabled=${ENABLED}" >> "$GITHUB_OUTPUT"
+
+      - name: Log in to GHCR
+        if: steps.push.outputs.enabled == 'true'
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Compute tags
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ steps.img.outputs.name }}
+          tags: |
+            type=ref,event=branch
+            type=ref,event=pr
+            type=semver,pattern={{version}}
+            type=semver,pattern={{major}}.{{minor}}
+            type=sha,format=short
+            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
+
+      - name: Build (and push for non-PRs and same-repo PRs)
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64
+          push: ${{ steps.push.outputs.enabled == 'true' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/.gitignore b/.gitignore
index 54b5ab7..381a0bf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # project specific
 pyrightconfig.json
 data/wsi
+data/inference_output
 model_zoo/**/*.ckpt
 model_zoo/**/*.pth
 tmp
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c676142
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,69 @@
+# ECTIL inference image.
+#
+# Get the image — either pull the published one or build it yourself:
+#   docker pull ghcr.io/nki-ai/ectil-inference:latest
+#   # or
+#   docker build -t ghcr.io/nki-ai/ectil-inference:latest .
+#
+# Run (mount the WSI, the weights, and an output directory):
+#   docker run --rm \
+#     -v /path/to/slides:/input:ro \
+#     -v /path/to/weights:/weights:ro \
+#     -v /path/to/output:/output \
+#     ghcr.io/nki-ai/ectil-inference:latest \
+#       --wsi /input/slide.svs \
+#       --classifier-weights /weights/ectil_fold_0_weights_only.ckpt \
+#       --retccl-weights /weights/retccl_best_ckpt.pth \
+#       --output /output
+#
+# Add `--gpus all` to `docker run` and `--device cuda` to the command for GPU.
+#
+# Weights are NOT bundled in the image; mount them at runtime.
+#   - ECTIL classifier: https://files.aiforoncology.nl/ectil  (see model_zoo/ectil/tcga/readme.md)
+#   - RetCCL encoder:   see model_zoo/retccl/readme.md
+# If --retccl-weights is omitted it defaults to /app/model_zoo/retccl/retccl_best_ckpt.pth
+# or the RETCCL_WEIGHTS environment variable.
+
+# Pinned (not :latest) so the build is reproducible. The :latest tag moved to
+# conda 26.x in April 2026, where the conda-forge solve for the WSI libs below
+# can drag GraalPy into the env and break the pip install of torch==2.4.1
+# ("Could not find a version that satisfies the requirement torch==2.4.1").
+FROM continuumio/miniconda3:24.11.1-0
+
+# DEBIAN_FRONTEND is set for this command only so it does not persist into the image.
+RUN apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Python 3.10.9 plus the WSI system libraries via conda-forge (mirrors README install).
+# Force the CPython build of python and keep it pinned across the second install
+# so newer conda solvers can't swap it for graalpy when resolving conda-forge deps
+# (that swap silently breaks the torch==2.4.1 pip install in the next layer).
+RUN conda create -y -n ectil -c conda-forge "python=3.10.9=*_cpython" \
+    && echo "python 3.10.9" > /opt/conda/envs/ectil/conda-meta/pinned \
+    && conda install -y -n ectil -c conda-forge openslide pixman libvips \
+    && conda clean -afy
+
+ENV PATH=/opt/conda/envs/ectil/bin:$PATH
+ENV CONDA_DEFAULT_ENV=ectil
+
+WORKDIR /app
+
+# Install Python dependencies first for better layer caching.
+# Pin the build toolchain as a matched set: an old pip (23.3.2) paired with a
+# newer setuptools whose `_core_metadata` calls `canonicalize_version(..., strip_trailing_zero=)`
+# needs a `packaging` >= 23.2 that actually has that kwarg, otherwise the editable
+# install of this package below dies with
+#   TypeError: canonicalize_version() got an unexpected keyword argument 'strip_trailing_zero'
+# Pinning setuptools/wheel/packaging together keeps the toolchain self-consistent.
+COPY requirements.txt setup.py ./
+RUN python -m pip install --no-cache-dir \
+        pip==23.3.2 setuptools==69.5.1 wheel==0.43.0 packaging==24.0 \
+    && python -m pip install --no-cache-dir -r requirements.txt
+
+# Install the ectil package itself.
+COPY . .
+RUN python -m pip install --no-cache-dir --no-deps -e .
+
+ENTRYPOINT ["python", "-m", "ectil.inference"]
+CMD ["--help"]
diff --git a/README.md b/README.md
index b6dce5b..b708bc7 100644
--- a/README.md
+++ b/README.md
@@ -12,78 +12,143 @@
 
 ## Description
 
+This is the repository with code related to the paper
 
+**"ECTIL: Label-efficient Computational Tumour Infiltrating Lymphocyte (TIL) assessment in breast cancer: Multicentre validation in 2,340 patients with breast cancer" (publication and DOI pending)**.
 
-This is the repository with code related to the paper 
+ECTIL scores stromal TILs directly from a breast cancer whole-slide image (WSI): tissue mask → foreground tiling → RetCCL feature extraction → ECTIL regression. The trained models are available in the [model zoo](model_zoo/ectil/tcga/readme.md). The sections below cover running inference on your own slides, integrating ECTIL into your own pipeline, and reproducing the manuscript results on the TCGA cohort.
 
-**ECTIL: Label-efficient Computational Tumour Infiltrating Lymphocyte (TIL) assessment in breast cancer: Multicentre validation in 2,340 patients with breast cancer" (publication and DOI pending)**. 
+![Main model figure of ECTIL](static/images/model_figure.jpg)
 
-Below we show how to reproduce training, validation, and internal testing on the TCGA cohort. The trained model is available in the model zoo, and a script is available to infer the model on any WSI. Additionally, a minimal example is provided to use a pre-trained ECTIL model in your own custom pipeline. Main scripts to reproduce the analyses in the manuscript are also available.
+## Quick start: infer on a WSI
 
-![Main model figure of ECTIL](static/images/model_figure.jpg)
+The end-to-end entry point [`ectil/inference.py`](ectil/inference.py) runs the whole pipeline on a WSI (tissue mask → foreground tiling → RetCCL features → ECTIL). RetCCL is loaded automatically; you only provide the WSI and the ECTIL classifier weights. Download the weights first: [ECTIL classifier](model_zoo/ectil/tcga/readme.md) and [RetCCL encoder](model_zoo/retccl/readme.md).
+
+### Easiest: end-to-end smoke test
+
+[`tools/infer/run_demo.sh`](tools/infer/run_demo.sh) downloads the RetCCL and ECTIL weights and a handful of public TCGA-BRCA slides, builds the Docker image, runs both single-slide and directory inference, and checks the outputs. Run it to confirm your setup works end to end:
+
+```bash
+~/ectil$ ./tools/infer/run_demo.sh
+```
+
+### One WSI with Docker (no local Python needed)
+
+Weights are not bundled in the image; mount them at runtime.
+
+Either pull the pre-built image from GitHub Container Registry, or build it locally:
+
+```bash
+# Option A — pull (linux/amd64). :latest tracks main; pin a release tag for reproducibility.
+~$ docker pull ghcr.io/nki-ai/ectil-inference:latest
+
+# Option B — build from the repo.
+~/ectil$ docker build -t ghcr.io/nki-ai/ectil-inference:latest .
+```
+
+Then run:
+
+```bash
+~/ectil$ docker run --rm \
+    -v /path/to/slides:/input:ro \
+    -v /path/to/weights:/weights:ro \
+    -v /path/to/output:/output \
+    ghcr.io/nki-ai/ectil-inference:latest \
+        --wsi /input/slide.svs \
+        --classifier-weights /weights/ectil_fold_0_weights_only.ckpt \
+        --retccl-weights /weights/retccl_best_ckpt.pth \
+        --output /output
+```
+
+Add `--gpus all` to `docker run` and `--device cuda` to the command for GPU. A runnable wrapper is provided in [`tools/infer/infer_docker.sh`](tools/infer/infer_docker.sh). Published image tags are listed at [ghcr.io/nki-ai/ectil-inference](https://github.com/NKI-AI/ectil/pkgs/container/ectil-inference): `:latest` follows `main`, `:vX.Y.Z` is published on git tags, and `:sha-<short>` exists per commit for forensic pinning.
+
+### Directly, without Docker
+
+After [installing the dependencies](#installation):
+
+```bash
+~/ectil$ python -m ectil.inference \
+    --wsi /path/to/slide.svs \
+    --classifier-weights model_zoo/ectil/tcga/fold_0/epoch_065_step_858_weights_only.ckpt \
+    --retccl-weights model_zoo/retccl/retccl_best_ckpt.pth \
+    --output /path/to/output
+```
+
+> **Slides without an embedded spacing** (many TCGA SVS) raise `UnsupportedSlideError` unless the spacing is set explicitly: pass `--overwrite-mpp 0.25` (the native micron-per-pixel of TCGA 40x diagnostic slides).
 
-## Main files of interest
+`--wsi` accepts either a single slide or a directory of slides (recursively globbed by extension, including `.mrxs`); failed slides are skipped and recorded rather than aborting the run.
 
-### TILs scores
-The TILs scores for TCGA samples are available under [data/clini/tcga_bc_tils.csv](data/clini/tcga_bc_tils.csv), and may be used in future research.
+### What you get
 
-### Scripts for preprocessing and feature extraction
-The scripts for foreground selection and tile- and feature extraction is provided, and can be run with [tools/extract/retccl/extract_retccl_tcga_bc.sh](tools/extract/retccl/extract_retccl_tcga_bc.sh)
+Each run writes a timestamped directory `<output>/<run_name>/` (override the name with `--run-name`) containing a `config.json`, an aggregate `tils_scores.csv` (one row per slide, for easy analysis), and a per-slide subdir with:
 
-### Using a pre-trained ECTIL model on external WSIs
-- To download the pre-trained ECTIL models, read [model_zoo/ectil/tcga/readme.md](model_zoo/ectil/tcga/readme.md)
-- A minimal example of running inference of a pre-trained TILs regression model on patches of a WSI is provided in [tools/infer/minimal_example.py](tools/infer/minimal_example.py), which may be adjusted for your own pipeline
-- The configuration and bash scripts for running inference on a collection of WSIs for which features are extracted with [tools/extract/retccl/extract_retccl_tcga_bc.sh](tools/extract/retccl/extract_retccl_tcga_bc.sh) is available in [tools/infer/infer_tcga_retccl_external.sh](tools/infer/infer_tcga_retccl_external.sh)
+- `tils_score.json` — slide-level TIL score + full config
+- `tile_predictions.csv` — per-tile TIL score, attention weight, and region
+- `features.h5` — the generated dataset of RetCCL features + tile metadata
+- `thumbnail.png`, `mask.png`, `mask_overlay.png`
+- `attention_heatmap.png`, `til_heatmap.png`
 
-### Reproducing training, validation, and testing
-- The center-level folds used in the experiments presented in the paper are found in [data/clini/tcga_bc_folds.csv](data/clini/tcga_bc_folds.csv)
-- The configuration and bash scripts for training, validation, and testing on TCGA is provided, and can be run with [tools/train/train_evaluate_test_tcga_retccl_internal.sh](tools/train/train_evaluate_test_tcga_retccl_internal.sh)
+## Use a pre-trained ECTIL model in your own pipeline
 
-### Analyses
-The scripts for the analyses are found in [tools/analysis](tools/analysis), which produce the main metrics and figures for the output on test folds of TCGA, found at [logs/tcga_output](logs/tcga_output)
+A minimal, framework-agnostic example of running a pre-trained ECTIL regressor on patch features is provided in [`tools/infer/minimal_example.py`](tools/infer/minimal_example.py); adapt it to your own pipeline. To download the pre-trained models, see [`model_zoo/ectil/tcga/readme.md`](model_zoo/ectil/tcga/readme.md).
+
+## Installation
+
+[RECOMMENDED] Use conda — it greatly simplifies installing openslide and pixman.
 
-## How to run
-### Install dependencies
 ```bash
 # clone project
-git clone https://github.com/YoniSchirris/ectil
+git clone https://github.com/nki-ai/ectil
 cd ectil
 
-# [RECOMMENDED] create conda environment; this greatly simplifies installation of openslide and pixman 
+# create conda environment
 conda create -n ectil python=3.10.9
 conda activate ectil
-pip install pip==23.3.2 # Required for older version of pytorch-lightning that was used during this project
+pip install pip==23.3.2  # required for the older pytorch-lightning used in this project
 
-conda install conda-forge::openslide #  Required for DLUP for loading WSIs
-conda install conda-forge::pixman # Requires for DLUP for loading WSIs. Should be included in openslide
-conda install conda-forge::libvips # Required for DLUP for loading WSIs
+# system libraries required by DLUP for loading WSIs
+conda install conda-forge::openslide
+conda install conda-forge::pixman   # usually pulled in by openslide
+conda install conda-forge::libvips
 
-# install pytorch according to instructions
-# https://pytorch.org/get-started/ # We use Stable (2.4.1+cu121) on linux for development and training on HPC)
+# install PyTorch per the official instructions:
+# https://pytorch.org/get-started/  (we use 2.4.1+cu121 on Linux for development/training on HPC)
 
-# install requirements
+# install ectil and its requirements
 python -m pip install .
 python -m pip install -r requirements.txt
 ```
 
-### Preprocessing: Feature extraction
-Automatically perform foreground selection, extract patches, extract features with RetCCL, and save them in `h5` format. A working example is presented in [tools/extract/retccl/extract_retccl_tcga_bc.sh](tools/extract/retccl/extract_retccl_tcga_bc.sh)
+Docker users can skip this — the image builds the environment for you.
+
+## Reproduce the manuscript (TCGA)
+
+<details>
+<summary><b>Data, feature extraction, training, evaluation, and analysis on the TCGA cohort</b></summary>
+
+### Data and TILs scores
+
+- The TILs scores for TCGA samples are in [`data/clini/tcga_bc_tils.csv`](data/clini/tcga_bc_tils.csv) and may be used in future research.
+- The center-level folds used in the experiments are in [`data/clini/tcga_bc_folds.csv`](data/clini/tcga_bc_folds.csv).
+
+### Feature extraction
 
-To reproduce the TCGA experiments, first download the slides from the GDC repository to `/path/to/your/data/dir`, and download the RetCCL model (see [model_zoo/retccl/readme.md](model_zoo/retccl/readme.md))
+Automatically perform foreground selection, extract patches, extract RetCCL features, and save them as `h5`. A working example is [`tools/extract/retccl/extract_retccl_tcga_bc.sh`](tools/extract/retccl/extract_retccl_tcga_bc.sh).
 
-E.g. extract RetCCL features from all `*.svs` files in the directory `/path/to/your/data/dir` on a small cpu with only the main thread and a relatively small batch size and write the h5 files to `/your/log/dir`.
+First download the slides from the GDC repository to `/path/to/your/data/dir`, and download the RetCCL model (see [`model_zoo/retccl/readme.md`](model_zoo/retccl/readme.md)).
 
-Rename [.env.example](.env.example) to `.env` and set 
+Rename [`.env.example`](.env.example) to `.env` and set:
 ```bash
 TCGA_BRCA_IMAGES_ROOT="/path/to/your/data_dir"
-TCGA_BRCA_H5_ROOT_DIR="/your/log/dir" 
+TCGA_BRCA_H5_ROOT_DIR="/your/log/dir"
 ```
 
+E.g. extract RetCCL features from all `*.svs` files in a directory on a small CPU with a single worker and a relatively small batch size, writing the `h5` files to your log dir:
 ```bash
-# path to data dir and log dir can also be set in the CLI of ectil
+# paths to the data dir and log dir can also be set in the CLI
 ~/ectil$ python ectil/extract.py \
     experiment=ectil/extract/tcga_retccl \
-    task_name=ectil_extract
+    task_name=ectil_extract \
     datamodule.num_workers=0 \
     datamodule.batch_size=16 \
     trainer=cpu \
@@ -91,86 +156,73 @@ TCGA_BRCA_H5_ROOT_DIR="/your/log/dir"
     datamodule.image_glob='**/*.svs' \
     model.h5_writer.h5_root_dir='/your/preferred/log/dir'
 ```
-
-If a gpu is available, set `trainer=gpu`.
-
-If `/path/to/your/dir` contains more slides than you want to extract features for, you can add 
+Set `trainer=gpu` if a GPU is available. To extract only a subset of slides, add:
 ```bash
 +datamodule.image_paths_file=/path/to/file.txt
 ```
-where `file.txt` contains, for each WSI of interest, an **absolute** path (which should be located in a subdirectory of `datamodule.image_root_dir`) on each row.
-
-The log directory will also contain a png with a thumbnail with mask.
+where `file.txt` lists one **absolute** path per WSI of interest (each located under a subdirectory of `datamodule.image_root_dir`). The log directory also gets a thumbnail-with-mask PNG.
 
+### Train, validate, and test
 
-### Train, validation, and testing on TCGA
-- Note that the first training epoch may take longer than subsequent epochs.
-- As a test for reproducibility on any hardware, we noticed that on a CPU with `num_workers=0` it takes ~10 seconds per epoch of training and validation (25 epochs in ~10 minutes), which can be improved by using a GPU with more workers.
-- Logs training curves on tensorboard
-- Logs best metrics and hparams on mlflow
+Notes:
+- The first training epoch may take longer than subsequent ones.
+- For reproducibility on any hardware: on a CPU with `num_workers=0`, ~10 s per epoch of training and validation (25 epochs in ~10 min); a GPU with more workers is faster.
+- Training curves are logged to TensorBoard; best metrics and hparams to MLflow.
 
-E.g. to train-validate-test on the first fold of breast cancer samples from TCGA on a cpu with no additional workers (bare minimum hardware requirements), set the `datamodule.root_dir` to the path where your `h5`s are saved (this is not static due to timestamp versioning)
-
-```sh
-python ectil/train.py \
-  experiment=ectil/train/tcga/train_val.yaml \
+E.g. to train-validate-test on the first TCGA breast cancer fold on a CPU with no additional workers (bare-minimum hardware), set `datamodule.root_dir` to where your `h5`s are saved (this path is timestamp-versioned):
+```bash
+~/ectil$ python ectil/train.py \
+    experiment=ectil/train/tcga/train_val.yaml \
     task_name=ectil_train_val_test \
     datamodule.num_workers=0 \
     datamodule.root_dir='/path/to/h5s/in/v/yyyy-mm-dd-ss-ms' \
     trainer=cpu
 ```
+A full train-validate-test driver is in [`tools/train/train_evaluate_test_tcga_retccl_internal.sh`](tools/train/train_evaluate_test_tcga_retccl_internal.sh).
 
-To view training curves, plots, and final metrics, run
-
+View training curves, plots, and final metrics with:
 ```bash
 tensorboard --logdir=/your/log/dir
 ```
-
-and view the results in your localhost under the `scalars` and `images` tab.
-
-Results of a hyperparameter search is better viewed through mlflow, which can be started with
+under the `scalars` and `images` tabs. Hyperparameter searches are better viewed in MLflow:
 ```bash
 mlflow ui --backend-store-uri file:///path/to/your/logs/mlflow
 ```
 
-### Infer on any WSI
-
-A minimal example to add ECTIL to your own pipeline is provided in [tools/infer/minimal_example.py](tools/infer/minimal_example.py).
+### Infer on pre-extracted features
 
-An example to run inference on an `h5` of already extracted features of 1 or multiple slides is provided in [tools/infer/infer_tcga_retccl_external.sh]([tools/infer/infer_tcga_retccl_external.sh]).
+To run a trained ECTIL model on an `h5` of already-extracted features (1 or more slides), use [`tools/infer/infer_tcga_retccl_external.sh`](tools/infer/infer_tcga_retccl_external.sh). First extract features (above), then provide the directory and relative paths to the `h5` files when calling `eval.py`.
 
-First extract features from your WSIs of interest, then provide the directory and relative pathnames to these `h5` files when calling `eval.py`.
-
-E.g., after running [tools/extract/retccl/extract_retccl_tcga_bc.sh](tools/extract/retccl/extract_retccl_tcga_bc.sh), the h5s may be saved in `~/ectil/logs/extract/1970-01-01-00-00/....`. 
-```sh
+E.g. after running the extraction, the h5s might be saved in `~/ectil/logs/extract/1970-01-01-00-00/...`:
+```bash
 cd ~/ectil/logs/extract/1970-01-01-00-00
 echo "paths" > paths.csv
 find * -name "*.h5" >> paths.csv
 ```
-Now run inference with the following command (note that `~` may not always work properly, it is recommended to write out the full absolute path)
-```
+Then run inference (replace `~` below with the full absolute path; `~` may not expand correctly here):
+```bash
 ~/ectil$ python ectil/eval.py \
     ckpt_path=model_zoo/ectil/tcga/fold_0/epoch_065_step_858_weights_only.ckpt \
     trainer=cpu \
     datamodule.num_workers=0 \
-    datamodule.root_dir=~/ectil/logs/extract/1970-01-01-00-00
+    datamodule.root_dir=~/ectil/logs/extract/1970-01-01-00-00 \
     datamodule.test_paths=~/ectil/logs/extract/1970-01-01-00-00/paths.csv
 ```
 
 ### Analysis
-The results on the 5-fold test folds on TCGA are found in [logs/tcga_output](logs/tcga_output/). To produce a calibration plot, scatter plot, and detailed metrics, run 
 
-```sh
+The results on the 5-fold test folds on TCGA are in [`logs/tcga_output`](logs/tcga_output/). To produce a calibration plot, scatter plot, and detailed metrics (reproducing the manuscript results on TCGA):
+```bash
 ~/ectil$ python -m tools.analysis.calibration_curve.create_calibration_curve
 ~/ectil$ python -m tools.analysis.scatter_plot.create_scatter_plot
 ~/ectil$ python -m tools.analysis.metrics.compute_metrics
 ```
 
-which will reproduce the results as presented in the manuscript on TCGA.
-
 ### Prognostic analysis
-The `Rmd` script used to produce the cox regression results and the kaplan meier plots can be found under [tools/analysis/prognostic/prognostic_analysis.Rmd](tools/analysis/prognostic/prognostic_analysis.Rmd). This script is for illustration purposes only, since the raw data to produce the regressions and km plots can not be shared.
 
+The `Rmd` script used to produce the Cox regression results and the Kaplan-Meier plots is at [`tools/analysis/prognostic/prognostic_analysis.Rmd`](tools/analysis/prognostic/prognostic_analysis.Rmd). It is for illustration purposes only, since the raw data behind the regressions and KM plots cannot be shared.
+
+</details>
 
 ## Citation
 
@@ -192,5 +244,3 @@ or the following plain bibliography:
 ```
 Schirris, Y. (2024). ECTIL: Label-efficient Computational stromal TIL assessment model (Version 1.0.0) [Computer software]. https://github.com/nki-ai/ectil
 ```
-
-
diff --git a/ectil/inference.py b/ectil/inference.py
new file mode 100644
index 0000000..f545c00
--- /dev/null
+++ b/ectil/inference.py
@@ -0,0 +1,629 @@
+"""End-to-end WSI inference for ECTIL.
+
+Given a whole-slide image (WSI) -- or a directory of WSIs -- and a path to
+ECTIL classifier weights, this runs the full pipeline in one command:
+
+    tissue mask  ->  foreground tiling  ->  RetCCL features  ->  ECTIL
+
+and writes everything needed for practical/clinical use into a timestamped run
+directory, with one subdir per slide and an aggregate scores table:
+
+    <output>/<run_name>/
+        config.json             full run configuration
+        tils_scores.csv         one row per slide (score, status) for easy analysis
+        <slide_id>/
+            tils_score.json     slide-level TIL score + full config
+            tile_predictions.csv per-tile TIL score, attention weight, region
+            features.h5         the generated dataset (RetCCL features + tile meta)
+            thumbnail.png       plain slide thumbnail
+            mask.png            tissue mask used for tiling
+            mask_overlay.png    mask drawn on the thumbnail (sanity check)
+            attention_heatmap.png per-tile attention painted on the thumbnail
+            til_heatmap.png     per-tile TIL score painted on the thumbnail
+
+`<run_name>` defaults to a timestamp (override with --run-name). RetCCL is
+loaded automatically; only the ECTIL classifier weights have to be provided
+explicitly. This reuses the same components as the training/extraction pipeline
+(DLUP tiling + FESI mask, RetCCL encoder, MeanMIL + GatedAttention), so results
+match `extract.py` + `eval.py`.
+
+Batch mode skips slides it cannot process and records the failure in
+tils_scores.csv rather than aborting the whole run.
+
+Examples:
+    # single slide
+    python -m ectil.inference \
+        --wsi /input/slide.svs \
+        --classifier-weights /weights/ectil_fold_0_weights_only.ckpt \
+        --output /output
+
+    # a directory of slides (recursively globbed by extension)
+    python -m ectil.inference \
+        --wsi /input/cohort \
+        --classifier-weights /weights/ectil_fold_0_weights_only.ckpt \
+        --output /output
+"""
+
+import argparse
+import csv
+import json
+import logging
+import os
+from pathlib import Path
+from typing import Optional
+
+import h5py
+import matplotlib
+
+matplotlib.use("Agg")
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+from dlup import SlideImage
+from dlup.tiling import GridOrder, TilingMode
+from PIL import Image
+from torch.nn import Identity, Linear, ReLU, Sequential, Sigmoid
+from torch.utils.data import DataLoader
+
+from ectil.datamodules.components.dlup_dataset import (
+    DLUPDatasetWrapper,
+    compute_mask,
+    save_overlay,
+    transform_factory,
+)
+from ectil.models.components import GatedAttention, MeanMIL, RetCCL
+from ectil.models.extraction_module import H5Writer
+from ectil.utils.background import AvailableMaskFunctions
+
+# Configure root logging at import time: INFO level, timestamped records.
+_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
+logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
+log = logging.getLogger("ectil.inference")
+
+# Default location of the RetCCL weights inside the repo / container, resolved
+# from the directory above the one that contains this file.
+# Can be overridden with --retccl-weights or the RETCCL_WEIGHTS env var.
+DEFAULT_RETCCL_WEIGHTS = Path(__file__).resolve().parent.parent.joinpath(
+    "model_zoo",
+    "retccl",
+    "retccl_best_ckpt.pth",
+)
+
+
+def resolve_device(device: str) -> torch.device:
+    """Return the `torch.device` for `device`; "auto" picks CUDA if available."""
+    name = ("cuda" if torch.cuda.is_available() else "cpu") if device == "auto" else device
+    return torch.device(name)
+
+
+def build_ectil(
+    in_features: int,
+    hidden_features: int,
+    attention_hidden_features: int,
+) -> MeanMIL:
+    """Build the ECTIL MeanMIL network and return it in eval mode.
+
+    Each Sequential starts with two Identity() placeholders standing in for the
+    Dropout layers used during training, which keeps the Linear layers at index
+    2 so the module indices match the saved state dict keys.
+    """
+    # Input features -> hidden features, followed by ReLU.
+    post_encoder = Sequential(
+        Identity(), Identity(), Linear(in_features, hidden_features, bias=True), ReLU()
+    )
+    # Hidden features -> a single output passed through a sigmoid.
+    classifier = Sequential(
+        Identity(), Identity(), Linear(hidden_features, 1, bias=True), Sigmoid()
+    )
+    # Attention module operating on the hidden features.
+    attention = GatedAttention(
+        in_features=hidden_features, hidden_features=attention_hidden_features
+    )
+    model = MeanMIL(
+        post_encoder=post_encoder, classifier=classifier, attention=attention
+    )
+    return model.eval()
+
+
+def load_ectil_weights(model: MeanMIL, ckpt_path: Path, device: torch.device) -> None:
+    """Load a weights-only checkpoint from `ckpt_path` into `model` (mapped to `device`)."""
+    weights = torch.load(ckpt_path, map_location=device, weights_only=True)
+    # Saved checkpoints keep the LightningModule's `net.` prefix. Strip it only when
+    # it leads the key: a plain replace also hits `net.` inside e.g. `subnet.weight`.
+    model.load_state_dict({k.removeprefix("net."): v for k, v in weights.items()})
+
+
+def save_mask_images(slide: SlideImage, mask: np.ndarray, out_dir: Path) -> None:
+    """Save the tissue mask as `mask.png` in `out_dir`, plus its thumbnail overlay."""
+    target = out_dir / "mask.png"
+    pixels = mask.astype(np.uint8) * 255  # cast to uint8, then multiply by 255
+    Image.fromarray(pixels).save(target)
+    save_overlay(mask_path=target, mask=mask, slide=slide)  # -> mask_overlay.png
+
+
+def extract_features(
+    slide_path: Path,
+    mask: np.ndarray,
+    encoder: RetCCL,
+    device: torch.device,
+    mpp: float,
+    tile_size: int,
+    mask_threshold: float,
+    batch_size: int,
+    num_workers: int,
+    overwrite_mpp: Optional[float] = None,
+) -> tuple:
+    """Tile the foreground and extract RetCCL features.
+
+    Tiles are square (`tile_size`), non-overlapping, read at `mpp`, and selected
+    with `mask` / `mask_threshold`. Returns `(stacked, regions, dataset)`: the
+    stacked per-tile outputs, the tile regions (x, y, w, h, mpp) aligned with
+    the features, and the dataset. Raises RuntimeError if no tile is selected.
+    """
+    transform = transform_factory("imagenet_normalization")
+    # `overwrite_mpp` is forwarded to dlup for slides that lack an embedded
+    # spacing (common for TCGA SVS); it sets the native micron-per-pixel so the
+    # requested tiling `mpp` can be resolved. Tiling is unaffected for slides
+    # that already carry a spacing.
+    extra = {"overwrite_mpp": (overwrite_mpp, overwrite_mpp)} if overwrite_mpp else {}
+    dataset = DLUPDatasetWrapper.from_standard_tiling(
+        path=slide_path,
+        mpp=mpp,
+        tile_size=(tile_size, tile_size),
+        tile_overlap=(0, 0),
+        tile_mode=TilingMode.skip,
+        grid_order=GridOrder.C,
+        crop=False,
+        transform=transform,
+        mask=mask,
+        mask_threshold=mask_threshold,
+        limit_bounds=True,
+        **extra,
+    )
+    if len(dataset) == 0:
+        raise RuntimeError(
+            "No foreground tiles were selected. The tissue mask may be empty; "
+            "try a different --mask-function or check the slide."
+        )
+
+    loader = DataLoader(
+        dataset=dataset, batch_size=batch_size, num_workers=num_workers, shuffle=False
+    )
+
+    outputs = []
+    encoder = encoder.to(device).eval()
+    log.info(f"Extracting RetCCL features for {len(dataset)} tiles")
+    with torch.no_grad():
+        for batch in loader:
+            batch["image"] = encoder(batch["image"].to(device)).cpu()
+            outputs.append(batch)
+
+    stacked = H5Writer(h5_root_dir="").stack_output(outputs)
+
+    # Regions (x, y, w, h, mpp) aligned with the masked tiles, same as extract.py.
+    # Build the index set once; rebuilding it per region made this loop quadratic.
+    masked = set(dataset.masked_indices)
+    regions = [region for idx, region in enumerate(dataset.regions) if idx in masked]
+    return stacked, regions, dataset
+
+
+def run_ectil(model: MeanMIL, features: np.ndarray, device: torch.device):
+    """Score one bag of tile features; returns (slide score, per-tile output, attention weights)."""
+    bag = torch.from_numpy(features).float()[None].to(device)  # leading bag axis: 1 x n_tiles x dim
+    model = model.to(device).eval()
+    with torch.no_grad():
+        result = model(bag)
+    meta = result["meta"]
+    per_tile, weights = (meta[k].reshape(-1).cpu().numpy() for k in ("out_per_instance", "attention_weights"))
+    return float(result["out"].reshape(-1)[0].item()), per_tile, weights
+
+
+def save_features_h5(
+    out_path: Path,
+    stacked: dict,
+    regions: np.ndarray,
+    slide_id: str,
+    slide_path: Path,
+    til: np.ndarray,
+    attention: np.ndarray,
+) -> None:
+    """Write the features, tile metadata and ECTIL outputs of one slide to `out_path` (HDF5).
+
+    `regions` is skipped when None or empty, and tile-metadata keys are copied from `stacked`
+    only when present. `slide_id` and the slide path are stored as file attributes.
+    """
+    optional_keys = ("coordinates", "mpp", "region_index", "grid_local_coordinates", "grid_index")
+    with h5py.File(out_path, "w") as h5:
+        h5.create_dataset("features", data=stacked["image"])
+        if not (regions is None or len(regions) == 0):
+            h5.create_dataset("regions", data=np.asarray(regions, dtype=float))
+        for name in optional_keys:
+            if name in stacked:
+                h5.create_dataset(name, data=stacked[name])
+        h5.create_dataset("tile_level_output", data=til)
+        h5.create_dataset("attention_weights", data=attention)
+        h5.attrs["slide_id"] = slide_id
+        h5.attrs["path"] = str(slide_path)
+
+
+def tile_regions_for_output(stacked: dict, regions: list, n_tiles: int, mpp: float, tile_size: int):
+    """Return a float array of (x, y, w, h, mpp) rows aligned with the `n_tiles` outputs.
+
+    `regions` is converted as-is when it has exactly `n_tiles` entries. Otherwise (defensive)
+    the rows are rebuilt from `stacked["coordinates"]`, using square `tile_size` tiles and the
+    requested `mpp` for every row.
+    """
+    if len(regions) != n_tiles:
+        # Fallback path: x/y come from the per-tile coordinates, w/h/mpp are constant.
+        xy = np.asarray(stacked["coordinates"], dtype=float)  # n x 2
+        fallback = np.empty((n_tiles, 5), dtype=float)
+        fallback[:, 0], fallback[:, 1] = xy[:, 0], xy[:, 1]
+        fallback[:, 2:4] = tile_size
+        fallback[:, 4] = mpp
+        return fallback
+    return np.asarray(regions, dtype=float)
+
+
+def save_tile_csv(out_path: Path, regions: np.ndarray, til: np.ndarray, attention: np.ndarray) -> None:
+    """Write one CSV row per tile: its region (x, y, w, h, mpp), then the tile output and attention."""
+    header = ["x", "y", "w", "h", "mpp", "tile_level_output", "attention_weights"]
+    rows = ([*(reg[i] for i in range(5)), float(t), float(a)] for reg, t, a in zip(regions, til, attention))
+    with open(out_path, "w", newline="") as handle:
+        csv.writer(handle).writerows([header, *rows])
+
+
+def make_heatmap(
+    slide: SlideImage,
+    thumb: Image.Image,
+    mpp: float,
+    regions: np.ndarray,
+    values: np.ndarray,
+    title: str,
+    out_path: Path,
+    cmap: str,
+    vmin: Optional[float] = None,
+    vmax: Optional[float] = None,
+    cbar_label: str = "",
+) -> None:
+    """Paint per-tile `values` onto the (pre-decoded) slide thumbnail and save as a PNG overlay.
+
+    `regions` rows are (x, y, w, h, mpp) boxes at `mpp`; boxes that end up empty after clamping
+    to the thumbnail are skipped. The figure is always closed, even if drawing or saving raises.
+    """
+    scaling = slide.get_scaling(mpp)
+    scaled_w, scaled_h = (np.asarray(slide.size, dtype=float) * scaling)
+    tw, th = thumb.size
+    sx, sy = tw / scaled_w, th / scaled_h  # slide-at-`mpp` pixels -> thumbnail pixels
+    heat = np.full((th, tw), np.nan, dtype=float)  # NaN = no tile; masked out when drawn
+    for (x, y, w, h, _), v in zip(regions, values):
+        x0, y0 = max(0, int(round(x * sx))), max(0, int(round(y * sy)))
+        x1, y1 = min(tw, int(round((x + w) * sx))), min(th, int(round((y + h) * sy)))
+        if x1 > x0 and y1 > y0:
+            heat[y0:y1, x0:x1] = v
+
+    fig, ax = plt.subplots(figsize=(tw / 100.0, th / 100.0), dpi=100)
+    try:  # callers keep going after a per-slide failure, so never leave the figure open
+        ax.imshow(np.asarray(thumb))
+        im = ax.imshow(np.ma.masked_invalid(heat), cmap=cmap, alpha=0.5, vmin=vmin, vmax=vmax)
+        cbar = fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04)
+        if cbar_label:
+            cbar.set_label(cbar_label)
+        ax.set_title(title)
+        ax.axis("off")
+        fig.savefig(out_path, bbox_inches="tight")
+    finally:
+        plt.close(fig)
+
+
+# Whole-slide formats globbed by default when --wsi points to a directory.
+# Note on MRXS: a `.mrxs` slide is the single file you open; it is accompanied by
+# a same-named directory of raw data. Globbing by extension matches the `.mrxs`
+# file (which dlup/openslide opens) and never the companion directory.
+DEFAULT_SLIDE_GLOB = "*.svs,*.tif,*.tiff,*.ndpi,*.mrxs,*.scn,*.svslide,*.bif"
+
+
+def discover_slides(input_path: Path, glob_patterns: str) -> list:
+    """Resolve `input_path` to the slide files it designates.
+
+    A file is returned as a one-element list. A directory is searched recursively, once per
+    comma-separated pattern in `glob_patterns`; matches keep pattern order (sorted within a
+    pattern) and duplicates are dropped. Raises FileNotFoundError for any other path.
+    """
+    root = input_path.expanduser().resolve()
+    if root.is_file():
+        return [root]
+    if not root.is_dir():
+        raise FileNotFoundError(f"WSI path not found: {root}")
+
+    found: dict = {}  # insertion-ordered, so it doubles as an ordered set
+    for pattern in filter(None, (piece.strip() for piece in glob_patterns.split(","))):
+        found.update((hit, None) for hit in sorted(root.rglob(pattern)) if hit.is_file())
+    return list(found)
+
+
+def build_config(args: argparse.Namespace, device: torch.device, run_name: str) -> dict:
+    """Collect the run settings embedded in each slide's JSON and in the run's config.json.
+
+    Most values are copied from `args` under the same name; `device` is stringified and the
+    two weight paths are stored as strings with `~` expanded.
+    """
+    # Attributes of `args` that are copied verbatim, in output order.
+    passthrough = (
+        "mpp", "overwrite_mpp", "tile_size",
+        "mask_function", "mask_threshold",
+        "batch_size", "num_workers", "heatmap_size",
+        "in_features", "hidden_features", "attention_hidden_features",
+    )
+    config = {"run_name": run_name, "device": str(device)}
+    config.update((name, getattr(args, name)) for name in passthrough)
+    for name in ("classifier_weights", "retccl_weights"):
+        # Only the home directory is expanded; the path is not made absolute.
+        config[name] = str(Path(getattr(args, name)).expanduser())
+    return config
+
+
+def _unique_slide_dir(run_dir: Path, slide_id: str) -> Path:
+    """Create and return a fresh per-slide dir under `run_dir`: `<slide_id>`, or the first
+    free `<slide_id>_N` (N = 1, 2, ...) when that name already exists."""
+    candidate, attempt = run_dir / slide_id, 0
+    while candidate.exists():
+        attempt += 1
+        candidate = run_dir / f"{slide_id}_{attempt}"
+    candidate.mkdir(parents=True)
+    return candidate
+
+
+def process_slide(
+    wsi_path: Path,
+    run_dir: Path,
+    encoder: RetCCL,
+    model: MeanMIL,
+    device: torch.device,
+    config: dict,
+    args: argparse.Namespace,
+) -> dict:
+    """Run the full pipeline for one slide, write its outputs, and return its summary row.
+
+    Everything is written to a fresh per-slide directory under `run_dir`: thumbnail and mask
+    PNGs, features.h5, tile_predictions.csv, tils_score.json and the two heatmap PNGs. The
+    returned dict is exactly what is dumped to tils_score.json (slide fields plus `config`).
+    """
+    slide_id = wsi_path.stem
+    out_dir = _unique_slide_dir(run_dir, slide_id)
+    log.info(f"[{slide_id}] writing outputs to {out_dir}")
+
+    # 1. Tissue mask + thumbnail. The thumbnail is decoded a single time and shared by the saved
+    # PNG and both heatmap overlays further down. `overwrite_mpp` lets slides without an embedded
+    # spacing (e.g. many TCGA SVS) be opened; dlup otherwise raises UnsupportedSlideError.
+    # NOTE(review): `slide` is never closed explicitly - confirm that is fine for large batches.
+    open_kwargs = {}
+    if args.overwrite_mpp:
+        open_kwargs["overwrite_mpp"] = (args.overwrite_mpp, args.overwrite_mpp)
+    slide = SlideImage.from_file_path(wsi_path, **open_kwargs)
+    log.info(f"[{slide_id}] computing tissue mask with '{args.mask_function}'")
+    mask = compute_mask(slide=slide, mask_function=args.mask_function)
+    thumb_box = (args.heatmap_size, args.heatmap_size)
+    thumb = slide.get_thumbnail(size=thumb_box).convert("RGB")
+    thumb.save(out_dir / "thumbnail.png")
+    save_mask_images(slide=slide, mask=mask, out_dir=out_dir)
+
+    # 2. Foreground tiling + RetCCL feature extraction (the returned dataset is not used here)
+    stacked, regions, _ = extract_features(
+        slide_path=wsi_path,
+        mask=mask,
+        encoder=encoder,
+        device=device,
+        mpp=args.mpp,
+        tile_size=args.tile_size,
+        mask_threshold=args.mask_threshold,
+        batch_size=args.batch_size,
+        num_workers=args.num_workers,
+        overwrite_mpp=args.overwrite_mpp,
+    )
+    features = stacked["image"]
+    n_tiles = features.shape[0]
+
+    # 3. ECTIL classifier
+    score, til, attention = run_ectil(model, features, device)
+    percent = score * 100.0
+    log.info(f"[{slide_id}] TIL score: {score:.4f} ({percent:.1f}%) over {n_tiles} tiles")
+
+    # 4. Persist results
+    out_regions = tile_regions_for_output(stacked, regions, n_tiles, args.mpp, args.tile_size)
+    save_features_h5(
+        out_dir / "features.h5", stacked, out_regions, slide_id, wsi_path, til, attention
+    )
+    save_tile_csv(out_dir / "tile_predictions.csv", out_regions, til, attention)
+
+    summary = {
+        "slide_id": slide_id,
+        "slide_path": str(wsi_path),
+        "til_score": score,
+        "til_score_percent": percent,
+        "num_tiles": int(n_tiles),
+        "output_dir": str(out_dir),
+    }
+    summary.update(config)
+    with open(out_dir / "tils_score.json", "w") as f:
+        json.dump(summary, f, indent=2)
+
+    # 5. Heatmaps: one overlay per per-tile quantity, painted over the shared thumbnail.
+    heatmap_specs = (
+        {
+            "values": attention,
+            "title": f"{slide_id} - attention",
+            "out_path": out_dir / "attention_heatmap.png",
+            "cmap": "viridis",
+            "cbar_label": "attention weight",
+        },
+        {
+            "values": til,
+            "title": f"{slide_id} - tile-level TIL (slide score {percent:.1f}%)",
+            "out_path": out_dir / "til_heatmap.png",
+            "cmap": "jet",
+            "vmin": 0.0,
+            "vmax": 1.0,
+            "cbar_label": "tile TIL score",
+        },
+    )
+    for spec in heatmap_specs:
+        make_heatmap(slide=slide, thumb=thumb, mpp=args.mpp, regions=out_regions, **spec)
+    return summary
+
+
+def run_inference(args: argparse.Namespace) -> dict:
+    """Discover one or more slides, run ECTIL on each, and aggregate the results.
+
+    Writes `config.json` and an incrementally flushed `tils_scores.csv` into the run dir. A
+    slide that raises is logged and recorded as a "failed" row, and the batch continues.
+    Returns the run dir, the summary path and the ok/total counts. Raises FileNotFoundError
+    when the weights are missing or no slide is found.
+    """
+    import csv
+    from datetime import datetime
+
+    device = resolve_device(args.device)
+
+    retccl_weights = Path(args.retccl_weights).expanduser()
+    if not retccl_weights.is_file():
+        raise FileNotFoundError(
+            f"RetCCL weights not found at {retccl_weights}. Provide them via "
+            "--retccl-weights or the RETCCL_WEIGHTS env var (see model_zoo/retccl/readme.md)."
+        )
+    classifier_weights = Path(args.classifier_weights).expanduser()
+    if not classifier_weights.is_file():
+        raise FileNotFoundError(f"ECTIL classifier weights not found: {classifier_weights}")
+
+    slides = discover_slides(Path(args.wsi), args.glob)
+    if not slides:
+        raise FileNotFoundError(f"No slides matching '{args.glob}' found under {args.wsi}")
+
+    run_name = args.run_name or datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
+    run_dir = Path(args.output).expanduser() / run_name
+    run_dir.mkdir(parents=True, exist_ok=True)
+    config = build_config(args, device, run_name)
+    with open(run_dir / "config.json", "w") as f:
+        json.dump({**config, "num_slides": len(slides)}, f, indent=2)
+    log.info(f"Found {len(slides)} slide(s); writing run to {run_dir} (device={device})")
+
+    # Load the encoder and classifier once and reuse them across all slides.
+    encoder = RetCCL(project_root_dir="", weights_path=str(retccl_weights)).to(device).eval()
+    model = build_ectil(
+        in_features=args.in_features,
+        hidden_features=args.hidden_features,
+        attention_hidden_features=args.attention_hidden_features,
+    )
+    load_ectil_weights(model, classifier_weights, device)
+    model = model.to(device).eval()
+
+    summary_path = run_dir / "tils_scores.csv"
+    fieldnames = [
+        "slide_id", "slide_path", "til_score", "til_score_percent",
+        "num_tiles", "status", "error", "output_dir",
+    ]
+    n_ok = 0
+    # Write the summary incrementally so partial results survive an interrupted batch.
+    with open(summary_path, "w", newline="") as summary_file:
+        writer = csv.DictWriter(summary_file, fieldnames=fieldnames, extrasaction="ignore")
+        writer.writeheader()
+        for i, wsi_path in enumerate(slides, start=1):
+            log.info(f"({i}/{len(slides)}) {wsi_path}")
+            try:
+                row = process_slide(wsi_path, run_dir, encoder, model, device, config, args)
+                row.update({"status": "ok", "error": ""})
+                n_ok += 1
+            except Exception as exc:  # keep going on per-slide failures
+                log.exception(f"Failed to process {wsi_path}: {exc}")
+                # Collapse whitespace so a multi-line message keeps the row on one physical CSV
+                # line (line-based tools count rows); the full traceback is in the log above.
+                row = {
+                    "slide_id": wsi_path.stem,
+                    "slide_path": str(wsi_path),
+                    "status": "failed",
+                    "error": " ".join(str(exc).split()),
+                }
+            writer.writerow(row)
+            summary_file.flush()
+
+    log.info(f"Done. {n_ok}/{len(slides)} slide(s) succeeded. Summary: {summary_path}")
+    return {"run_dir": str(run_dir), "summary": str(summary_path), "n_ok": n_ok, "n_total": len(slides)}
+
+
+def parse_args(argv=None) -> argparse.Namespace:
+    """Build the inference CLI and parse `argv` (argparse reads sys.argv[1:] when it is None)."""
+    cli = argparse.ArgumentParser(
+        description="Run end-to-end ECTIL TIL inference on a WSI or a directory of WSIs.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    add = cli.add_argument
+
+    # Inputs, weights and output location.
+    add("--wsi", required=True, help="Path to a WSI file or a directory of WSIs.")
+    add(
+        "--glob",
+        default=DEFAULT_SLIDE_GLOB,
+        help="Comma-separated glob patterns used (recursively) when --wsi is a directory.",
+    )
+    add(
+        "--classifier-weights",
+        required=True,
+        help="Path to the ECTIL classifier weights (e.g. *_weights_only.ckpt).",
+    )
+    add(
+        "--output",
+        "-o",
+        required=True,
+        help="Output directory; a timestamped run subdir with per-slide subdirs is created.",
+    )
+    add(
+        "--run-name",
+        default=None,
+        help="Name of the run subdir under --output. Defaults to a timestamp.",
+    )
+    add(
+        "--retccl-weights",
+        default=os.environ.get("RETCCL_WEIGHTS", str(DEFAULT_RETCCL_WEIGHTS)),
+        help="Path to RetCCL weights. Auto-loaded from model_zoo / RETCCL_WEIGHTS by default.",
+    )
+
+    # Compute device, tiling and tissue masking.
+    add("--device", default="auto", choices=["auto", "cpu", "cuda"], help="Compute device.")
+    add("--mpp", type=float, default=0.5, help="Microns per pixel for tiling.")
+    add(
+        "--overwrite-mpp",
+        type=float,
+        default=None,
+        help="Native microns-per-pixel to assume when a slide has no embedded spacing "
+        "(e.g. many TCGA SVS). For TCGA-BRCA 40x diagnostic slides this is 0.25. "
+        "Leave unset to use the slide's own spacing.",
+    )
+    add("--tile-size", type=int, default=512, help="Tile size in pixels.")
+    add(
+        "--mask-function",
+        default="fesi",
+        choices=list(AvailableMaskFunctions.__members__),
+        help="Tissue foreground segmentation function.",
+    )
+    add(
+        "--mask-threshold",
+        type=float,
+        default=0.1,
+        help="Minimum foreground fraction for a tile to be kept.",
+    )
+
+    # Throughput, rendering and model dimensions.
+    add("--batch-size", type=int, default=16, help="RetCCL extraction batch size.")
+    add("--num-workers", type=int, default=0, help="DataLoader workers.")
+    add("--heatmap-size", type=int, default=2048, help="Long-edge size of thumbnails/heatmaps.")
+    add("--in-features", type=int, default=2048, help="RetCCL feature dimension.")
+    add("--hidden-features", type=int, default=512, help="ECTIL hidden dimension.")
+    add("--attention-hidden-features", type=int, default=128, help="Attention hidden dimension.")
+    return cli.parse_args(argv)
+
+
+def main(argv=None) -> None:  # CLI entry point; `argv` is handed to parse_args unchanged
+    run_inference(parse_args(argv))  # the summary dict returned by run_inference is discarded
+
+
+if __name__ == "__main__":  # run the CLI only when executed as a script, not on import
+    main()
diff --git a/setup.py b/setup.py
index 73cf4f1..e804f78 100644
--- a/setup.py
+++ b/setup.py
@@ -5,11 +5,14 @@
 setup(
     name="ectil",
     version="0.0.1",
-    python_requires="==3.10.9",
+    # The project targets Python 3.10; an exact "==3.10.9" pin was too strict and
+    # broke editable installs when conda resolved a newer 3.10.x patch (e.g. 3.10.20),
+    # which newer pip/setuptools now enforce on `python_requires`.
+    python_requires=">=3.10,<3.11",
     description="ECTIL: Label-efficient Computational Tumour Infiltrating Lymphocyte (TIL) assessment in breast cancer",
     author="Yoni Schirris",
     author_email="yschirris@gmail.com",
-    url="https://github.com/YoniSchirris/ectil",
+    url="https://github.com/nki-ai/ectil",
     install_requires=["pytorch-lightning", "hydra-core"],
     packages=find_packages(),
 )
diff --git a/tools/infer/infer_docker.sh b/tools/infer/infer_docker.sh
new file mode 100755
index 0000000..84b9aa0
--- /dev/null
+++ b/tools/infer/infer_docker.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# Example: run ECTIL inference on a single WSI inside the Docker container.
+#
+# 1. Get the image — either pull the published one or build it yourself:
+#       docker pull ghcr.io/nki-ai/ectil-inference:latest
+#       # or
+#       docker build -t ghcr.io/nki-ai/ectil-inference:latest .
+#
+# 2. Download the weights (not bundled in the image):
+#       - ECTIL classifier: https://files.aiforoncology.nl/ectil  (model_zoo/ectil/tcga/readme.md)
+#       - RetCCL encoder:   see model_zoo/retccl/readme.md
+#
+# 3. Set the paths below and run this script.
+#
+# WSI may be a single slide OR a directory of slides (set WSI to the directory;
+# it is recursively globbed by extension, including .mrxs).
+#
+# The container writes a timestamped run dir under $OUTPUT containing
+# config.json, an aggregate tils_scores.csv, and a per-slide subdir with:
+#   tils_score.json, tile_predictions.csv, features.h5,
+#   thumbnail.png, mask.png, mask_overlay.png,
+#   attention_heatmap.png, til_heatmap.png
+
+set -euo pipefail
+
+WSI="/path/to/slide.svs"  # a single slide file, or a directory of slides
+CLASSIFIER_WEIGHTS="/path/to/ectil_fold_0_weights_only.ckpt"
+RETCCL_WEIGHTS="/path/to/retccl_best_ckpt.pth"
+OUTPUT="/path/to/output"
+# Fail early on a wrong path: `docker run -v` would create a missing host path as an empty directory.
+for p in "$WSI" "$CLASSIFIER_WEIGHTS" "$RETCCL_WEIGHTS"; do [ -e "$p" ] || { echo "ERROR: not found: $p" >&2; exit 1; }; done
+docker run --rm \
+    -v "$(dirname "$WSI")":/input:ro \
+    -v "$(dirname "$CLASSIFIER_WEIGHTS")":/weights/ectil:ro \
+    -v "$(dirname "$RETCCL_WEIGHTS")":/weights/retccl:ro \
+    -v "$OUTPUT":/output \
+    ghcr.io/nki-ai/ectil-inference:latest \
+        --wsi "/input/$(basename "$WSI")" \
+        --classifier-weights "/weights/ectil/$(basename "$CLASSIFIER_WEIGHTS")" \
+        --retccl-weights "/weights/retccl/$(basename "$RETCCL_WEIGHTS")" \
+        --output /output
+
+# For GPU, add `--gpus all` to `docker run` and `--device cuda` to the command above.
diff --git a/tools/infer/run_demo.sh b/tools/infer/run_demo.sh
new file mode 100755
index 0000000..4604816
--- /dev/null
+++ b/tools/infer/run_demo.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# One-command ECTIL WSI inference smoke test.
+#
+# After `git clone`, just run (from anywhere):
+#
+#     ./tools/infer/run_demo.sh
+#
+# It will, idempotently (skip-if-exists everywhere):
+#   1. Pre-flight check the host tools (docker, curl, gdown)
+#   2. Download the RetCCL encoder weights      -> model_zoo/retccl/retccl_best_ckpt.pth
+#   3. Download the ECTIL classifier weights     -> model_zoo/ectil/tcga/fold_0/...ckpt
+#   4. Download 5 small TCGA-BRCA slides          -> data/wsi/demo/*.svs
+#   5. Build the Docker image (`docker build -t ectil-inference .`)
+#   6. Run the pipeline in the container, CPU, in TWO modes:
+#         - single-slide:  --wsi <one .svs>      (expect 1 result row)
+#         - directory:     --wsi data/wsi/demo   (expect 5 result rows)
+#   7. Validate the outputs of both runs and print SMOKE TEST PASSED / FAILED.
+#
+# Heavy C deps (openslide, libvips, dlup, torch) live inside the image, NOT on
+# your host. The only host requirements are Docker (daemon running), curl, and
+# python3+pip (used solely to bootstrap `gdown` for the Google-Drive download
+# and to parse the result JSON). Everything downloaded lands under data/wsi and
+# model_zoo, both of which are gitignored.
+
+set -euo pipefail  # abort on command errors, unset variables and failed pipeline stages
+
+# Resolve repo root from this script's location so it works from any cwd.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+cd "$REPO_ROOT"  # every relative path below is relative to the repo root
+
+IMAGE="ectil-inference"  # local image tag: built in step 5, run in step 6
+
+RETCCL_DIR="model_zoo/retccl"
+RETCCL_WEIGHTS="$RETCCL_DIR/retccl_best_ckpt.pth"
+RETCCL_GDRIVE_FOLDER="https://drive.google.com/drive/folders/1AhstAFVqtTqxeS9WlBpU41BV08LYFUnL"
+
+ECTIL_DIR="model_zoo/ectil/tcga/fold_0"
+ECTIL_WEIGHTS="$ECTIL_DIR/epoch_065_step_858_weights_only.ckpt"
+ECTIL_URL="https://files.aiforoncology.nl/ectil/tcga/fold_0/epoch_065_step_858_weights_only.ckpt"
+
+# The demo owns its own slide subdir so the directory-mode row count is exactly
+# the 5 slides we download, regardless of anything else under data/wsi.
+WSI_DIR="data/wsi/demo"
+OUTPUT_DIR="data/inference_output"  # both demo runs write their run dirs under here
+
+# 5 verified small TCGA-BRCA diagnostic slides served by the GDC data endpoint.
+GDC_FILE_IDS=(
+    "f2d5aa37-d9ce-4264-a447-fc69dd0d7d85"
+    "a5b0148b-afba-4cc6-9cb4-c346966d73e3"
+    "ba1d2e38-fd12-478a-976a-e6701ed784e2"
+    "2449ff02-6925-4f25-9074-7c5fbeab0bd2"
+    "acc47852-3f83-4ed3-a85e-ecba28c06aa9"
+)
+NUM_SLIDES=${#GDC_FILE_IDS[@]}  # expected slide count, derived from the id list above
+
+echo "==> ECTIL WSI inference smoke test (repo: $REPO_ROOT)"
+
+# ---------------------------------------------------------------------------
+# 1. Pre-flight: host tools
+# ---------------------------------------------------------------------------
+echo "==> [1/7] Pre-flight checks ..."
+if ! command -v docker >/dev/null 2>&1; then
+    echo "ERROR: docker not found on PATH. Install Docker (https://docs.docker.com/get-docker/) and retry." >&2
+    exit 1
+fi
+if ! docker info >/dev/null 2>&1; then
+    echo "ERROR: the Docker daemon is not running. Start Docker Desktop / dockerd and retry." >&2
+    exit 1
+fi
+# curl fetches the weights and slides; python3 is always needed to parse the result JSON in step 7.
+for tool in curl python3; do
+    command -v "$tool" >/dev/null 2>&1 || { echo "ERROR: $tool not found on PATH. Install $tool and retry." >&2; exit 1; }
+done
+# gdown is only needed if the RetCCL weights are not already present.
+if [ ! -f "$RETCCL_WEIGHTS" ] && ! command -v gdown >/dev/null 2>&1; then
+    if python3 -m pip --version >/dev/null 2>&1; then
+        echo "    gdown not found; will install it via pip for the Google-Drive download."
+    else
+        echo "ERROR: RetCCL weights are missing and neither 'gdown' nor 'python3 -m pip' is available." >&2
+        echo "       Install gdown (pip install gdown) or place the weights at $RETCCL_WEIGHTS." >&2
+        exit 1
+    fi
+fi
+echo "    docker, curl, python3 present; daemon running."
+
+# ---------------------------------------------------------------------------
+# 2. RetCCL encoder weights (Google Drive, ~94 MB)
+# ---------------------------------------------------------------------------
+echo "==> [2/7] RetCCL encoder weights ..."
+if [ -f "$RETCCL_WEIGHTS" ]; then
+    echo "    already present, skipping: $RETCCL_WEIGHTS"
+else
+    mkdir -p "$RETCCL_DIR"
+    if ! command -v gdown >/dev/null 2>&1; then
+        echo "    installing gdown ..."
+        python3 -m pip install --user --quiet gdown || python3 -m pip install --quiet gdown  # --user is refused inside a virtualenv
+        # Put a user-installed gdown on PATH for this shell (an env install is already on PATH).
+        export PATH="$PATH:$(python3 -m site --user-base)/bin"
+    fi
+    # The Drive folder contains a single best_ckpt.pth; pull it into a temp dir,
+    # then rename to what inference.py expects.
+    TMP_RETCCL="$(mktemp -d)"
+    gdown --folder "$RETCCL_GDRIVE_FOLDER" -O "$TMP_RETCCL"
+    SRC="$(find "$TMP_RETCCL" -name 'best_ckpt.pth' -type f | head -n1)"
+    if [ -z "$SRC" ]; then
+        echo "ERROR: could not find best_ckpt.pth in the downloaded RetCCL folder." >&2
+        rm -rf "$TMP_RETCCL"
+        exit 1
+    fi
+    mv "$SRC" "$RETCCL_WEIGHTS"
+    rm -rf "$TMP_RETCCL"
+    echo "    -> $RETCCL_WEIGHTS"
+fi
+
+# ---------------------------------------------------------------------------
+# 3. ECTIL classifier weights (~4.7 MB)
+# ---------------------------------------------------------------------------
+echo "==> [3/7] ECTIL classifier weights ..."
+if [ -f "$ECTIL_WEIGHTS" ]; then
+    echo "    already present, skipping: $ECTIL_WEIGHTS"
+else
+    mkdir -p "$ECTIL_DIR"
+    curl -fL "$ECTIL_URL" -o "$ECTIL_WEIGHTS.part"  # .part: an interrupted download never passes the skip check above
+    mv -f "$ECTIL_WEIGHTS.part" "$ECTIL_WEIGHTS"; echo "    -> $ECTIL_WEIGHTS"
+fi
+
+# ---------------------------------------------------------------------------
+# 4. 5 whole-slide images (TCGA-BRCA, GDC data endpoint)
+# ---------------------------------------------------------------------------
+echo "==> [4/7] TCGA-BRCA slides ($NUM_SLIDES total) ..."
+mkdir -p "$WSI_DIR"
+for FID in "${GDC_FILE_IDS[@]}"; do
+    # GDC rejects HEAD, so we cannot probe for the Content-Disposition filename
+    # cheaply; instead a per-id marker records the downloaded filename, and the id
+    # is skipped only while that slide still exists (a deleted slide is re-fetched).
+    MARKER="$WSI_DIR/.$FID.done"
+    if [ -s "$MARKER" ] && [ -f "$WSI_DIR/$(cat "$MARKER")" ]; then
+        echo "    [$FID] already present, skipping."
+        continue
+    fi
+    echo "    [$FID] downloading ..."
+    TMP_DL="$(mktemp -d)"
+    # Private temp dir + -J -O: keeps the server's Content-Disposition .svs filename, collision-free.
+    ( cd "$TMP_DL" && curl -fL -J -O "https://api.gdc.cancer.gov/data/$FID" )
+    DL_SVS="$(find "$TMP_DL" -name '*.svs' -type f | head -n1)"
+    if [ -z "$DL_SVS" ]; then
+        echo "ERROR: GDC id $FID did not yield an .svs file." >&2
+        rm -rf "$TMP_DL"
+        exit 1
+    fi
+    mv -f "$DL_SVS" "$WSI_DIR/"
+    rm -rf "$TMP_DL"
+    basename "$DL_SVS" > "$MARKER"
+done
+
+# Count without bash-4 builtins (macOS ships bash 3.2, which lacks mapfile).
+HAVE_SLIDES="$(find "$WSI_DIR" -maxdepth 1 -name '*.svs' -type f | wc -l | tr -d ' ')"
+if [ "$HAVE_SLIDES" -ne "$NUM_SLIDES" ]; then
+    echo "ERROR: expected exactly $NUM_SLIDES slides in $WSI_DIR, found $HAVE_SLIDES." >&2
+    echo "       (Directory mode asserts a $NUM_SLIDES-row result, so $WSI_DIR must hold only these slides.)" >&2
+    exit 1
+fi
+echo "    have $HAVE_SLIDES slides in $WSI_DIR."
+
+# Pick the smallest slide for the single-slide run to keep that run fast.
+SINGLE_SLIDE="$(find "$WSI_DIR" -maxdepth 1 -name '*.svs' -type f -exec ls -S {} + | tail -n1)"
+echo "    single-slide run will use: $(basename "$SINGLE_SLIDE")"
+
+# ---------------------------------------------------------------------------
+# 5. Build the Docker image (cached after the first build)
+# ---------------------------------------------------------------------------
+echo "==> [5/7] Building Docker image '$IMAGE' (cached after first build) ..."
+docker build -t "$IMAGE" .  # build context is the repo root (cwd was set to $REPO_ROOT above)
+
+# ---------------------------------------------------------------------------
+# 6. Run the pipeline in the container, CPU, in two modes.
+#    Volume-mount pattern mirrors tools/infer/infer_docker.sh.
+# ---------------------------------------------------------------------------
+mkdir -p "$OUTPUT_DIR"  # created on the host before it is bind-mounted at /output
+
+run_ectil () {  # $1 = run name, $2 = --wsi target inside the container
+    local run_name="$1" wsi_arg="$2"
+    # TCGA-BRCA diagnostic SVS are scanned at 40x but often ship without an
+    # embedded micron-per-pixel; --overwrite-mpp 0.25 supplies the native spacing
+    # so dlup can tile them (otherwise it raises UnsupportedSlideError).
+    docker run --rm \
+        -v "$REPO_ROOT/$WSI_DIR":/input:ro \
+        -v "$REPO_ROOT/$ECTIL_DIR":/weights/ectil:ro \
+        -v "$REPO_ROOT/$RETCCL_DIR":/weights/retccl:ro \
+        -v "$REPO_ROOT/$OUTPUT_DIR":/output \
+        "$IMAGE" \
+            --wsi "$wsi_arg" \
+            --classifier-weights "/weights/ectil/$(basename "$ECTIL_WEIGHTS")" \
+            --retccl-weights "/weights/retccl/$(basename "$RETCCL_WEIGHTS")" \
+            --output /output \
+            --run-name "$run_name" \
+            --overwrite-mpp 0.25 \
+            --device cpu
+}
+
+SINGLE_RUN="demo_single"
+DIR_RUN="demo_dir"
+
+# These run names are fixed (not timestamped), so clear any leftovers from a
+# previous invocation to keep the validation counts deterministic.
+rm -rf "$OUTPUT_DIR/$SINGLE_RUN" "$OUTPUT_DIR/$DIR_RUN"  # NOTE(review): container output may be root-owned on Linux - confirm this rm works on a re-run
+
+echo "==> [6/7] Running SINGLE-SLIDE mode ($(basename "$SINGLE_SLIDE")) ..."
+run_ectil "$SINGLE_RUN" "/input/$(basename "$SINGLE_SLIDE")"
+
+echo "==> [6/7] Running DIRECTORY mode (all $NUM_SLIDES slides in $WSI_DIR) ..."
+run_ectil "$DIR_RUN" "/input"
+
+# ---------------------------------------------------------------------------
+# 7. Validate the outputs of both runs (the actual smoke test).
+# ---------------------------------------------------------------------------
+echo "==> [7/7] Validating outputs ..."
+
+FAIL=0  # set to 1 by fail(); decides the final PASSED/FAILED verdict and exit status
+fail () { echo "  [FAIL] $1" >&2; FAIL=1; }
+ok ()   { echo "  [ok]   $1"; }
+
+# Count data rows in a CSV as physical lines minus the header; prints 0 for a missing file.
+csv_data_rows () { [ -f "$1" ] && { local n; n=$(($(wc -l < "$1") - 1)); [ "$n" -lt 0 ] && n=0; echo "$n"; } || echo 0; }
+
+# Parse the til_score from a per-slide tils_score.json ($1) and require 0.0 <= score <= 1.0.
+# Echoes "slide_id\tscore" (score with 4 decimals) on success; returns non-zero on any problem.
+parse_score () {
+    python3 - "$1" <<'PY'
+import json, sys
+p = sys.argv[1]
+try:
+    d = json.load(open(p))
+    s = float(d["til_score"])
+except Exception as e:
+    print(f"parse-error: {e}", file=sys.stderr); sys.exit(1)
+if not (0.0 <= s <= 1.0):
+    print(f"out-of-range: {s}", file=sys.stderr); sys.exit(1)
+print(f"{d.get('slide_id','?')}\t{s:.4f}")
+PY
+}
+
+# Required per-slide artifacts.
+PER_SLIDE_FILES=(
+    tils_score.json tile_predictions.csv features.h5
+    thumbnail.png mask.png mask_overlay.png
+    attention_heatmap.png til_heatmap.png
+)
+
+validate_run () {  # $1 = run name, $2 = expected number of slide rows/subdirs; problems are recorded via fail()
+    local run_name="$1" expect="$2"
+    local run_dir="$OUTPUT_DIR/$run_name"
+    echo "  -- run '$run_name' (expecting $expect slide(s)) --"
+
+    [ -d "$run_dir" ] && ok "run dir exists: $run_dir" || { fail "run dir missing: $run_dir"; return; }
+
+    local scores_csv="$run_dir/tils_scores.csv"
+    if [ -s "$scores_csv" ]; then ok "tils_scores.csv exists and is non-empty"
+    else fail "tils_scores.csv missing or empty: $scores_csv"; return; fi
+
+    local rows; rows=$(csv_data_rows "$scores_csv")
+    [ "$rows" -eq "$expect" ] && ok "tils_scores.csv has $rows data row(s)" \
+        || fail "tils_scores.csv has $rows data row(s), expected $expect"
+
+    # Per-slide subdirs: -type d counts directories only, so config.json / tils_scores.csv are excluded.
+    local subdirs; subdirs=$(find "$run_dir" -mindepth 1 -maxdepth 1 -type d | wc -l | tr -d ' ')
+    [ "$subdirs" -eq "$expect" ] && ok "$subdirs per-slide subdir(s)" \
+        || fail "$subdirs per-slide subdir(s), expected $expect"
+
+    # Each subdir must hold all required artifacts (non-empty) and a sane, parseable score.
+    local d f
+    for d in "$run_dir"/*/; do
+        [ -d "$d" ] || continue  # skips the unexpanded pattern when there are no subdirs
+        for f in "${PER_SLIDE_FILES[@]}"; do
+            [ -s "$d$f" ] || fail "missing/empty artifact: $d$f"
+        done
+        local line
+        if line=$(parse_score "$d/tils_score.json"); then
+            ok "score sane for $(basename "$d"): $(echo "$line" | cut -f2)"
+        else
+            fail "bad TIL score in $d/tils_score.json"
+        fi
+    done
+}
+
+validate_run "$SINGLE_RUN" 1
+validate_run "$DIR_RUN" "$NUM_SLIDES"
+
+echo ""
+echo "============================================================"
+echo " Per-slide TIL scores (directory run):"
+DIR_RUN_DIR="$OUTPUT_DIR/$DIR_RUN"
+for d in "$DIR_RUN_DIR"/*/; do
+    [ -d "$d" ] || continue
+    line=$(parse_score "$d/tils_score.json" 2>/dev/null || true)
+    if [ -n "$line" ]; then
+        sid=$(echo "$line" | cut -f1); sc=$(echo "$line" | cut -f2)
+        printf "   %-60s TIL = %s (%.1f%%)\n" "$sid" "$sc" "$(python3 -c "print($sc*100)")"
+    fi
+done
+
+# Single, headline TIL score = the single-slide run, read via parse_score ('?' if it cannot be read).
+SINGLE_ID="$(basename "$SINGLE_SLIDE" .svs)"
+SINGLE_JSON="$OUTPUT_DIR/$SINGLE_RUN/$SINGLE_ID/tils_score.json"
+SINGLE_SCORE="$( { parse_score "$SINGLE_JSON" 2>/dev/null || printf '?\t?\n'; } | cut -f2 )"
+echo "============================================================"
+
+if [ "$FAIL" -eq 0 ]; then
+    echo "SMOKE TEST PASSED"
+    echo "TIL score (single-slide $SINGLE_ID): $SINGLE_SCORE   |   run dir: $OUTPUT_DIR/$SINGLE_RUN"
+    exit 0
+else
+    echo "SMOKE TEST FAILED  (see [FAIL] lines above)"
+    exit 1
+fi