Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# Test workflow.
#
# The default `pytest` job runs on every push and pull request. The other three
# jobs are opt-in: they only run on a manual `workflow_dispatch` with the
# matching boolean input switched on.
name: tests

on:
  push:
  pull_request:
  workflow_dispatch:
    inputs:
      run_conda_validation:
        description: "Create workflow conda envs and run package import checks"
        required: false
        default: false
        type: boolean
      run_container_validation:
        description: "Pull workflow containers"
        required: false
        default: false
        type: boolean
      run_doubletfinder_install:
        description: "Run the networked DoubletFinder GitHub install rule"
        required: false
        default: false
        type: boolean

# Least privilege: every job only checks out and reads the repository, so the
# GITHUB_TOKEN does not need any write scope.
permissions:
  contents: read

jobs:
  pytest:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Login shell (-l) so the environment set up by setup-micromamba is
        # active in every `run` step; -e aborts on the first failing command.
        shell: bash -el {0}
    env:
      SNAKEMAKE_CONDA_FRONTEND: mamba
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Create test environment
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: environment.yml
          cache-environment: true
          cache-downloads: true

      # Rule environments are keyed on the env definitions; the restore key
      # lets a changed definition start from the most recent cache.
      - name: Cache Snakemake rule environments
        uses: actions/cache@v4
        with:
          path: .snakemake/conda
          key: snakemake-conda-${{ runner.os }}-${{ hashFiles('workflow/envs/*.yml') }}
          restore-keys: |
            snakemake-conda-${{ runner.os }}-

      - name: Run tests
        run: pytest tests -q

  conda-validation:
    if: github.event_name == 'workflow_dispatch' && inputs.run_conda_validation
    runs-on: ubuntu-latest
    strategy:
      # One job per environment file; a failure in one must not cancel the rest.
      fail-fast: false
      matrix:
        env_name:
          - cellbender.yml
          - doubletfinder.yml
          - emptydrops.yml
          - posthocfilter.yml
          - scdblfinder.yml
          - soupx.yml
          - tenx2seuratrds.yml
    defaults:
      run:
        shell: bash -el {0}
    env:
      SNAKEMAKE_CONDA_FRONTEND: mamba
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Create test environment
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: environment.yml
          cache-environment: true
          cache-downloads: true

      - name: Run conda validation for ${{ matrix.env_name }}
        env:
          # Hand the matrix value to the shell as data instead of templating it
          # into the script text.
          CONDA_ENV_NAME: ${{ matrix.env_name }}
        run: >-
          pytest tests/test_conda_container_validation.py
          --run-conda-validation
          --conda-env-name "$CONDA_ENV_NAME"
          -q

  container-validation:
    if: github.event_name == 'workflow_dispatch' && inputs.run_container_validation
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -el {0}
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Create test environment
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: environment.yml
          cache-environment: true
          cache-downloads: true

      - name: Run container validation
        run: pytest tests/test_conda_container_validation.py --run-container-validation -q

  doubletfinder-install:
    if: github.event_name == 'workflow_dispatch' && inputs.run_doubletfinder_install
    runs-on: ubuntu-latest
    defaults:
      run:
        shell: bash -el {0}
    env:
      SNAKEMAKE_CONDA_FRONTEND: mamba
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Create test environment
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-file: environment.yml
          cache-environment: true
          cache-downloads: true

      - name: Run DoubletFinder install validation
        run: pytest tests/test_conda_container_validation.py --run-doubletfinder-install -q
21 changes: 21 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Python bytecode and tool caches
__pycache__/
*.py[cod]
.pytest_cache/
.cache/

# Editor, notebook, and interactive-shell state
.vscode/
.jupyter/
.ipython/

# Default plot file written by non-interactive R sessions
Rplots.pdf

# Workflow runtime artefacts
.snakemake/
logs/
scratch/
# NOTE(review): presumably scheduler stderr/stdout files; these two patterns
# match ANY name ending in "err" or "out", not just a file extension.
*err
*out

# Generated workflow outputs
results/
testdata/results/

# Keep golden test outputs trackable even though results/ is ignored.
# These negations must stay after every ignore pattern above.
!tests/reference_outputs/
!tests/reference_outputs/**
!tests/reference_outputs/testdata/
!tests/reference_outputs/testdata/results/
!tests/reference_outputs/testdata/results/**
1 change: 1 addition & 0 deletions config/config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
conda-channel-priority: "strict"

sampleTable: "samplesheet.tsv"
workflow_seed: 12345

emptydrop_removal_methods: ["tenx","emptydrops"]
ambient_decon_methods: ["soupx","cellbender_fromraw"]
Expand Down
13 changes: 13 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Test-runner environment: provides pytest and Snakemake for the test suite.
# Rule-specific environments are defined separately under workflow/envs/.
name: scrnaseq-preprocessing-tests
# Channel order matters: conda-forge is listed ahead of bioconda.
channels:
  - conda-forge
  - bioconda
# NOTE: `channel_priority` is intentionally not set here. It is not a section
# that conda reads from an environment file (conda drops it with an
# "invalid section" warning), so it never took effect. Configure strict
# priority in .condarc instead, e.g. `conda config --set channel_priority strict`.
dependencies:
  - python=3.12
  - pytest
  - pyyaml
  - snakemake=9.16.3
  - conda
  - mamba
  - hdf5
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added testdata/raw_feature_bc_matrix.h5
Binary file not shown.
Binary file added testdata/raw_feature_bc_matrix/barcodes.tsv.gz
Binary file not shown.
Binary file added testdata/raw_feature_bc_matrix/features.tsv.gz
Binary file not shown.
Binary file added testdata/raw_feature_bc_matrix/matrix.mtx.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions testdata/samplesheet_test.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
sampleid tenx_datadir
test testdata
64 changes: 64 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Tests

These tests are intended to run from the repository root in any environment where `snakemake` and `pytest` are available on `PATH`. How that environment is created or activated is site-specific; for example, an HPC may require loading a module before `conda activate` is available.

Example:

```bash
conda env create -f environment.yml
conda activate scrnaseq-preprocessing-tests
pytest tests
```

`python -m pytest tests` is also fine when `python` resolves to the same environment that provides `pytest` and `snakemake`. If it does not, the active shell is likely resolving commands from different environments. Confirm with:

```bash
which python
which pytest
which snakemake
```

The dry-run DAG test uses the small CellRanger-style input data in `testdata/` and the test-specific sample sheet `testdata/samplesheet_test.tsv`. The tests override `sampleTable` on the Snakemake command line, so they do not use the normal workflow `samplesheet.tsv`. They do not require `--conda-prefix`, a cluster profile, or any site-specific paths. Sample-sheet validation tests cover malformed headers, duplicate sample IDs, missing or incomplete 10x data paths, absolute `tenx_datadir` values, and multi-sample DAG expansion. Config validation tests cover required keys, allowed method values, threshold types/ranges, invalid-config failures, and dry-run DAG variants driven by method-list config changes. The default suite also runs `snakemake --lint` against the test-data configuration, which catches workflow-structure problems such as missing `log:` directives, helper functions embedded in rule files, long `run:` blocks that should live in scripts, path-composition warnings, missing rule-level conda/container declarations, and shell commands that directly interpolate global workflow variables instead of passing values through `params`.

GitHub Actions runs the default test suite via `.github/workflows/tests.yml`, using the same top-level `environment.yml` test runner environment and caching Snakemake-created rule environments under `.snakemake/conda`.

The default test suite also includes a focused local rule-execution smoke test. It runs the real `tenx2seuratrds`, `find_markers`, and `combine_markers` rule chain against `testdata/`, using Snakemake's `--use-conda` support and writing outputs under pytest's temporary directory. This catches broken R package imports, script argument drift, invalid Seurat object creation, and marker CSV schema changes without submitting to SLURM.

The R output validator also checks that the Seurat object has at least 100 features and 100 cells; metadata rows match the cell count; barcode row names are present, unique, and nonempty; `orig.ident`, `nCount_RNA`, `nFeature_RNA`, `percent.mt`, and `seurat_clusters` metadata columns exist; RNA count and feature-count metadata values are finite and positive; mitochondrial percentages are finite and within `[0, 100]`; at least two clusters are present; PCA and UMAP reductions exist; the marker table is nonempty and has the expected columns; marker gene symbols are present and nonempty; marker numeric columns are finite; marker p-value and percent columns are within `[0, 1]`; marker clusters are present in the Seurat metadata; markers are reported for at least two clusters; and the marker `workflow` column matches the expected test workflow label. The test runner environment is defined in the repository-level `environment.yml`; the rule-specific R environment is still created by Snakemake from `workflow/envs/tenx2seuratrds.yml`. A separate lightweight checkpoint-expansion test uses a fake `Rscript` to materialize the `marker_manifest` checkpoint, verify that dynamic `find_markers` jobs are generated for each cluster id, and confirm that `combine_markers` receives the expected marker chunks.


## Optional conda and container validation

The default suite validates that workflow conda environment files are well formed, that rule-level conda references resolve to existing files, and that container declarations are recognizable. Expensive network-dependent validation is opt-in:

```bash
pytest tests --run-conda-validation
pytest tests --run-container-validation
pytest tests --run-doubletfinder-install
```

`--run-conda-validation` creates each `workflow/envs/*.yml` environment in a pytest temporary directory and checks key R/Python package imports. To validate only one environment, pass `--conda-env-name ENV_FILE`, for example `--conda-env-name soupx.yml`; GitHub Actions uses this selector to run conda validation as one matrix job per env. `--run-container-validation` pulls the CellBender container with Docker, Apptainer, or Singularity. `--run-doubletfinder-install` runs the networked Snakemake `install_doubletfinder` rule and confirms that `DoubletFinder` can be imported from the created rule environment.


## Optional full workflow run

The default tests build and inspect the DAG and run a focused local R-rule smoke test. To submit the full test-data workflow and verify all declared outputs against the reference snapshot, opt in explicitly:

```bash
pytest tests --run-workflow
```

The full-run test calls `tests/run_test_workflow.sh`, which uses `testdata/samplesheet_test.tsv` and overrides the workflow output directory with `resultsDir=testdata/results`. The manifest in `tests/test_sample_rule_output_files.txt` is therefore written with paths under `testdata/results/`.

For testing, omit `--snakemake-conda-prefix` so Snakemake uses its default `.snakemake/conda` location under the repository root. The runner assumes that the current environment already provides `snakemake` on `PATH`.


## Reference outputs

The full workflow test compares regenerated files in `testdata/results/` against reference files under `tests/reference_outputs/`. The compared file list is in `tests/test_reference_output_files.txt`. Seurat `.rds` files are compared at the metadata-table level, marker CSVs are compared by columns and `(cluster, genesymbol)` rows with numeric tolerance, emptyDrops matrix files are compared after gzip decompression, CellBender H5 outputs are compared with `h5diff`, and remaining durable outputs are compared byte-for-byte.

To refresh the reference snapshot after intentionally changing workflow behavior, first run the full test workflow so `testdata/results/` contains the desired outputs, then run:

```bash
python tests/update_reference_outputs.py
```
97 changes: 97 additions & 0 deletions tests/compare_seurat_metadata.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Usage: Rscript compare_seurat_metadata.R CURRENT.rds REFERENCE.rds [CURRENT2.rds REFERENCE2.rds ...]
# Arguments are consumed as consecutive (current, reference) path pairs, so the
# count must be even and non-zero.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) == 0 || length(args) %% 2 != 0) {
  stop("Provide current/reference RDS path pairs", call. = FALSE)
}

suppressPackageStartupMessages(library("Seurat"))

# Relative tolerance for numeric metadata columns, overridable through the
# SEURAT_METADATA_NUMERIC_TOLERANCE environment variable (default 1e-8).
# suppressWarnings() keeps a non-numeric value from printing a coercion warning
# on top of the explicit error below.
numeric_tolerance <- suppressWarnings(
  as.numeric(Sys.getenv("SEURAT_METADATA_NUMERIC_TOLERANCE", "1e-8"))
)
# A negative tolerance would flag even identical values as different
# (0 > negative), so reject it up front together with unparsable input.
if (length(numeric_tolerance) != 1 || is.na(numeric_tolerance) || numeric_tolerance < 0) {
  stop("SEURAT_METADATA_NUMERIC_TOLERANCE must be a non-negative number", call. = FALSE)
}

# Abort with an error of the form "<path>: <message>".
# call. = FALSE keeps the calling expression out of the printed error.
fail <- function(path, message) {
  detail <- sprintf("%s: %s", path, message)
  stop(detail, call. = FALSE)
}

# Compare one metadata column between the current and reference objects.
#
# Arguments:
#   current_path      Path of the current RDS file; only used to prefix errors.
#   column            Column name, used in error messages.
#   current_values    Column vector from the current object.
#   reference_values  Column vector from the reference object.
#
# Rules:
#   * NA positions must be identical.
#   * If both vectors are numeric (is.numeric() also covers integers), finite
#     values are compared with a relative tolerance:
#     |cur - ref| <= numeric_tolerance * max(|cur|, |ref|, 1).
#   * Infinite values must match exactly. They are checked separately because
#     Inf - Inf is NaN (which would make `if (any(...))` fail on NA) and
#     Inf > Inf is FALSE (which would let Inf-vs-finite mismatches through).
#   * Everything else is compared as character vectors.
#
# Returns invisible(TRUE); stops through fail() on the first difference.
compare_metadata_column <- function(current_path, column, current_values, reference_values) {
  current_na <- is.na(current_values)
  reference_na <- is.na(reference_values)
  if (!identical(current_na, reference_na)) {
    fail(current_path, sprintf("metadata column %s has different NA positions", column))
  }

  # The NA masks are identical at this point, so one mask selects the
  # entries that can be compared.
  comparable <- !current_na

  if (is.numeric(current_values) && is.numeric(reference_values)) {
    current_numeric <- as.numeric(current_values)[comparable]
    reference_numeric <- as.numeric(reference_values)[comparable]

    # NA/NaN were removed above, so anything non-finite here is +/-Inf.
    finite <- is.finite(current_numeric) & is.finite(reference_numeric)
    if (any(current_numeric[!finite] != reference_numeric[!finite])) {
      fail(
        current_path,
        sprintf("metadata column %s has different infinite values", column)
      )
    }

    if (any(finite)) {
      diff <- abs(current_numeric[finite] - reference_numeric[finite])
      scale <- pmax(abs(current_numeric[finite]), abs(reference_numeric[finite]), 1)
      if (any(diff > numeric_tolerance * scale)) {
        fail(
          current_path,
          sprintf(
            "metadata column %s differs numerically; max abs diff %.12g",
            column,
            max(diff)
          )
        )
      }
    }
    return(invisible(TRUE))
  }

  # Non-numeric columns (character, factor, logical, ...) are compared by
  # their character representation.
  current_character <- as.character(current_values)
  reference_character <- as.character(reference_values)
  if (!identical(current_character[comparable], reference_character[comparable])) {
    fail(current_path, sprintf("metadata column %s differs", column))
  }

  invisible(TRUE)
}

# Load one (current, reference) pair of RDS objects and compare their
# `meta.data` slots.
#
# Checks, in order: identical column names (same order), identical row names
# (barcodes, same order), then each column via compare_metadata_column().
# Returns invisible(TRUE); stops through fail() on the first difference.
compare_metadata_pair <- function(current_path, reference_path) {
  current_metadata <- readRDS(current_path)@meta.data
  reference_metadata <- readRDS(reference_path)@meta.data

  current_columns <- colnames(current_metadata)
  reference_columns <- colnames(reference_metadata)
  if (!identical(current_columns, reference_columns)) {
    # Show both column lists so the mismatch can be read off the error.
    fail(
      current_path,
      sprintf(
        "metadata columns differ\ncurrent: %s\nreference: %s",
        paste(current_columns, collapse = ","),
        paste(reference_columns, collapse = ",")
      )
    )
  }

  same_barcodes <- identical(rownames(current_metadata), rownames(reference_metadata))
  if (!same_barcodes) {
    fail(current_path, "metadata barcodes differ")
  }

  for (column_name in current_columns) {
    compare_metadata_column(
      current_path,
      column_name,
      current_metadata[[column_name]],
      reference_metadata[[column_name]]
    )
  }

  invisible(TRUE)
}

# Walk the argument vector two entries at a time: odd positions hold the
# current path, the following even position holds its reference path.
pair_starts <- seq(1, length(args), by = 2)
for (start in pair_starts) {
  compare_metadata_pair(args[[start]], args[[start + 1]])
}

# Only reached when every pair compared without error.
cat(sprintf("Compared Seurat metadata for %d object(s)\n", length(args) / 2))
36 changes: 36 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
def pytest_addoption(parser):
    """Register the opt-in command-line options used by the test suite.

    Boolean switches are off by default; value options default to ``None``.
    Options are registered in the order listed in the table below.
    """
    switch = {"action": "store_true", "default": False}
    value = {"default": None}

    option_table = (
        (
            "--run-workflow",
            switch,
            "run the full Snakemake workflow on the test data",
        ),
        (
            "--snakemake-conda-prefix",
            value,
            "optional value to pass to Snakemake --conda-prefix for --run-workflow",
        ),
        (
            "--run-conda-validation",
            switch,
            "create workflow conda environments and validate key package imports",
        ),
        (
            "--conda-env-name",
            value,
            "optional workflow/envs/*.yml filename to validate with --run-conda-validation",
        ),
        (
            "--run-container-validation",
            switch,
            "pull workflow containers with docker, apptainer, or singularity",
        ),
        (
            "--run-doubletfinder-install",
            switch,
            "execute the DoubletFinder GitHub install rule on test outputs",
        ),
    )

    for flag, settings, help_text in option_table:
        parser.addoption(flag, help=help_text, **settings)

Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading