Reed-CompBio · tristan-f-r · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 16, 2025
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -14,7 +14,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
 
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3

diff --git a/.github/workflows/test-spras.yml b/.github/workflows/test-spras.yml
@@ -12,13 +12,13 @@ jobs:
         os: [macos-latest, windows-latest]
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v2
+        uses: actions/checkout@v4
       - name: Install conda environment
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v4
         with:
           activate-environment: spras
           environment-file: environment.yml
-          auto-activate-base: false
+          auto-activate: false
           miniconda-version: 'latest'
       - name: Log conda environment
         # Log conda environment contents
@@ -49,18 +49,22 @@ jobs:
           sudo docker image prune --all --force
           sudo docker builder prune -a
       - name: Install conda environment
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v4
         with:
           activate-environment: spras
           environment-file: environment.yml
-          auto-activate-base: false
+          auto-activate: false
           miniconda-version: 'latest'
       - name: Install spras in conda env
         # Install spras in the environment using pip
         shell: bash --login {0}
         run: pip install .
-      - name: Log conda environment
-        # Log conda environment contents
+      - name: Get pipx
+        shell: bash --login {0}
+        run: pip install pipx
+      - shell: bash --login {0}
+        run: pipx install .
+      - name: Log conda environment contents
         shell: bash --login {0}
         run: conda list
       - name: Install Apptainer
@@ -85,11 +89,11 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v4
       - name: Install conda environment
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v4
         with:
           activate-environment: spras
           environment-file: environment.yml
-          auto-activate-base: false
+          auto-activate: false
           miniconda-version: 'latest'
       # Install spras in the environment using pip
       - name: Install spras in conda env
@@ -100,7 +104,7 @@ jobs:
         # We enable high parallelization (cores 4) to test our way out of the experienced
         # race conditions from #268 and #279
         # We also enforce strict DAG evaluation to catch DAG problems before they appear as user errors. (#359)
-        run: snakemake --cores 4 --configfile config/config.yaml --show-failed-logs --strict-dag-evaluation cyclic-graph --strict-dag-evaluation functions --strict-dag-evaluation periodic-wildcards
+        run: spras run --cores 4 --configfile config/config.yaml --show-failed-logs --strict-dag-evaluation cyclic-graph --strict-dag-evaluation functions --strict-dag-evaluation periodic-wildcards
 
   # Run pre-commit checks on source files
   pre-commit:
@@ -110,11 +114,11 @@ jobs:
       - name: Checkout repository
         uses: actions/checkout@v3
       - name: Install conda environment
-        uses: conda-incubator/setup-miniconda@v2
+        uses: conda-incubator/setup-miniconda@v4
         with:
           activate-environment: spras
           environment-file: environment.yml
-          auto-activate-base: false
+          auto-activate: false
           miniconda-version: 'latest'
       - name: Run pre-commit
         shell: bash --login {0}

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@
 # See https://pre-commit.com/hooks.html for more hooks
 default_language_version:
   # Match this to the version specified in environment.yml
-  python: python3.11
+  python: python3.13
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.4.0 # Use the ref you want to point at

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -8,7 +8,7 @@ version: 2
 build:
   os: ubuntu-22.04
   tools:
-    python: "3.11"
+    python: "3.13"
 
 # Build documentation in the "docs/" directory with Sphinx
 sphinx:

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1,4 @@
+include README.md
+include LICENSE
+include Snakefile
 include spras/cgroup_wrapper.sh
diff --git a/README.md b/README.md
@@ -53,7 +53,7 @@ After installing Docker, start Docker before running SPRAS.
 Once you have activated the conda environment and started Docker, you can run SPRAS with the example Snakemake workflow.
 From the root directory of the `spras` repository, run the command
 ```
-snakemake --cores 1 --configfile config/config.yaml
+spras run --cores 1 --configfile config/config.yaml
 ```
 This will run the SPRAS workflow with the example config file (`config/config.yaml`) and input files.
 Output files will be written to the `output` directory.

diff --git a/Snakefile b/Snakefile
@@ -1,12 +1,16 @@
 import os
 from spras import runner
 import shutil
+import json
 import yaml
+from pathlib import Path
+from spras.containers import TimeoutError
 from spras.dataset import Dataset
 from spras.evaluation import Evaluation
 from spras.analysis import ml, summary, cytoscape
 from spras.config.revision import detach_spras_revision
 import spras.config.config as _config
+from spras.errors import mark_error, mark_success, is_error, TimeoutArtifactError, FailedDependencyError
 
 # Snakemake updated the behavior in the 6.5.0 release https://github.com/snakemake/snakemake/pull/1037
 # and using the wrong separator prevents Snakemake from matching filenames to the rules that can produce them
@@ -44,11 +48,14 @@ def algo_has_mult_param_combos(algo):
 
 algorithms_mult_param_combos = [algo for algo in algorithms if algo_has_mult_param_combos(algo)]
 
+# Gets the associated parameter hash out of a params wildcard
+def params_index(params_hash):
+    return params_hash.replace('params-', '')
+
 # Get the parameter dictionary for the specified
 # algorithm and parameter combination hash
 def reconstruction_params(algorithm, params_hash):
-    index = params_hash.replace('params-', '')
-    return algorithm_params[algorithm][index]
+    return algorithm_params[algorithm][params_index(params_hash)]
 
 # Log the parameter dictionary for this parameter configuration in a yaml file
 def write_parameter_log(algorithm, param_label, logfile):
@@ -262,33 +269,82 @@ def collect_prepared_input(wildcards):
 
     return prepared_inputs
 
+def collect_dependent_artifact_info(wildcards):
+    # Get the associated runs that this run depends on
+    dependent_runs = _config.config.conditional_run_dependencies[params_index(wildcards.params)]
+    return [
+        SEP.join([out_dir, f'{wildcards.dataset}-{wildcards.algorithm}-params-{params}', 'artifact-log.json'])
+        for params in dependent_runs
+    ]
+
+def filter_successful(files):
+    """Convenient function for filtering iterators by whether or not their items are error files."""
+    return [file for file in files if not is_error(file)]
+
+def filter_error(files):
+    """This function is precisely described as the list difference of `files` and `filter_successful(files)`"""
+    return [file for file in files if is_error(file)]
+
 # Run the pathway reconstruction algorithm
 rule reconstruct:
-    input: collect_prepared_input
+    input:
+        prepared_files=collect_prepared_input,
+        required_artifact_info=collect_dependent_artifact_info
     # Each reconstruct call should be in a separate output subdirectory that is unique for the parameter combination so
     # that multiple instances of the container can run simultaneously without overwriting the output files
     # Overwriting files can happen because the pathway reconstruction algorithms often generate output files with the
     # same name regardless of the inputs or parameters, and these aren't renamed until after the container command
     # terminates
-    output: pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt'])
+    output:
+        pathway_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']),
+        # Despite this being a 'log' file, we don't use the log directive as this rule doesn't actually throw errors.
+        artifact_info = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'artifact-log.json'])
+    params:
+        # Get the timeout from the config and use it as an input.
+        # TODO: This has unexpected behavior when this rule succeeds but the timeout extends,
+        # making this rule run again.
+        timeout = lambda wildcards: _config.config.algorithm_param_run_settings[params_index(wildcards.params)].timeout
     run:
+        successful_runs = filter_successful(input.required_artifact_info)
+        errorful_runs = filter_error(input.required_artifact_info)
+        # NOTE: conditionals happen as a big OR statement, so we are looking for at least one success.
+        if len(successful_runs) == 0 and len(errorful_runs) != 0:
+            # We don't raise the error here (analogous to `--keep-going`, except we avoid unnecessarily re-running this rule.)
+            mark_error(output.artifact_info, FailedDependencyError(failing_dependencies=errorful_runs))
+            # and we touch pathway_file still: Snakemake doesn't have optional files, so we output a 'artifact info' file,
+            # which contains the status (success/failure) of specific Snakemake jobs.
+            # We filter for the successful files (such as ones that didn't time out) with the `filter_successful` function.  
+            Path(output.pathway_file).touch()
+
         # Create a copy so that the updates are not written to the parameters logfile
-        params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
+        algorithm_params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
         # Declare the input files as a dictionary.
-        inputs = dict(zip(runner.get_required_inputs(detach_spras_revision(_config.config.immutable_files, wildcards.algorithm)), *{input}, strict=True))
+        inputs = dict(zip(runner.get_required_inputs(detach_spras_revision(_config.config.immutable_files, wildcards.algorithm)), *{input.prepared_files}, strict=True))
         # Remove the _spras_run_name parameter added for keeping track of the run name for parameters.yml
-        if '_spras_run_name' in params:
-            params.pop('_spras_run_name')
-        runner.run(detach_spras_revision(_config.config.immutable_files, wildcards.algorithm), inputs, output.pathway_file, params, container_settings)
+        if '_spras_run_name' in algorithm_params:
+            algorithm_params.pop('_spras_run_name')
+        try:
+            runner.run(detach_spras_revision(_config.config.immutable_files, wildcards.algorithm), inputs, output.pathway_file, algorithm_params, container_settings, params.timeout)
+            mark_success(output.artifact_info)
+        except TimeoutError as err:
+            # See the above notes on conditional runs for precisely why we write this as is.
+            mark_error(output.artifact_info, TimeoutArtifactError(duration=params.timeout))
+            Path(output.pathway_file).touch()
 
 # Original pathway reconstruction output to universal output
 # Use PRRunner as a wrapper to call the algorithm-specific parse_output
 rule parse_output:
     input:
-        raw_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'raw-pathway.txt']),
+        # We propagate up the artifact_info error if it exists.
+        artifact_info = rules.reconstruct.output.artifact_info,
+        raw_file = rules.reconstruct.output.pathway_file,
         dataset_file = SEP.join([out_dir, 'dataset-{dataset}-merged.pickle'])
     output: standardized_file = SEP.join([out_dir, '{dataset}-{algorithm}-{params}', 'pathway.txt'])
     run:
+        if is_error(input.artifact_info):
+            mark_error(output.standardized_file)
+            return
+
         params = reconstruction_params(wildcards.algorithm, wildcards.params).copy()
         params['dataset'] = input.dataset_file
         runner.parse_output(detach_spras_revision(_config.config.immutable_files, wildcards.algorithm), input.raw_file, output.standardized_file, params)
@@ -310,7 +366,7 @@ rule viz_cytoscape:
     output: 
         session = SEP.join([out_dir, '{dataset}-cytoscape.cys'])
     run:
-        cytoscape.run_cytoscape(input.pathways, output.session, container_settings)
+        cytoscape.run_cytoscape(filter_successful(input.pathways), output.session, container_settings)
 
 
 # Write a single summary table for all pathways for each dataset
@@ -323,7 +379,7 @@ rule summary_table:
     run:
         # Load the node table from the pickled dataset file
         node_table = Dataset.from_file(input.dataset_file).node_table
-        summary_df = summary.summarize_networks(input.pathways, node_table, algorithm_params, algorithms_with_params)
+        summary_df = summary.summarize_networks(filter_successful(input.pathways), node_table, algorithm_params, algorithms_with_params)
         summary_df.to_csv(output.summary_table, sep='\t', index=False)
 
 # Cluster the output pathways for each dataset
@@ -339,7 +395,7 @@ rule ml_analysis:
         hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-horizontal.png']),
         hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', 'hac-clusters-horizontal.txt']),
     run: 
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
         ml.hac_horizontal(summary_df, output.hac_image_horizontal, output.hac_clusters_horizontal, **hac_params)
         ml.pca(summary_df, output.pca_image, output.pca_variance, output.pca_coordinates, **pca_params)
@@ -353,7 +409,7 @@ rule jaccard_similarity:
         jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', 'jaccard-matrix.txt']),
         jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', 'jaccard-heatmap.png'])
     run:
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
 
 
@@ -364,7 +420,7 @@ rule ensemble:
     output:
         ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', 'ensemble-pathway.txt'])
     run:
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.ensemble_network(summary_df, output.ensemble_network_file)
 
 # Returns all pathways for a specific algorithm
@@ -386,7 +442,7 @@ rule ml_analysis_aggregate_algo:
         hac_image_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-horizontal.png']),
         hac_clusters_horizontal = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-hac-clusters-horizontal.txt']),
     run:
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.hac_vertical(summary_df, output.hac_image_vertical, output.hac_clusters_vertical, **hac_params)
         ml.hac_horizontal(summary_df, output.hac_image_horizontal, output.hac_clusters_horizontal, **hac_params)
         ml.pca(summary_df, output.pca_image, output.pca_variance, output.pca_coordinates, **pca_params)
@@ -398,7 +454,7 @@ rule ensemble_per_algo:
     output:
         ensemble_network_file = SEP.join([out_dir,'{dataset}-ml', '{algorithm}-ensemble-pathway.txt'])
     run:
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.ensemble_network(summary_df, output.ensemble_network_file)
 
 # Calculated Jaccard similarity between output pathways for each dataset per algorithm
@@ -409,7 +465,7 @@ rule jaccard_similarity_per_algo:
         jaccard_similarity_matrix = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-matrix.txt']),
         jaccard_similarity_heatmap = SEP.join([out_dir, '{dataset}-ml', '{algorithm}-jaccard-heatmap.png'])
     run:
-        summary_df = ml.summarize_networks(input.pathways)
+        summary_df = ml.summarize_networks(filter_successful(input.pathways))
         ml.jaccard_similarity_eval(summary_df, output.jaccard_similarity_matrix, output.jaccard_similarity_heatmap)
 
 # Return the gold standard pickle file for a specific gold standard
@@ -440,7 +496,7 @@ rule evaluation_pr_per_pathways:
         node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-nodes.png']),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
-        pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
+        pr_df = Evaluation.node_precision_and_recall(filter_successful(input.pathways), node_table)
         Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png)
 
 # Returns all pathways for a specific algorithm and dataset
@@ -459,7 +515,7 @@ rule evaluation_per_algo_pr_per_pathways:
         node_pr_png = SEP.join([out_dir, '{dataset_gold_standard_pair}-eval', 'pr-per-pathway-for-{algorithm}-nodes.png']),
     run:
         node_table = Evaluation.from_file(input.node_gold_standard_file).node_table
-        pr_df = Evaluation.node_precision_and_recall(input.pathways, node_table)
+        pr_df = Evaluation.node_precision_and_recall(filter_successful(input.pathways), node_table)
         Evaluation.precision_and_recall_per_pathway(pr_df, output.node_pr_file, output.node_pr_png, include_aggregate_algo_eval)
 
 # Return pathway summary file per dataset