From b18a31e444be382bac84b7a443a174161214cceb Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Thu, 9 Apr 2026 20:58:49 +0200 Subject: [PATCH 1/3] Add TFlink support --- CITATIONS.md | 4 + conf/igenomes.config | 18 ++ conf/modules.config | 8 + docs/output.md | 6 + main.nf | 3 + .../report/preprocess/templates/preprocess.py | 64 ++++- modules/local/tflink/annotate/environment.yml | 6 + modules/local/tflink/annotate/main.nf | 45 ++++ .../tflink/annotate/templates/annotate.py | 243 ++++++++++++++++++ .../local/tflink/annotate/tests/main.nf.test | 87 +++++++ .../tflink/annotate/tests/main.nf.test.snap | 164 ++++++++++++ nextflow_schema.json | 10 + .../utils_nfcore_tfactivity_pipeline/main.nf | 4 + workflows/tfactivity.nf | 26 +- 14 files changed, 682 insertions(+), 6 deletions(-) create mode 100644 modules/local/tflink/annotate/environment.yml create mode 100644 modules/local/tflink/annotate/main.nf create mode 100644 modules/local/tflink/annotate/templates/annotate.py create mode 100644 modules/local/tflink/annotate/tests/main.nf.test create mode 100644 modules/local/tflink/annotate/tests/main.nf.test.snap diff --git a/CITATIONS.md b/CITATIONS.md index 606579b..eacaa0b 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -50,6 +50,10 @@ > Tremblay, B. J., (2024). universalmotif: An R package for biological motif analysis. Journal of Open Source Software, 9(100), 701 +- [TFLink](https://doi.org/10.1093/database/baac083) + + > Fekete T, Gyorffy B. TFLink: an integrated gateway to access transcription factor-target gene interactions for multiple species. Database (Oxford). 2022;2022:baac083. + - [SNEEP](https://doi.org/10.1016/j.isci.2024.109765) > Baumgarten N, Ebert P, Schmidt F, Kern F, Schulz MH. A statistical approach for identifying single nucleotide variants that affect transcription factor binding. 
iScience, Volume 27, Issue 5, 109765 diff --git a/conf/igenomes.config b/conf/igenomes.config index c980d52..a7ce056 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -16,6 +16,7 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 9606 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" } GRCh38 { @@ -23,6 +24,7 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 9606 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" sneep_scale_file = "${projectDir}/assets/sneep_scale_human_817.txt" sneep_motif_file = "${projectDir}/assets/sneep_transfac_human_817.txt" @@ -32,12 +34,14 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 9606 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz" } GRCm38 { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 10090 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Mus_musculus_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" mito_name = "Mt" } @@ -58,6 +62,7 @@ params { mito_name = "MtDNA" macs_gsize = "9e7" taxon_id = 6239 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Caenorhabditis_elegans_interactions_All_simpleFormat_v1.0.tsv" } 'CanFam3.1' { fasta = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" @@ -70,6 +75,7 @@ params { gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 7955 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Danio_rerio_interactions_All_simpleFormat_v1.0.tsv" } BDGP6 { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" @@ -77,6 +83,7 @@ params { mito_name = "M" macs_gsize = "1.2e8" taxon_id = 7227 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Drosophila_melanogaster_interactions_All_simpleFormat_v1.0.tsv" } EquCab2 { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -123,18 +130,21 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 10116 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 10116 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" mito_name = "MT" taxon_id = 559292 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Saccharomyces_cerevisiae_interactions_All_simpleFormat_v1.0.tsv" } EF2 { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" @@ -164,6 
+174,7 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 9606 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" snps = "https://zenodo.org/records/15090556/files/dbSNP_hg38.bed.gz" sneep_scale_file = "${projectDir}/assets/sneep_scale_human_817.txt" @@ -174,6 +185,7 @@ params { gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 9606 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Homo_sapiens_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" } mm10 { @@ -181,6 +193,7 @@ params { gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 10090 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Mus_musculus_interactions_All_simpleFormat_v1.0.tsv.gz" blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" snps = "https://zenodo.org/records/15090556/files/dbSNP_mm10.bed.gz" sneep_scale_file = "${projectDir}/assets/sneep_scale_mouse_218.txt" @@ -198,6 +211,7 @@ params { mito_name = "chrM" macs_gsize = "9e7" taxon_id = 6239 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Caenorhabditis_elegans_interactions_All_simpleFormat_v1.0.tsv" } canFam3 { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" @@ -211,12 +225,14 @@ params { mito_name = "chrM" macs_gsize = "1.37e9" taxon_id = 7955 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Danio_rerio_interactions_All_simpleFormat_v1.0.tsv" } dm6 { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" gtf = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 7227 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Drosophila_melanogaster_interactions_All_simpleFormat_v1.0.tsv" } equCab2 { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" @@ -241,12 +257,14 @@ params { gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" mito_name = "chrM" taxon_id = 10116 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Rattus_norvegicus_interactions_All_simpleFormat_v1.0.tsv" } sacCer3 { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" mito_name = "chrM" macs_gsize = "1.2e7" taxon_id = 559292 + tflink_file = "https://cdn.netbiol.org/tflink/download_files/TFLink_Saccharomyces_cerevisiae_interactions_All_simpleFormat_v1.0.tsv" } susScr3 { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/modules.config b/conf/modules.config index 954e266..ea41529 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -555,6 +555,14 @@ process { ] } + withName: ".*:TFACTIVITY:TFLINK_ANNOTATE" { + publishDir = [ + path: { "${params.outdir}/05_ranking/07_tflink_annotation" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + /* FIMO */ diff --git a/docs/output.md b/docs/output.md index 2114514..2988157 100644 --- a/docs/output.md +++ b/docs/output.md @@ -278,6 +278,11 @@ The ranking outputs provide the primary results of the pipeline: prioritized lis - `.tg_ranking.tsv`: TG ranking matrices per assay (COMBINE_TGS_PER_ASSAY). - `06_combined_tgs_across_assays/` - `all.tsv`: TG ranking matrices combined across assays (COMBINE_TGS_ACROSS_ASSAYS). 
+ - `07_tflink_annotation/` (only if `--tflink_file` is provided directly or via `--genome`) + - `.tf_ranking.tsv`: TF rankings with TFLink support columns (`tflink_supported`, `tflink_supported_edges`, `tflink_total_edges`, `tflink_support_rate`). + - `.tg_ranking.tsv`: Unchanged TG ranking matrix copied for traceable side-by-side usage with TFLink annotations. + - `.tflink_edges.tsv`: Edge-level TFLink support table with per TF-target support status and evidence metadata. + - `.tflink_summary.tsv`: Per-assay support summary for annotated edges. @@ -387,6 +392,7 @@ This final step generates a comprehensive, interactive HTML report that consolid - Binding affinity predictions from STARE - Regulatory coefficients from DYNAMITE analysis - Motif information and binding site predictions +- TFLink evidence summaries and per-TF support metrics when TFLink annotation is enabled **Distribution Formats**: Results are provided in two convenient formats: diff --git a/main.nf b/main.nf index a329f5b..fe62035 100644 --- a/main.nf +++ b/main.nf @@ -22,6 +22,7 @@ params.taxon_id = getGenomeAttribute('taxon_id') params.snps = getGenomeAttribute('snps') params.sneep_scale_file = getGenomeAttribute('sneep_scale_file') params.sneep_motif_file = getGenomeAttribute('sneep_motif_file') +params.tflink_file = getGenomeAttribute('tflink_file') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -63,6 +64,7 @@ workflow NFCORE_TFACTIVITY { snps = params.snps ? file(params.snps, checkIfExists: true) : null sneep_scale_file = params.sneep_scale_file ? file(params.sneep_scale_file, checkIfExists: true) : null sneep_motif_file = params.sneep_motif_file ? file(params.sneep_motif_file, checkIfExists: true) : null + tflink_file = params.tflink_file ? 
file(params.tflink_file, checkIfExists: true) : null // // SUBWORKFLOW: Prepare genome @@ -113,6 +115,7 @@ workflow NFCORE_TFACTIVITY { params.dynamite_randomize, params.alpha, snps, + tflink_file, ch_versions, params.skip_fimo, params.skip_sneep, diff --git a/modules/local/report/preprocess/templates/preprocess.py b/modules/local/report/preprocess/templates/preprocess.py index 6703d13..9ba86b8 100644 --- a/modules/local/report/preprocess/templates/preprocess.py +++ b/modules/local/report/preprocess/templates/preprocess.py @@ -10,7 +10,13 @@ from gtfparse import read_gtf # Constants -OVERVIEW_TEMPLATE = {"dcg": {}, "regression_coefficients": {}, "differential_expression": {}, "tpm": {}} +OVERVIEW_TEMPLATE = { + "dcg": {}, + "regression_coefficients": {}, + "differential_expression": {}, + "tpm": {}, + "tflink": {}, +} TF_TEMPLATE = { "target_genes": {}, "differential_expression": {}, @@ -20,6 +26,7 @@ "tpm": {}, "counts": {}, "fimo_binding_sites": {}, + "tflink": {}, } def remove_motif_id(tf): @@ -72,11 +79,30 @@ def init_tf_data(tf, tfs): def process_ranking_data(paths, overview, tfs): """Process TF ranking and target gene data.""" + tflink_summary = {} + for file in paths['tf_ranking_dir'].glob("*.tf_ranking.tsv"): - assay = file.stem.split(".")[0] + file_name = file.name + if file_name.endswith(".tflink.tf_ranking.tsv"): + assay = file_name.removesuffix(".tflink.tf_ranking.tsv") + tg_filename = file_name.replace(".tflink.tf_ranking.tsv", ".tflink.tg_ranking.tsv") + else: + assay = file_name.removesuffix(".tf_ranking.tsv") + tg_filename = file_name.replace(".tf_ranking.tsv", ".tg_ranking.tsv") df_tf = pd.read_csv(file, sep="\\t", index_col=0) - df_tg = pd.read_csv(paths['tg_ranking_dir'] / f"{assay}.tg_ranking.tsv", sep="\\t", index_col=0) + df_tg = pd.read_csv(paths['tg_ranking_dir'] / tg_filename, sep="\\t", index_col=0) + has_tflink_columns = all( + column in df_tf.columns + for column in ["tflink_supported_edges", "tflink_total_edges", "tflink_support_rate"] 
+ ) + + if has_tflink_columns and assay not in tflink_summary: + tflink_summary[assay] = { + "supported_edges": 0, + "total_edges": 0, + "support_rate": 0.0, + } # Process all TFs from this assay for tf, dcg_score in df_tf["dcg"].items(): @@ -90,6 +116,29 @@ def process_ranking_data(paths, overview, tfs): # Store target genes tfs[tf]["target_genes"][assay] = df_tg[tf].to_dict() + if has_tflink_columns: + supported_edges = int(df_tf.loc[tf, "tflink_supported_edges"]) if pd.notna(df_tf.loc[tf, "tflink_supported_edges"]) else 0 + total_edges = int(df_tf.loc[tf, "tflink_total_edges"]) if pd.notna(df_tf.loc[tf, "tflink_total_edges"]) else 0 + support_rate = float(df_tf.loc[tf, "tflink_support_rate"]) if pd.notna(df_tf.loc[tf, "tflink_support_rate"]) else 0.0 + + tflink_data = { + "supported_edges": supported_edges, + "total_edges": total_edges, + "support_rate": support_rate, + } + overview[tf]["tflink"][assay] = tflink_data + tfs[tf]["tflink"][assay] = tflink_data + + tflink_summary[assay]["supported_edges"] += supported_edges + tflink_summary[assay]["total_edges"] += total_edges + + for assay in tflink_summary: + total_edges = tflink_summary[assay]["total_edges"] + supported_edges = tflink_summary[assay]["supported_edges"] + tflink_summary[assay]["support_rate"] = (float(supported_edges) / float(total_edges)) if total_edges else 0.0 + + return tflink_summary + def process_differential_expression(paths, overview, tfs): """Process differential expression data.""" pairings = set() @@ -335,6 +384,9 @@ def clean_empty_data(overview, tfs): # Remove empty target_genes if not tfs[tf]["target_genes"]: del tfs[tf]["target_genes"] + # Remove empty TFLink annotations + if "tflink" in tfs[tf] and not tfs[tf]["tflink"]: + del tfs[tf]["tflink"] def merge_overview_data(overview, tfs): """Merge overview data into individual TF structures.""" @@ -491,12 +543,16 @@ def main(): clean_params_data(params) # Process core data - process_ranking_data(paths, overview, tfs) + tflink_summary = 
process_ranking_data(paths, overview, tfs) pairings = process_differential_expression(paths, overview, tfs) # Get assays from overview assays = list(set([assay for tf_data in overview.values() for assay in tf_data["dcg"].keys()])) metadata["assays"] = assays + metadata["tflink"] = { + "enabled": bool(tflink_summary), + "assays": tflink_summary, + } # Process remaining data types process_regression_coefficients(paths, overview, pairings, assays) diff --git a/modules/local/tflink/annotate/environment.yml b/modules/local/tflink/annotate/environment.yml new file mode 100644 index 0000000..fda0639 --- /dev/null +++ b/modules/local/tflink/annotate/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pandas=2.3.0 + - conda-forge::pyyaml=6.0.2 diff --git a/modules/local/tflink/annotate/main.nf b/modules/local/tflink/annotate/main.nf new file mode 100644 index 0000000..eaec315 --- /dev/null +++ b/modules/local/tflink/annotate/main.nf @@ -0,0 +1,45 @@ +process TFLINK_ANNOTATE { + tag "${meta.id}" + label "process_single" + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c5/c59e6f0f9a6959e3755f422836b3d77a592d3be0a4e7a1798fd2d4aa8e10a874/data' + : 'community.wave.seqera.io/library/gtfparse_pandas_pyyaml:10fc85c2e9b77f0d'}" + + input: + tuple val(meta), path(tf_ranking), path(tg_ranking) + path tflink_file + + output: + tuple val(meta), path("*.tf_ranking.tsv"), emit: tf_ranking + tuple val(meta), path("*.tg_ranking.tsv"), emit: tg_ranking + tuple val(meta), path("*.tflink_edges.tsv"), emit: edge_annotations + tuple val(meta), path("*.tflink_summary.tsv"), emit: summary + path "versions.yml", emit: versions + + script: + template("annotate.py") + + stub: + """ + cp ${tf_ranking} ${meta.id}.tflink.tf_ranking.tsv + cp ${tg_ranking} ${meta.id}.tflink.tg_ranking.tsv + + cat <<-END_EDGES > ${meta.id}.tflink_edges.tsv + tf\ttarget_gene\tscore\ttflink_supported\ttflink_match_type\ttflink_evidence_scope\ttflink_source_count\ttflink_sources + END_EDGES + + cat <<-END_SUMMARY > ${meta.id}.tflink_summary.tsv + assay\ttflink_total_edges\ttflink_supported_edges\ttflink_support_rate + ${meta.id}\t0\t0\t0.0 + END_SUMMARY + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python3 --version | cut -f 2 -d " ") + pandas: \$(python3 -c "import pandas; print(pandas.__version__)") + yaml: \$(python3 -c "import yaml; print(yaml.__version__)") + END_VERSIONS + """ +} diff --git a/modules/local/tflink/annotate/templates/annotate.py b/modules/local/tflink/annotate/templates/annotate.py new file mode 100644 index 0000000..77cc338 --- /dev/null +++ b/modules/local/tflink/annotate/templates/annotate.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 + +import platform +from pathlib import Path + +import pandas as pd +import yaml + + +TF_COLUMN = "Name.TF" +TARGET_COLUMN = "Name.Target" +SCOPE_COLUMN = "Small-scale.evidence" +SOURCE_COLUMN = "Source.database" +EDGE_OUTPUT_COLUMNS = [ + "tf", + "target_gene", + "score", + "tflink_supported", + "tflink_match_type", + 
"tflink_evidence_scope", + "tflink_source_count", + "tflink_sources", +] + + +def normalize_symbol(value: str) -> str: + """Normalize gene/TF symbols for deterministic matching. + + TFLink files and ranking outputs can differ in capitalization and may contain + incidental leading/trailing whitespace. This helper applies the same cleanup + everywhere before building TF-target keys. + """ + if value is None: + return "" + return str(value).strip().upper() + + +def tf_symbol_from_motif(tf_name: str) -> str: + """Extract TF symbol from motif-style names like 'ATF3(MA1988.2)'. + + Ranking columns usually encode the motif ID in parentheses. TFLink files use + plain TF symbols, so we strip the motif suffix before matching. + """ + if tf_name is None: + return "" + return str(tf_name).split("(")[0].strip() + + +def validate_required_columns(df: pd.DataFrame) -> None: + """Validate exact TFLink simple-format column names are present. + + We intentionally require the canonical TFLink headers to keep parsing strict + and predictable across all species files from TFLink. + """ + required = [TF_COLUMN, TARGET_COLUMN, SCOPE_COLUMN, SOURCE_COLUMN] + missing = [column for column in required if column not in df.columns] + if missing: + raise ValueError( + "TFLink input is missing required columns: " + f"{missing}. Available columns: {list(df.columns)}" + ) + + +def to_evidence_scope(scopes: set[str]) -> str: + """Collapse per-edge scope values to one normalized output label. + + Multiple TFLink rows can map to the same TF-target pair. We merge their scope + labels into one summary value used in `tflink_evidence_scope`. 
+ """ + normalized = {scope.lower().strip() for scope in scopes if scope and str(scope).strip()} + if not normalized: + return "none" + if normalized == {"small"}: + return "small" + if normalized == {"large"}: + return "large" + if normalized == {"small", "large"}: + return "both" + if normalized == {"all"}: + return "all" + return ";".join(sorted(normalized)) + + +def normalize_scope_value(raw_scope: str) -> str: + """Normalize a single scope value from TFLink `Small-scale.evidence`. + + TFLink `Small-scale.evidence` encodes scope as Yes/No. This function maps + those values to `small`/`large` and passes through already normalized values. + """ + if raw_scope is None: + return "all" + value = str(raw_scope).strip().lower() + if not value: + return "all" + + if value in {"yes", "true", "1"}: + return "small" + if value in {"no", "false", "0"}: + return "large" + + return value + + +def aggregate_tf_counts(df_edges: pd.DataFrame, group_col: str) -> pd.DataFrame: + """Aggregate supported and total edge counts by TF-related grouping column.""" + return ( + df_edges.groupby(group_col, sort=False) + .agg( + supported=("tflink_supported", "sum"), + total=("target_gene", "size"), + ) + .astype({"supported": int, "total": int}) + ) + + +def main() -> None: + """Annotate ranking outputs with TFLink support without changing scores. 
+ + Inputs: + - per-assay TF ranking (`*.tf_ranking.tsv`) + - per-assay TG ranking (`*.tg_ranking.tsv`) + - TFLink interaction table + + Outputs: + - TF ranking with TFLink summary columns + - TG ranking copy for paired report ingestion + - edge-level TFLink annotation table + - per-assay summary table + """ + tf_ranking_path = Path("${tf_ranking}") + tg_ranking_path = Path("${tg_ranking}") + tflink_path = Path("${tflink_file}") + assay = "${meta.id}" + + df_tf = pd.read_csv(tf_ranking_path, sep="\t", index_col=0) + df_tg = pd.read_csv(tg_ranking_path, sep="\t", index_col=0) + df_tflink = pd.read_csv(tflink_path, sep="\t") + + validate_required_columns(df_tflink) + + # Group TFLink rows by normalized TF-target keys to aggregate scope/source metadata. + grouped_pairs = ( + df_tflink.assign( + tf_symbol=df_tflink[TF_COLUMN].map(normalize_symbol), + target_symbol=df_tflink[TARGET_COLUMN].map(normalize_symbol), + scope_value=df_tflink[SCOPE_COLUMN].map(normalize_scope_value), + source_value=df_tflink[SOURCE_COLUMN].map( + lambda value: (str(value).strip() if str(value).strip() else "TFLink") + ), + ) + .loc[lambda frame: (frame["tf_symbol"] != "") & (frame["target_symbol"] != "")] + .groupby(["tf_symbol", "target_symbol"], sort=False) + .agg( + tflink_evidence_scope=("scope_value", lambda values: to_evidence_scope(set(values))), + tflink_source_count=("source_value", lambda values: len(set(values))), + tflink_sources=("source_value", lambda values: ";".join(sorted(set(values)))), + ) + .reset_index() + ) + + # Build edge-level table in TF-major order. 
+ df_edges = ( + df_tg.T.stack() + .rename("score") + .reset_index() + .rename(columns={"level_0": "tf", "level_1": "target_gene"}) + ) + df_edges["target_gene"] = df_edges["target_gene"].astype(str) + df_edges["tf_symbol"] = df_edges["tf"].map(tf_symbol_from_motif).map(normalize_symbol) + df_edges["target_symbol"] = df_edges["target_gene"].map(normalize_symbol) + + df_edges = df_edges.merge(grouped_pairs, how="left", on=["tf_symbol", "target_symbol"]) + supported_mask = df_edges["tflink_evidence_scope"].notna() + df_edges["tflink_supported"] = supported_mask + df_edges["tflink_match_type"] = supported_mask.map({True: "exact_symbol", False: "none"}) + df_edges["tflink_evidence_scope"] = df_edges["tflink_evidence_scope"].fillna("none") + df_edges["tflink_source_count"] = df_edges["tflink_source_count"].fillna(0).astype(int) + df_edges["tflink_sources"] = df_edges["tflink_sources"].fillna("") + + # Aggregate per-TF support counts. + tf_counts_df = aggregate_tf_counts(df_edges, "tf") + tf_symbol_counts_df = aggregate_tf_counts(df_edges, "tf_symbol") + + # Annotate TF ranking while preserving row order and existing values. 
+ df_tf_stats = pd.DataFrame(index=df_tf.index) + tf_index = df_tf_stats.index.to_series() + df_tf_stats["tf_symbol"] = tf_index.map(tf_symbol_from_motif).map(normalize_symbol) + df_tf_stats["supported_exact"] = tf_index.map(tf_counts_df["supported"]) + df_tf_stats["total_exact"] = tf_index.map(tf_counts_df["total"]) + df_tf_stats["supported_symbol"] = df_tf_stats["tf_symbol"].map(tf_symbol_counts_df["supported"]) + df_tf_stats["total_symbol"] = df_tf_stats["tf_symbol"].map(tf_symbol_counts_df["total"]) + df_tf_stats["tflink_supported_edges"] = ( + df_tf_stats["supported_exact"].fillna(df_tf_stats["supported_symbol"]).fillna(0).astype(int) + ) + df_tf_stats["tflink_total_edges"] = ( + df_tf_stats["total_exact"].fillna(df_tf_stats["total_symbol"]).fillna(0).astype(int) + ) + df_tf_stats["tflink_support_rate"] = ( + df_tf_stats["tflink_supported_edges"] / df_tf_stats["tflink_total_edges"].replace({0: pd.NA}) + ).fillna(0.0) + df_tf["tflink_supported"] = df_tf_stats["tflink_supported_edges"] > 0 + df_tf["tflink_supported_edges"] = df_tf_stats["tflink_supported_edges"] + df_tf["tflink_total_edges"] = df_tf_stats["tflink_total_edges"] + df_tf["tflink_support_rate"] = df_tf_stats["tflink_support_rate"] + + # Per-assay summary. 
+ summary_total = int(df_edges.shape[0]) + summary_supported = int(df_edges["tflink_supported"].sum()) + summary_rate = (float(summary_supported) / float(summary_total)) if summary_total else 0.0 + df_summary = pd.DataFrame( + [ + { + "assay": assay, + "tflink_total_edges": summary_total, + "tflink_supported_edges": summary_supported, + "tflink_support_rate": summary_rate, + } + ] + ) + + output_tf = f"{assay}.tflink.tf_ranking.tsv" + output_tg = f"{assay}.tflink.tg_ranking.tsv" + output_edges = f"{assay}.tflink_edges.tsv" + output_summary = f"{assay}.tflink_summary.tsv" + + df_tf.to_csv(output_tf, sep="\t") + df_tg.to_csv(output_tg, sep="\t") + df_edges[EDGE_OUTPUT_COLUMNS].to_csv(output_edges, sep="\t", index=False) + df_summary.to_csv(output_summary, sep="\t", index=False) + + versions = { + "${task.process}": { + "python": platform.python_version(), + "pandas": pd.__version__, + "yaml": yaml.__version__, + } + } + with open("versions.yml", "w", encoding="utf-8") as handle: + handle.write(yaml.dump(versions)) + + +if __name__ == "__main__": + main() diff --git a/modules/local/tflink/annotate/tests/main.nf.test b/modules/local/tflink/annotate/tests/main.nf.test new file mode 100644 index 0000000..47cf75d --- /dev/null +++ b/modules/local/tflink/annotate/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process TFLINK_ANNOTATE" + script "modules/local/tflink/annotate/main.nf" + process "TFLINK_ANNOTATE" + + test("Should run without failures") { + + when { + process { + """ + tf_ranking = channel.of(''' +\tdcg +TFA(M1)\t1.0 +TFB(M2)\t0.5 +''').collectFile(name: "test.tf_ranking.tsv", newLine: false) + + tg_ranking = channel.of(''' +\tTFA(M1)\tTFB(M2) +GENE1\t0.9\t0.1 +GENE2\t0.4\t0.7 +GENE3\t0.2\t0.3 +''').collectFile(name: "test.tg_ranking.tsv", newLine: false) + + tflink = channel.of(''' +Name.TF\tName.Target\tSmall-scale.evidence\tSource.database +TFA\tGENE1\tYes\tDB_A +TFA\tGENE2\tNo\tDB_B +TFA\tGENE2\tYes\tDB_C +TFB\tGENE2\tNo\tDB_D 
+''').collectFile(name: "test.tflink.tsv", newLine: false) + + input[0] = tf_ranking.combine(tg_ranking).map { tf_file, tg_file -> [[id: 'test_assay'], tf_file, tg_file] } + input[1] = tflink + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + + test("Should run without failures - stub") { + + options "-stub" + + when { + process { + """ + tf_ranking = channel.of(''' +\tdcg +TFA(M1)\t1.0 +TFB(M2)\t0.5 +''').collectFile(name: "test.tf_ranking.tsv", newLine: false) + + tg_ranking = channel.of(''' +\tTFA(M1)\tTFB(M2) +GENE1\t0.9\t0.1 +GENE2\t0.4\t0.7 +GENE3\t0.2\t0.3 +''').collectFile(name: "test.tg_ranking.tsv", newLine: false) + + tflink = channel.of(''' +Name.TF\tName.Target\tSmall-scale.evidence\tSource.database +TFA\tGENE1\tYes\tDB_A +TFA\tGENE2\tNo\tDB_B +TFA\tGENE2\tYes\tDB_C +TFB\tGENE2\tNo\tDB_D +''').collectFile(name: "test.tflink.tsv", newLine: false) + + input[0] = tf_ranking.combine(tg_ranking).map { tf_file, tg_file -> [[id: 'test_assay'], tf_file, tg_file] } + input[1] = tflink + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/local/tflink/annotate/tests/main.nf.test.snap b/modules/local/tflink/annotate/tests/main.nf.test.snap new file mode 100644 index 0000000..4ab12ac --- /dev/null +++ b/modules/local/tflink/annotate/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tf_ranking.tsv:md5,ff2dddc7ddb2ca0bd65cd623915796c7" + ] + ], + "1": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tg_ranking.tsv:md5,8aa221f160336c2a1ef00f6b12a588e7" + ] + ], + "2": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_edges.tsv:md5,84e6026449b92fd83248f705c19184f0" + ] + ], + "3": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_summary.tsv:md5,9c98f1a338812660920d639e8fe0574f" + ] + ], + "4": [ + 
"versions.yml:md5,0ce29530d4f834275f6e1dfd4ad6ba95" + ], + "edge_annotations": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_edges.tsv:md5,84e6026449b92fd83248f705c19184f0" + ] + ], + "summary": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_summary.tsv:md5,9c98f1a338812660920d639e8fe0574f" + ] + ], + "tf_ranking": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tf_ranking.tsv:md5,ff2dddc7ddb2ca0bd65cd623915796c7" + ] + ], + "tg_ranking": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tg_ranking.tsv:md5,8aa221f160336c2a1ef00f6b12a588e7" + ] + ], + "versions": [ + "versions.yml:md5,0ce29530d4f834275f6e1dfd4ad6ba95" + ] + } + ], + "timestamp": "2026-04-09T19:27:16.276982145", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tf_ranking.tsv:md5,d2c5c3278b9117610a1eca0f0bb91c60" + ] + ], + "1": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tg_ranking.tsv:md5,0221b5d45a65a32e58452107916845fa" + ] + ], + "2": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_edges.tsv:md5,19fdf044666f51bc3bd9cc93839928d9" + ] + ], + "3": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_summary.tsv:md5,b86a0988730182b87de41cdf044b6d39" + ] + ], + "4": [ + "versions.yml:md5,3ccdc123869180cdb82f254b27831d37" + ], + "edge_annotations": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_edges.tsv:md5,19fdf044666f51bc3bd9cc93839928d9" + ] + ], + "summary": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink_summary.tsv:md5,b86a0988730182b87de41cdf044b6d39" + ] + ], + "tf_ranking": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tf_ranking.tsv:md5,d2c5c3278b9117610a1eca0f0bb91c60" + ] + ], + "tg_ranking": [ + [ + { + "id": "test_assay" + }, + "test_assay.tflink.tg_ranking.tsv:md5,0221b5d45a65a32e58452107916845fa" + ] + ], + "versions": [ + 
"versions.yml:md5,3ccdc123869180cdb82f254b27831d37" + ] + } + ], + "timestamp": "2026-04-09T18:43:15.660367921", + "meta": { + "nf-test": "0.9.4", + "nextflow": "25.10.4" + } + } +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index cac5888..9087b6e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -426,6 +426,16 @@ "help_text": "This parameter is *mandatory* if `--genome` is not specified and `--skip_sneep` is false.", "fa_icon": "far fa-file-code" }, + "tflink_file": { + "type": "string", + "format": "file-path", + "exists": true, + "mimetype": "text/plain", + "pattern": "^\\S+\\.(tsv|txt|tsv.gz|txt.gz)$", + "description": "Path to TFLink TF-target interaction table used for annotation.", + "help_text": "If provided (or resolved via `--genome` in `igenomes.config`), TFLink post-ranking annotation is enabled. Explicit `--tflink_file` overrides any genome-provided default.", + "fa_icon": "far fa-file-code" + }, "taxon_id": { "type": "integer", "description": "NCBI Taxonomy ID.", diff --git a/subworkflows/local/utils_nfcore_tfactivity_pipeline/main.nf b/subworkflows/local/utils_nfcore_tfactivity_pipeline/main.nf index eb0e72c..e1b30c8 100644 --- a/subworkflows/local/utils_nfcore_tfactivity_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_tfactivity_pipeline/main.nf @@ -217,6 +217,7 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { + def tflink_enabled = params.tflink_file ?: getGenomeAttribute('tflink_file') def tools = [ 'DESeq2 (Love et al. 2014)', 'STARE (Hecker et al. 2023)', @@ -227,6 +228,7 @@ def toolCitationText() { 'Biopython (Cock et al. 2009)', 'JASPAR (Rauluseviciute et al. 2024)', 'universalmotif (Tremblay 2024)', + tflink_enabled ? 'TFLink (Fekete et al. 2022)' : '', params.skip_fimo ? '' : 'FIMO (Grant et al. 2011)', params.skip_sneep ? '' : 'SNEEP (Baumgarten et al. 
2024)' ] @@ -237,6 +239,7 @@ def toolCitationText() { } def toolBibliographyText() { + def tflink_enabled = params.tflink_file ?: getGenomeAttribute('tflink_file') def references = [ "
  • Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15:550. doi:10.1186/s13059-014-0550-8.
  • ", "
  • Hecker D, Behjati Ardakani F, Karollus A, Gagneur J, Schulz MH. The adapted Activity-By-Contact model for enhancer–gene assignment and its application to single-cell data (STARE). Bioinformatics. 2023;39(2). doi:10.1093/bioinformatics/btad062.
  • ", @@ -248,6 +251,7 @@ def toolBibliographyText() { (params.skip_fimo ? "" : "
  • Grant CE, Bailey TL, Noble WS. FIMO: scanning for occurrences of a given motif. Bioinformatics. 2011;27(7):1017–1018. doi:10.1093/bioinformatics/btr064.
  • "), "
  • Rauluseviciute I, Riudavets-Puig R, Blanc-Mathieu R, et al. JASPAR 2024: 20th anniversary of the open-access database of transcription factor binding profiles. Nucleic Acids Res. 2024;52(D1):D174–D182. doi:10.1093/nar/gkad1059.
  • ", "
  • Tremblay BJ. universalmotif: An R package for biological motif analysis. Journal of Open Source Software. 2024;9(100):7012. doi:10.21105/joss.07012.
  • ", + (tflink_enabled ? "
  • Liska O, Bohár B, Hidas A, Korcsmáros T, Papp B, Fazekas D, Ari E. TFLink: an integrated gateway to access transcription factor-target gene interactions for multiple species. Database (Oxford). 2022;2022:baac083. doi:10.1093/database/baac083.
  • " : ""), (params.skip_sneep ? "" : "
  • Baumgarten N, Ebert P, Schmidt F, Kern F, Schulz MH. A statistical approach for identifying single nucleotide variants that affect transcription factor binding (SNEEP). iScience. 2024;27(5):109765. doi:10.1016/j.isci.2024.109765.
  • ") ].findAll { ref -> ref } diff --git a/workflows/tfactivity.nf b/workflows/tfactivity.nf index 5fd24bc..7f8fa3d 100644 --- a/workflows/tfactivity.nf +++ b/workflows/tfactivity.nf @@ -13,6 +13,7 @@ include { RANKING } from '../subworkflows/local/ranking' include { FIMO } from '../subworkflows/local/fimo' include { SNEEP } from '../subworkflows/local/sneep' include { REPORT } from '../subworkflows/local/report' +include { TFLINK_ANNOTATE } from '../modules/local/tflink/annotate' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' /* @@ -56,6 +57,7 @@ workflow TFACTIVITY { dynamite_randomize alpha snps + tflink_file ch_versions skip_fimo skip_sneep @@ -140,6 +142,26 @@ workflow TFACTIVITY { affinity_agg_method, ) ch_versions = ch_versions.mix(RANKING.out.versions) + ch_tf_rankings_for_report = RANKING.out.tf_ranking + ch_tg_rankings_for_report = RANKING.out.tg_ranking + + if (tflink_file) { + ch_rankings_for_tflink = RANKING.out.tf_ranking + .map { meta, tf_ranking -> [meta.id, meta, tf_ranking] } + .join( + RANKING.out.tg_ranking.map { meta, tg_ranking -> [meta.id, tg_ranking] }, + by: 0 + ) + .map { _id, meta, tf_ranking, tg_ranking -> [meta, tf_ranking, tg_ranking] } + + TFLINK_ANNOTATE( + ch_rankings_for_tflink, + tflink_file, + ) + ch_versions = ch_versions.mix(TFLINK_ANNOTATE.out.versions) + ch_tf_rankings_for_report = TFLINK_ANNOTATE.out.tf_ranking + ch_tg_rankings_for_report = TFLINK_ANNOTATE.out.tg_ranking + } ch_fimo_binding_sites = channel.empty() @@ -217,8 +239,8 @@ workflow TFACTIVITY { REPORT( gtf, - RANKING.out.tf_ranking.map { _meta, ranking -> ranking }.collect(), - RANKING.out.tg_ranking.map { _meta, ranking -> ranking }.collect(), + ch_tf_rankings_for_report.map { _meta, ranking -> ranking }.collect(), + ch_tg_rankings_for_report.map { _meta, ranking -> ranking }.collect(), COUNTS.out.differential.map { _meta, differential -> differential }.collect(), COUNTS.out.raw_counts.map { _meta, raw_counts -> 
raw_counts }.collect(), COUNTS.out.normalized.map { _meta, normalized -> normalized }.collect(), From 3ddad3ef02e392d7c8489cae58c9131682e2c80a Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Thu, 9 Apr 2026 21:00:26 +0200 Subject: [PATCH 2/3] Fix tflink/annotate container definition --- modules/local/tflink/annotate/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/tflink/annotate/main.nf b/modules/local/tflink/annotate/main.nf index eaec315..01a4231 100644 --- a/modules/local/tflink/annotate/main.nf +++ b/modules/local/tflink/annotate/main.nf @@ -4,8 +4,8 @@ process TFLINK_ANNOTATE { conda "${moduleDir}/environment.yml" container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container - ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c5/c59e6f0f9a6959e3755f422836b3d77a592d3be0a4e7a1798fd2d4aa8e10a874/data' - : 'community.wave.seqera.io/library/gtfparse_pandas_pyyaml:10fc85c2e9b77f0d'}" + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/7c/7c256e63e08633ac420692d3ceec1f554fe4fcc794e5bdd331994f743096a46d/data' + : 'community.wave.seqera.io/library/pandas_pyyaml:c0acbb47d05e4f9c'}" input: tuple val(meta), path(tf_ranking), path(tg_ranking) From 4c6511b7073b5490394f5ef031409efdfa3a791e Mon Sep 17 00:00:00 2001 From: Nico Trummer Date: Thu, 9 Apr 2026 21:43:47 +0200 Subject: [PATCH 3/3] Update test snapshots --- .../tflink/annotate/templates/annotate.py | 2 - .../tflink/annotate/tests/main.nf.test.snap | 12 ++-- tests/default.nf.test.snap | 70 +++++++++++++++++-- 3 files changed, 70 insertions(+), 14 deletions(-) diff --git a/modules/local/tflink/annotate/templates/annotate.py b/modules/local/tflink/annotate/templates/annotate.py index 77cc338..11351cf 100644 --- a/modules/local/tflink/annotate/templates/annotate.py +++ b/modules/local/tflink/annotate/templates/annotate.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -import platform from pathlib import Path import pandas as pd @@ -230,7 +229,6 @@ def main() -> None: versions = { "${task.process}": { - "python": platform.python_version(), "pandas": pd.__version__, "yaml": yaml.__version__, } diff --git a/modules/local/tflink/annotate/tests/main.nf.test.snap b/modules/local/tflink/annotate/tests/main.nf.test.snap index 4ab12ac..d1bfc4e 100644 --- a/modules/local/tflink/annotate/tests/main.nf.test.snap +++ b/modules/local/tflink/annotate/tests/main.nf.test.snap @@ -35,7 +35,7 @@ ] ], "4": [ - "versions.yml:md5,0ce29530d4f834275f6e1dfd4ad6ba95" + "versions.yml:md5,f80bcde83ca23bab61bdb126a354c83f" ], "edge_annotations": [ [ @@ -70,11 +70,11 @@ ] ], "versions": [ - "versions.yml:md5,0ce29530d4f834275f6e1dfd4ad6ba95" + "versions.yml:md5,f80bcde83ca23bab61bdb126a354c83f" ] } ], - "timestamp": "2026-04-09T19:27:16.276982145", + "timestamp": "2026-04-09T21:11:55.037571654", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" @@ -116,7 +116,7 @@ ] ], "4": [ - 
"versions.yml:md5,3ccdc123869180cdb82f254b27831d37" + "versions.yml:md5,0651095064e5de959f9d74717caba16e" ], "edge_annotations": [ [ @@ -151,11 +151,11 @@ ] ], "versions": [ - "versions.yml:md5,3ccdc123869180cdb82f254b27831d37" + "versions.yml:md5,0651095064e5de959f9d74717caba16e" ] } ], - "timestamp": "2026-04-09T18:43:15.660367921", + "timestamp": "2026-04-09T21:11:42.496472809", "meta": { "nf-test": "0.9.4", "nextflow": "25.10.4" diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index c6ffe36..6568b04 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,7 +1,7 @@ { "Should run with bed only": { "content": [ - 179, + 181, { "AFFINITY_MEAN": { "numpy": "2.2.6", @@ -164,6 +164,10 @@ "STARE": { "stare": "1.0.5" }, + "TFLINK_ANNOTATE": { + "pandas": "2.3.0", + "yaml": "6.0.2" + }, "TF_TG_SCORE": { "pandas": "2.3.0", "python": "3.13.3" @@ -363,6 +367,15 @@ "05_ranking/05_combined_tgs_per_assay/H3K4me3.tg_ranking.tsv", "05_ranking/06_combined_tgs_across_assays", "05_ranking/06_combined_tgs_across_assays/all.tsv", + "05_ranking/07_tflink_annotation", + "05_ranking/07_tflink_annotation/H3K27ac.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink_summary.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink_summary.tsv", "06_fimo", "06_fimo/01_filtered_motifs", "06_fimo/01_filtered_motifs/motifs", @@ -3757,6 +3770,14 @@ "H3K27ac.tg_ranking.tsv:md5,88229324fcacf3fd97140defa3c50d88", "H3K4me3.tg_ranking.tsv:md5,7d27ecc256317be02da81db75303e24d", "all.tsv:md5,0371c1546c0f018b03366fa3c4633d64", + "H3K27ac.tflink.tf_ranking.tsv:md5,e20dee29187ee7be46c848e67bcc418d", + 
"H3K27ac.tflink.tg_ranking.tsv:md5,447eab86bbb3ae5e1ecdb69d06f5b06c", + "H3K27ac.tflink_edges.tsv:md5,d8588f19d1cb15733e51163de814b140", + "H3K27ac.tflink_summary.tsv:md5,b788a17d208a5678487606a7d0d21f84", + "H3K4me3.tflink.tf_ranking.tsv:md5,200ca0fb1d810a02dd7f296133d4477d", + "H3K4me3.tflink.tg_ranking.tsv:md5,c2182ff92a1e83b41f610185bed7e385", + "H3K4me3.tflink_edges.tsv:md5,7d1ddabc85e36240638f0cf0b00465cd", + "H3K4me3.tflink_summary.tsv:md5,4296871d510d012d028ec51e4e0489a8", "MA0145.2.meme:md5,112ddef6097b5b7cec18dc83dde6f654", "MA0602.2.meme:md5,6d30a7ab368e43d191a583424715fbba", "MA0626.2.meme:md5,c1844d28f080b72823106c799266fd65", @@ -3987,10 +4008,10 @@ "filtered_p6_H3K4me3.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2025-11-13T19:36:12.666238946", + "timestamp": "2026-04-09T21:31:19.892864774", "meta": { - "nf-test": "0.9.2", - "nextflow": "25.10.0" + "nf-test": "0.9.4", + "nextflow": "25.10.4" } }, "Should run with both bed and bam input": { @@ -4188,6 +4209,10 @@ "SUBTRACT_OVERLAPS": { "bedtools": "2.31.1" }, + "TFLINK_ANNOTATE": { + "pandas": "2.3.0", + "yaml": "6.0.2" + }, "TF_TG_SCORE": { "pandas": "2.3.0", "python": "3.13.3" @@ -4542,6 +4567,23 @@ "05_ranking/05_combined_tgs_per_assay/chromHMM_promoters.tg_ranking.tsv", "05_ranking/06_combined_tgs_across_assays", "05_ranking/06_combined_tgs_across_assays/all.tsv", + "05_ranking/07_tflink_annotation", + "05_ranking/07_tflink_annotation/H3K27ac.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/H3K27ac.tflink_summary.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/H3K4me3.tflink_summary.tsv", + 
"05_ranking/07_tflink_annotation/chromHMM_enhancers.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/chromHMM_enhancers.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/chromHMM_enhancers.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/chromHMM_enhancers.tflink_summary.tsv", + "05_ranking/07_tflink_annotation/chromHMM_promoters.tflink.tf_ranking.tsv", + "05_ranking/07_tflink_annotation/chromHMM_promoters.tflink.tg_ranking.tsv", + "05_ranking/07_tflink_annotation/chromHMM_promoters.tflink_edges.tsv", + "05_ranking/07_tflink_annotation/chromHMM_promoters.tflink_summary.tsv", "06_fimo", "06_fimo/01_filtered_motifs", "06_fimo/01_filtered_motifs/motifs", @@ -9537,6 +9579,22 @@ "chromHMM_enhancers.tg_ranking.tsv:md5,85f61b6c0f3ff0103684d76f3c7d1e34", "chromHMM_promoters.tg_ranking.tsv:md5,edf40455b2658931b5b48c177d0637e3", "all.tsv:md5,44590a7acfd2082306fb56aca936c217", + "H3K27ac.tflink.tf_ranking.tsv:md5,e20dee29187ee7be46c848e67bcc418d", + "H3K27ac.tflink.tg_ranking.tsv:md5,447eab86bbb3ae5e1ecdb69d06f5b06c", + "H3K27ac.tflink_edges.tsv:md5,d8588f19d1cb15733e51163de814b140", + "H3K27ac.tflink_summary.tsv:md5,b788a17d208a5678487606a7d0d21f84", + "H3K4me3.tflink.tf_ranking.tsv:md5,200ca0fb1d810a02dd7f296133d4477d", + "H3K4me3.tflink.tg_ranking.tsv:md5,c2182ff92a1e83b41f610185bed7e385", + "H3K4me3.tflink_edges.tsv:md5,7d1ddabc85e36240638f0cf0b00465cd", + "H3K4me3.tflink_summary.tsv:md5,4296871d510d012d028ec51e4e0489a8", + "chromHMM_enhancers.tflink.tf_ranking.tsv:md5,c5d40f1a73a5327d5aca63acb582e7e9", + "chromHMM_enhancers.tflink.tg_ranking.tsv:md5,6ec3683806785560ac5659d4eab8db99", + "chromHMM_enhancers.tflink_edges.tsv:md5,0d7d246974185fad470c3454e2b73dec", + "chromHMM_enhancers.tflink_summary.tsv:md5,16d5f5555ba856d99ad6ce49b4ef83a7", + "chromHMM_promoters.tflink.tf_ranking.tsv:md5,d4521af42b3df2a1297584be295cf598", + "chromHMM_promoters.tflink.tg_ranking.tsv:md5,86bd7f96be19d306d22ac18383efa738", + 
"chromHMM_promoters.tflink_edges.tsv:md5,f4ed8a836788fbeb309e1176611ffe76", + "chromHMM_promoters.tflink_summary.tsv:md5,a6246cd61b34edb71b0b82c2d98577ec", "MA0145.2.meme:md5,112ddef6097b5b7cec18dc83dde6f654", "MA0505.3.meme:md5,c13fc4da323a7944e49564b163f2a0e9", "MA0518.2.meme:md5,fb73c817ec3bffc16887882a1913b5ef", @@ -10141,10 +10199,10 @@ "filtered_p6_chromHMM_promoters.bed:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "timestamp": "2026-03-29T16:58:46.963683478", + "timestamp": "2026-04-09T21:24:32.438531168", "meta": { "nf-test": "0.9.4", - "nextflow": "25.10.2" + "nextflow": "25.10.4" } } } \ No newline at end of file