From ac4f962a0ac0dce1497372a9cd9aff3b88e6c39e Mon Sep 17 00:00:00 2001 From: Jeltje Date: Tue, 21 Feb 2017 20:58:39 +0000 Subject: [PATCH 1/7] added current samtools to docker container because the somatic sniper internal version does not like long fasta lines when doing faidx; added option to input a gzipped genome to SomaticSniper.py; updated cwl to v1.0 --- Dockerfile | 6 +- SomaticSniper.py | 16 ++++- somatic_sniper.cwl | 128 ++++++++++++++++++++++++++++++++++++++++ somatic_sniper.cwl.yaml | 77 ------------------------ 4 files changed, 146 insertions(+), 81 deletions(-) mode change 100644 => 100755 SomaticSniper.py create mode 100644 somatic_sniper.cwl delete mode 100644 somatic_sniper.cwl.yaml diff --git a/Dockerfile b/Dockerfile index f674fcd..12742f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,13 +1,13 @@ FROM ubuntu -RUN apt-get update && apt-get install -y wget build-essential git-core cmake zlib1g-dev libncurses-dev python python-dev python-pip +RUN apt-get update && apt-get install -y wget build-essential git-core cmake zlib1g-dev libncurses-dev python python-dev python-pip samtools RUN pip install pysam WORKDIR /opt RUN wget https://github.com/genome/somatic-sniper/archive/v1.0.5.0.tar.gz && tar xvzf v1.0.5.0.tar.gz && rm v1.0.5.0.tar.gz RUN cd /opt/somatic-sniper-1.0.5.0 && mkdir build && cd build && cmake ../ && make deps && make -j && make install -RUN cp /opt/somatic-sniper-1.0.5.0/build/vendor/samtools/samtools /usr/bin/ +#RUN cp /opt/somatic-sniper-1.0.5.0/build/vendor/samtools/samtools /usr/bin/ COPY SomaticSniper.py /opt/ -RUN chmod +x /opt/SomaticSniper.py \ No newline at end of file +RUN chmod +x /opt/SomaticSniper.py diff --git a/SomaticSniper.py b/SomaticSniper.py old mode 100644 new mode 100755 index 8de3453..fa43828 --- a/SomaticSniper.py +++ b/SomaticSniper.py @@ -4,6 +4,7 @@ import os import logging import subprocess +import gzip from datetime import date from urlparse import urlparse, urlunparse @@ -45,6 +46,13 @@ def sniper_argparser(): group.add_argument('--sniper-exe', dest='sniper_exe', default='bam-somaticsniper', help='SomaticSniper Exec Name') return parser +def gunzip(infile, outfile): + inF = gzip.GzipFile(infile, 'rb') + s = inF.read() + inF.close() + with open(outfile, 'wb') as outF: + outF.write(s) + def tcga_header_arguments(): return set(('reference_id', 'center', 'tumor_uuid', 'tumor_barcode', 'tumor_accession', 'tumor_platform', @@ -87,7 +95,13 @@ def create_sniper_cmdline(namespace_dict, reference, tumor_bam, normal_bam, temp def create_workspace(workdir, reference, tumor_bam, normal_bam): - new_ref = symlink_workspace_file(workdir, reference, "ref_genome.fasta") + if reference.endswith('.gz'): + new_ref = os.path.join(args.workdir, "ref_genome.fasta") + gunzip(args.f, new_ref) + args.f = new_ref + else: + new_ref = symlink_workspace_file(workdir, reference, "ref_genome.fasta") + if not os.path.exists(reference + ".fai"): print "Indexing", new_ref subprocess.check_call( ["/usr/bin/samtools", "faidx", new_ref] ) diff --git a/somatic_sniper.cwl b/somatic_sniper.cwl new file mode 100644 index 0000000..7be7509 --- /dev/null +++ b/somatic_sniper.cwl @@ -0,0 +1,128 @@ +#!/usr/bin/env cwl-runner +# +# Author: Jeltje van Baren jeltje.van.baren@gmail.com + +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: [ /opt/SomaticSniper.py, -F, vcf, --workdir, ./ ] + +doc: "Runs somatic sniper snp caller on input bam files" + +hints: + DockerRequirement: + dockerPull: opengenomics/somatic-sniper:1.0.5.0 + +#requirements: +# - class: InlineJavascriptRequirement + +inputs: + + reference: + type: File + doc: | + reference sequence in the FASTA format + inputBinding: + position: 3 + prefix: -f + + tumor_name: + type: string? + doc: | + tumor sample id (for VCF header) [TUMOR] + inputBinding: + position: 3 + prefix: -t + + normal_name: + type: string? + doc: | + normal sample id (for VCF header) [NORMAL] + inputBinding: + position: 3 + prefix: -n + + minmapqual: + type: int? + doc: | + filtering reads with mapping quality less than [0] + inputBinding: + position: 3 + prefix: -q + + snvqual: + type: int? + doc: | + filtering somatic snv output with somatic quality less than [15] + inputBinding: + position: 3 + prefix: -Q + + noLOH: + type: boolean? + doc: | + do not report LOH variants as determined by genotypes + inputBinding: + position: 3 + prefix: -L + + noGainOfRef: + type: boolean? + doc: | + do not report Gain of Reference variants as determined by genotypes + inputBinding: + position: 3 + prefix: -G + + noSomaticPriors: + type: boolean? + doc: | + disable priors in the somatic calculation. Increases sensitivity for solid tumors + inputBinding: + position: 3 + prefix: -p + + doPriors: + type: boolean? + doc: | + Use prior probabilities accounting for the somatic mutation rate + inputBinding: + position: 3 + prefix: -J + + priorProb: + type: float? + doc: | + prior probability of a somatic mutation (implies -J) [0.010000] + inputBinding: + position: 3 + prefix: -s + + tumorbam: + type: File + doc: | + tumor bamfile + inputBinding: + position: 1 + + normalbam: + type: File + doc: | + normal bamfile + inputBinding: + position: 2 + + output_name: + type: string + default: mutations.vcf + doc: | + Name of output file + inputBinding: + position: 5 + +outputs: + + mutations: + type: File? + outputBinding: + glob: $(inputs.output_name) + diff --git a/somatic_sniper.cwl.yaml b/somatic_sniper.cwl.yaml deleted file mode 100644 index 07f9a36..0000000 --- a/somatic_sniper.cwl.yaml +++ /dev/null @@ -1,77 +0,0 @@ -class: CommandLineTool -label: SomaticSniper -cwlVersion: v1.0 -baseCommand: [ python, /opt/SomaticSniper.py, -F, vcf, --workdir, ./ ] -requirements: - - class: "DockerRequirement" - dockerImageId: "opengenomics/somatic-sniper:1.0.5.0" -inputs: - reference: - type: File - inputBinding: - prefix: -f - tumor_name: - type: string - default: "TUMOR" - inputBinding: - prefix: -t - normal_name: - type: string - default: "NORMAL" - inputBinding: - prefix: -n - mapq: - type: int - inputBinding: - prefix: -q - default: 0 - somaticq: - type: int - inputBinding: - prefix: -Q - default: 40 - loh: - type: boolean - default: false - inputBinding: - prefix: -L - gor: - type: boolean - default: false - inputBinding: - prefix: -G - dis_priors: - type: boolean - default: false - inputBinding: - prefix: -p - use_priorp: - type: boolean - default: false - inputBinding: - prefix: -J - prior_p: - type: float - default: 0.01 - inputBinding: - prefix: -s - tumor: - type: File - inputBinding: - position: 1 - normal: - type: File - inputBinding: - position: 2 - output_name: - type: string - default: "mutations.vcf" - inputBinding: - position: 3 - -outputs: - mutations: - type: File - outputBinding: - glob: mutations.vcf - From d0abb5d0f4304c315a7b25940dc5c3d4bbd8aa5d Mon Sep 17 00:00:00 2001 From: Jeltje Date: Wed, 22 Feb 2017 00:05:16 +0000 Subject: [PATCH 2/7] adding test file with public data --- test.json | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 test.json diff --git a/test.json b/test.json new file mode 100644 index 0000000..5d1c5fa --- /dev/null +++ b/test.json @@ -0,0 +1,18 @@ +{ + "reference": { + "path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz", + "class": "File" + }, + "normalbam": { + "path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam", + "class": "File" + }, + "mutations": { + "path": "/tmp/out.vcf", + "class": "File" + }, + "tumorbam": { + "path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam", + "class": "File" + } +} From f1e17408008fe37f4bc4626d23bd63c448ad1019 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Thu, 23 Feb 2017 18:31:20 +0000 Subject: [PATCH 3/7] changed docker image to quay.io --- somatic_sniper.cwl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/somatic_sniper.cwl b/somatic_sniper.cwl index 7be7509..e0c78ef 100644 --- a/somatic_sniper.cwl +++ b/somatic_sniper.cwl @@ -10,7 +10,7 @@ doc: "Runs somatic sniper snp caller on input bam files" hints: DockerRequirement: - dockerPull: opengenomics/somatic-sniper:1.0.5.0 + dockerPull: quay.io/opengenomics/somatic-sniper #requirements: # - class: InlineJavascriptRequirement From e1f51e4ad6fbe9be850a10e3a3037def4131ca07 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Thu, 23 Feb 2017 18:46:37 +0000 Subject: [PATCH 4/7] renamed test json --- test.json => Dockstore.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename test.json => Dockstore.json (100%) diff --git a/test.json b/Dockstore.json similarity index 100% rename from test.json rename to Dockstore.json From 695fc9e1d73b132857459f29766a7d39df1f0fa1 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Wed, 1 Mar 2017 02:26:26 +0000 Subject: [PATCH 5/7] changed gunzip to zcat to avoid memory issues with larger files --- SomaticSniper.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/SomaticSniper.py b/SomaticSniper.py index fa43828..63b1dd3 100755 --- a/SomaticSniper.py +++ b/SomaticSniper.py @@ -4,7 +4,6 @@ import os import logging import subprocess -import gzip from datetime import date from urlparse import urlparse, urlunparse @@ -47,11 +46,13 @@ def sniper_argparser(): return parser def gunzip(infile, outfile): - inF = gzip.GzipFile(infile, 'rb') - s = inF.read() - inF.close() - with open(outfile, 'wb') as outF: - outF.write(s) + cmd = (' ').join(['zcat', infile]) + with open(outfile, 'w') as outF: + p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE) + stdout,stderr = p.communicate() + if len(stderr): + print "unzip command failed:", stderr + raise Exception("unzip failed") def tcga_header_arguments(): return set(('reference_id', 'center', From e267c17acd99cf7b8a4cec99346318b9bd78dd7f Mon Sep 17 00:00:00 2001 From: Jeltje Date: Tue, 4 Apr 2017 17:26:33 +0000 Subject: [PATCH 6/7] removed commented out line --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 12742f4..09ee856 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,7 +7,6 @@ WORKDIR /opt RUN wget https://github.com/genome/somatic-sniper/archive/v1.0.5.0.tar.gz && tar xvzf v1.0.5.0.tar.gz && rm v1.0.5.0.tar.gz RUN cd /opt/somatic-sniper-1.0.5.0 && mkdir build && cd build && cmake ../ && make deps && make -j && make install -#RUN cp /opt/somatic-sniper-1.0.5.0/build/vendor/samtools/samtools /usr/bin/ COPY SomaticSniper.py /opt/ RUN chmod +x /opt/SomaticSniper.py From ff7940e3f5cf91dc659d03363984c32a2b9428ce Mon Sep 17 00:00:00 2001 From: Jeltje Date: Tue, 4 Apr 2017 17:32:38 +0000 Subject: [PATCH 7/7] removed redundant parentheses --- SomaticSniper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SomaticSniper.py b/SomaticSniper.py index 63b1dd3..da4068f 100755 --- a/SomaticSniper.py +++ b/SomaticSniper.py @@ -46,7 +46,7 @@ def sniper_argparser(): return parser def gunzip(infile, outfile): - cmd = (' ').join(['zcat', infile]) + cmd = ' '.join(['zcat', infile]) with open(outfile, 'w') as outF: p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE) stdout,stderr = p.communicate()