From 043d9af6df4262272f0e2b39fa21a08e577f0e30 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Wed, 1 Mar 2017 03:50:20 +0000 Subject: [PATCH 1/2] upgrade cwl to v1.0, add option to input gzipped genome, change default docker container to quay.io, add test input Dockstore.json --- Dockstore.json | 23 +++++++++++++++++ muse.cwl | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++ muse.cwl.yaml | 37 ---------------------------- muse.py | 23 ++++++++++++++--- 4 files changed, 110 insertions(+), 40 deletions(-) create mode 100644 Dockstore.json create mode 100644 muse.cwl delete mode 100644 muse.cwl.yaml diff --git a/Dockstore.json b/Dockstore.json new file mode 100644 index 0000000..d28b015 --- /dev/null +++ b/Dockstore.json @@ -0,0 +1,23 @@ +{ + "reference": { + "path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz", + "class": "File" + }, + "mode": "wgs", + "normal": { + "path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam", + "class": "File" + }, + "mutations": { + "path": "/tmp/muse.vcf", + "class": "File" + }, + "known": { + "path": "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b149_GRCh37p13/VCF/common_all_20161121.vcf.gz", + "class": "File" + }, + "tumor": { + "path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam", + "class": "File" + } +} diff --git a/muse.cwl b/muse.cwl new file mode 100644 index 0000000..d3c5c46 --- /dev/null +++ b/muse.cwl @@ -0,0 +1,67 @@ +#!/usr/bin/env cwl-runner +# +# Author: Jeltje van Baren jeltje.van.baren@gmail.com + +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: [/opt/bin/muse.py, -O, muse.vcf, -w, ./, --muse, MuSEv1.0rc] + +doc: "Runs MuSEv1.0rc SNP caller on split chromosomes (MuSE call) then creates final calls in VCF format (MuSE sump)" + +hints: + DockerRequirement: + dockerPull: quay.io/opengenomics/muse + +#requirements: +# - class: InlineJavascriptRequirement + +inputs: + tumor: + 
type: File + doc: | + tumor bam file + inputBinding: + prefix: --tumor-bam + secondaryFiles: + - .bai + normal: + type: File + doc: | + normal bam file + inputBinding: + prefix: --normal-bam + secondaryFiles: + - .bai + reference: + type: File + doc: | + Reference sequence file, can be gzipped. + inputBinding: + prefix: -f + known: + type: File + doc: | + dbSNP vcf file (will be bgzip compressed and tabix indexed). Can be gzipped. + inputBinding: + prefix: -D + mode: + type: {"type": "enum", "name": "Mode", "symbols": ["wgs", "wxs"]} + doc: | + Input is whole genome or exome {wgs,wxs} + inputBinding: + prefix: --mode + ncpus: + type: int? + doc: | + number of cpus (8) + inputBinding: + position: 2 + prefix: --cpus + +outputs: + mutations: + type: File + outputBinding: + glob: muse.vcf + + diff --git a/muse.cwl.yaml b/muse.cwl.yaml deleted file mode 100644 index 192d473..0000000 --- a/muse.cwl.yaml +++ /dev/null @@ -1,37 +0,0 @@ -class: CommandLineTool -label: MuSE -cwlVersion: v1.0 -baseCommand: [/opt/bin/muse.py, -O, muse.vcf, -w, ./, --muse, MuSEv1.0rc] -requirements: - - class: "DockerRequirement" - dockerImageId: "opengenomics/muse:c039ffa" -inputs: - tumor: - type: File - inputBinding: - prefix: --tumor-bam - secondaryFiles: - - .bai - normal: - type: File - inputBinding: - prefix: --normal-bam - secondaryFiles: - - .bai - reference: - type: File - inputBinding: - prefix: -f - known: - type: File - inputBinding: - prefix: -D - mode: - type: {"type": "enum", "name": "Mode", "symbols": ["wgs", "wxs"]} - inputBinding: - prefix: --mode -outputs: - mutations: - type: File - outputBinding: - glob: muse.vcf \ No newline at end of file diff --git a/muse.py b/muse.py index 5c34949..76d0803 100755 --- a/muse.py +++ b/muse.py @@ -18,6 +18,15 @@ def which(cmd): if len(res) == 0: return None return res +def gunzip(infile, outfile): + cmd = (' ').join(['zcat', infile]) + with open(outfile, 'w') as outF: + p = subprocess.Popen(cmd, shell=True, stdout=outF, 
stderr=subprocess.PIPE) + stdout,stderr = p.communicate() + if len(stderr): + print "unzip command failed:", stderr + raise Exception("unzip failed") + def fai_chunk(path, blocksize): seq_map = {} with open( path ) as handle: @@ -77,8 +86,13 @@ def run_muse(args): args.muse = which(args.muse) workdir = os.path.abspath(tempfile.mkdtemp(dir=args.workdir, prefix="muse_work_")) + if args.f.endswith('.gz'): + new_ref = os.path.join(workdir, "ref_genome.fasta") + gunzip(args.f, new_ref) + subprocess.check_call( ["/usr/bin/samtools", "faidx", new_ref] ) + args.f = new_ref - if not os.path.exists(args.f + ".fai"): + elif not os.path.exists(args.f + ".fai"): new_ref = os.path.join(workdir, "ref_genome.fasta") os.symlink(os.path.abspath(args.f),new_ref) subprocess.check_call( ["/usr/bin/samtools", "faidx", new_ref] ) @@ -132,11 +146,14 @@ def run_muse(args): first = False if not args.no_clean: os.unlink(out) - dbsnp_file = None if args.D: new_dbsnp = os.path.join(workdir, "db_snp.vcf") - os.symlink(args.D,new_dbsnp) + if args.D.endswith('.gz'): + print "unzipping SNP file..." + gunzip(args.D, new_dbsnp) + else: + os.symlink(args.D,new_dbsnp) subprocess.check_call( ["/usr/bin/bgzip", new_dbsnp] ) subprocess.check_call( ["/usr/bin/tabix", "-p", "vcf", new_dbsnp + ".gz" ]) dbsnp_file = new_dbsnp + ".gz" From b3aa0f4488adc0b66a45a1b22f81d3e1bdb076cb Mon Sep 17 00:00:00 2001 From: Jeltje Date: Tue, 4 Apr 2017 17:35:27 +0000 Subject: [PATCH 2/2] removed redundant parentheses --- muse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/muse.py b/muse.py index 76d0803..6592b53 100755 --- a/muse.py +++ b/muse.py @@ -19,7 +19,7 @@ def which(cmd): return res def gunzip(infile, outfile): - cmd = (' ').join(['zcat', infile]) + cmd = ' '.join(['zcat', infile]) with open(outfile, 'w') as outF: p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE) stdout,stderr = p.communicate()