-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathAlignment_RNA.mak
More file actions
119 lines (101 loc) · 4.07 KB
/
Copy pathAlignment_RNA.mak
File metadata and controls
119 lines (101 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#This makefile is designed to align and count bulk RNA-seq data.
#The main target is the table file, generated by featureCounts.
#BAM files get created accordingly from FASTQ using STAR.
#There is the option to also run trimmomatic and je for additional
#processing.
#Running the makefile without targets shows the usage details:
#The user needs to specify the genome and if reads are single or
#paired end. This latter step should be easy to figure out on the
#fly, so that's something for a future iteration.
#The makefile expects a certain directory structure, which can be
#changed in this file if need be.
SHELL=/bin/bash
DATA_DIR=../data/
ALIGN_DIR=../align/
THREADS=14
STAR_HS=~/projects/references/human/hg38_STAR
STAR_HS2=~jsch0032/references/human/hg19_STAR_index/
STAR_MM=/home/jsch/projects/references/mouse/STAR/
GTF_HS=~/projects/references/human/Homo_sapiens.GRCh38.107.gtf
GTF_HS2=~jsch0032/references/human/Homo_sapiens.GRCh37.87.chr.gtf
GTF_MM=/home/jsch/projects/references/mouse/Mus_musculus.GRCm39.105.chr_filtered.gtf
ifdef infix
TMP1 := ${ALIGN_DIR}$(infix)/
ALIGN_DIR=$(TMP1)
TMP2 := ${DATA_DIR}$(infix)/
DATA_DIR=$(TMP2)
endif
FASTQ := $(wildcard ${DATA_DIR}*R1_001.fastq.gz)
#FASTQ := $(shell find ${DATA_DIR} -regextype posix-extended -regex -name '*R1[01_]*.fastq.gz')
#Set bam requirements according to sequencing mode
ifeq ($(mode),se)
BAM= $(addprefix ${ALIGN_DIR}/,\
$(notdir \
$(subst _R1_001.fastq.gz,-SE-starAligned.sortedByCoord.out.bam,$(FASTQ) ) ) )
#$(subst _R1.fastq.gz,-SE-starAligned.sortedByCoord.JEMD.out.bam,$(FASTQ) ) ) )
endif
ifeq ($(mode),pe)
BAM= $(addprefix ${ALIGN_DIR}/,\
$(notdir \
$(subst _R1_001.fastq.gz,-PE-starAligned.sortedByCoord.out.bam,$(FASTQ) ) ) )
endif
#Check UMI Filtering
ifdef umi
TMP := $(subst sortedByCoord.out,sortedByCoord.JEMD.out,$(BAM))
BAM=$(TMP)
endif
#Set genome annotation and index according to user paramter
ifeq ($(genome),hg38)
INDEX=$(STAR_HS)
ANNO=$(GTF_HS)
endif
ifeq ($(genome),hg19)
INDEX=$(STAR_HS2)
ANNO=$(GTF_HS2)
endif
ifeq ($(genome),mm)
INDEX=$(STAR_MM)
ANNO=$(GTF_MM)
endif
#Usage
all:
#echo $(BAM)
#echo $(FASTQ)
echo $(ALIGN_DIR)
@echo "Usage: make -f Alignment_RNA.mak <mode=mode> <genome=genome> [options] table"
@echo " where mode = se or pe, genome = hg19, hg38 or mm"
@echo " and options umi=true for umi collapsing, trim=X for trimming down to X bases,"
@echo " and infix=infix looks for data in ../data/infix/ and writes to ../align/infix/"
#TRIM READS TO 51 BP
#To be implementd
${DATA_DIR}trimmed/%_trimmed_R1_001.fastq: ${DATA_DIR}/*/%_R1_001.fastq.gz
ml load trimmomatic
trimmomatic SE $^ $@ CROP:51 MINLEN:50
#GZIP A FILE IN TRIMMED
${DATA_DIR}trimmed/%.gz: ${DATA_DIR}trimmed/%
gzip $^
#Align SE Reads
${ALIGN_DIR}%-SE-starAligned.sortedByCoord.out.bam: ${DATA_DIR}%_R1_001.fastq.gz
STAR --genomeDir $(INDEX) --runThreadN ${THREADS} --readFilesIn <(zcat $<) --outFileNamePrefix ${ALIGN_DIR}$*-SE-star --outSAMtype BAM SortedByCoordinate --outSAMunmapped Within --outSAMattributes Standard
samtools index $@
#Align PE Reads
${ALIGN_DIR}%-PE-starAligned.sortedByCoord.out.bam: ${DATA_DIR}%_R1_001.fastq.gz ${DATA_DIR}%_R2_001.fastq.gz
STAR --genomeDir $(INDEX) --runThreadN ${THREADS} --readFilesIn <(zcat $<) <(zcat $(word 2,$^)) --outFileNamePrefix ${ALIGN_DIR}$*-PE-star --outSAMtype BAM SortedByCoordinate --outSAMunmapped Within --outSAMattributes Standard
samtools index $@
#UMI collapsing
#To be implemented
${ALIGN_DIR}%-SE-starAligned.sortedByCoord.JEMD.out.bam: ${ALIGN_DIR}%-SE-starAligned.sortedByCoord.out.bam
~jsch0032/tools/je/je_1.2/je markdupes INPUT=$^ O=$@ MISMATCHES=1 METRICS_FILE=${@}.metrics REMOVE_DUPLICATES=true
samtools index $@
bams: ${BAM}
echo "creating bam files"
table%: ${BAM}
ifndef mode
@echo "mode not set. specify mode=se or mode=pe with the command. Run without target for usage."
else ifndef genome
@echo "genome not set. specidy run=hs or run=mm with command for human or mouse alignment."
else
@echo $(BAM)
@echo "----------------------------------------------"
featureCounts -a $(ANNO) -o $@ -Q 10 -p -T ${THREADS} -g gene_name $^
endif