-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcall_diffFP_wellington.sh
More file actions
183 lines (167 loc) · 4.42 KB
/
call_diffFP_wellington.sh
File metadata and controls
183 lines (167 loc) · 4.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/env bash
set -o pipefail
set -o nounset
# default arg
check='on'
# help message
help_message="
Wrapper to call differential footprints from DNase data with Wellington
usage:
bash $(basename "$0") [-options] -r <BED> -b1 <BAM> -b2 <BAM>
required arguments:
-r|--regions : accessible regions to profile [BED]
-t|--treatment : aligned reads for treatment/test [BAM]
-c|--control : aligned reads for control [BAM]
optional arguments:
-o|--outdir : output directory name (default = PWD)
-n|--name : name of comparison & outdir subfolder (default = <tname>_vs_<cname>)
-tn|--tname : treatment name (default = extracted from treatment)
-cn|--cname : control name (defailt = extracted from control)
--fdr : FDR cutoff (default = 0.01)
--fdrlimit : minimum pval to consider sig for FDR (default = -20)
--logdir : output directory for log files (default = --outdir)
--check : whether to check input files [on,off] (default = on)
--depend : list of PBS dependencies (default = NULL)
additional info:
"
# parse command line arguments
while [[ $# -gt 1 ]]; do
key=$1
case $key in
-r|--regions)
regions=$2
shift
;;
-t|--treatment)
treatment=$2
shift
;;
-c|--control)
control=$2
shift
;;
-o|--outdir)
outdir=$2
shift
;;
-n|--name)
name=$2
shift
;;
-tn|--tname)
tname=$2
shift
;;
-cn|--cname)
cname=$2
shift
;;
--fdr)
fdr="-fdr $2"
shift
;;
--fdrlimit)
fdrlimit="-fdrlimit $2"
shift
;;
--logdir)
logdir=$2
shift
;;
--check)
check=$2
shift
;;
--depend)
depend="#SBATCH --dependency=$2"
shift
;;
*)
printf "ERROR: Unrecognised argument: %s %s\n" $1 $2
echo "$help_message"; exit 1
;;
esac
shift
done
# check required args
if [[ -z ${regions:-} ]]; then
printf "\nERROR: --region argument required\n"
echo "$help_message"; exit 1
elif [[ -z ${treatment:-} ]]; then
printf "\nERROR: --treatment argument required\n"
echo "$help_message"; exit 1
elif [[ -z ${control:-} ]]; then
printf "\nERROR: --control argument required\n"
echo "$help_message"; exit 1
fi
# set/check output directories
if [[ -z "${outdir:-}" ]]; then
outdir="."
fi
mkdir -p $outdir
if [[ -z "${logdir:-}" ]]; then
logdir="${outdir}"
fi
mkdir -p $logdir
# set optional args
if [[ -z ${tname:-} ]]; then
tname=$(basename "${treatment}")
tname=${tname%%.*}
fi
if [[ -z ${cname:-} ]]; then
cname=$(basename ${control})
cname=${cname%%.*}
fi
if [[ -z ${name:-} ]]; then
name="${tname}_vs_${cname}"
fi
# check files
if [[ ${check} = 'on' ]]; then
if [[ ! -r ${regions} ]]; then
printf "\nERROR: input file cannot be read: %s\n" $regions
echo "$help_message"; exit 1
elif [[ ! -r ${treatment} ]]; then
printf "\nERROR: bam file cannot be read: %s\n" $treatment
echo "$help_message"; exit 1
elif [[ ! -r ${control} ]]; then
printf "\nERROR: bam file cannot be read: %s\n" $control
echo "$help_message"; exit 1
fi
fi
# set commands
bed_cmd=("zcat -f ${regions} | "
"awk 'BEGIN {OFS = \"\t\"}"
"{if (\$3 - \$2 < 100) \$2 = \$2 - 50; \$3 = \$3 + 50; print \$1, \$2, \$3}'"
"> ${name}.preprocessed.bed")
pydnase_cmd=("wellington_bootstrap.py -A ${fdr_limit:-} -p 12"
"${teatment} ${control} preprocessed.bed"
"${name}/${tname}_specific_fp.txt"
"${name}/${cname}_specific_fp.txt")
# set log file names
logfile=${logdir}/${name}.$(basename "$0" .sh).log
scrfile=${logdir}/${name}.$(basename "$0" .sh).scr
# write job script
script=$(cat <<- EOS
#!/bin/bash
#SBATCH --time=24:00:00
#SBATCH -N 1
#SBATCH -n 12
#SBATCH -mem=24gb
#SBATCH --job-name=diffFP.${name}
#SBATCH -o ${logfile}
${depend:-}
# load modules
source ~/miniconda3/etc/profile.d/conda.sh
conda activate pydnase
printf "\nSTART: %s %s\n" \`date '+%Y-%m-%d %H:%M:%S'\`
# pre process bed
${bed_cmd[@]}
# run wellington
mkdir ${outdir}/${name}
${pydnase_cmd[@]}
printf "\nEND: %s %s\n" \`date '+%Y-%m-%d %H:%M:%S'\`
EOS
)
echo "$script" > $scrfile
sbatch "$scrfile"
exit 0