diff --git a/tutorial11part1.sh b/tutorial11part1.sh
new file mode 100644
--- /dev/null
+++ b/tutorial11part1.sh
@@ -0,0 +1,31 @@
+# Build a profile HMM for each protein family (sigma, sporecoat, transporter),
+# search every FASTA file in Problem1 with each profile, and collect the hits
+# in results.txt.
+#
+# Paths are relative: run from the directory that holds ./Muscle, ./Hammer and
+# ./Problem1.
+
+# Align each family's reference sequences, then build an HMM from the alignment.
+./Muscle -in ./Problem1/sigma.ref -out sigma.align
+./Hammer/binaries/hmmbuild sigma.hmm sigma.align
+
+./Muscle -in ./Problem1/sporecoat.ref -out sporecoat.align
+./Hammer/binaries/hmmbuild sporecoat.hmm sporecoat.align
+
+./Muscle -in ./Problem1/transporter.ref -out transporter.align
+./Hammer/binaries/hmmbuild transporter.hmm transporter.align
+
+# Search each FASTA file with all three HMMs, writing one tabular hit file per
+# (FASTA, family) pair next to the FASTA. "$file" is quoted so that paths
+# containing spaces are not split into several arguments.
+for file in Problem1/*.fasta
+do
+    ./Hammer/binaries/hmmsearch --tblout "$file.sigma.hits" sigma.hmm "$file"
+    ./Hammer/binaries/hmmsearch --tblout "$file.sporecoat.hits" sporecoat.hmm "$file"
+    ./Hammer/binaries/hmmsearch --tblout "$file.transporter.hits" transporter.hmm "$file"
+done
+
+# Merge all hit tables: drop lines containing "#" (the table's comment lines),
+# keep columns 1, 3 and 5, and strip the first "tr|<id>|<id>_9" occurrence on
+# each remaining line.
+cat Problem1/*.hits | grep -v "#" | awk '{print $1,$3,$5}' | sed -E 's/tr\|[A-Z0-9]+\|[A-Z0-9]+_9//' > results.txt
diff --git a/tutorial11part2.py b/tutorial11part2.py
new file mode 100644
--- /dev/null
+++ b/tutorial11part2.py
@@ -0,0 +1,45 @@
+"""Sort the lines of motifsort.fasta by the sequence motif they contain.
+
+Each line of motifsort.fasta is copied to motif1.txt if it contains
+AKKPRVZE, to motif2.txt if it contains AAQWWRNYGG (a line containing both
+goes to both files), and to nonMotif.txt if it contains neither.  A short
+summary of how many lines went to each file is printed at the end.
+"""
+import re
+
+import numpy
+import pandas
+
+# Compiled once, outside the loop.  search() is used below rather than match()
+# because a motif may occur anywhere in a sequence, not only at its start.
+MOTIF1 = re.compile("AKKPRVZE")
+MOTIF2 = re.compile("AAQWWRNYGG")
+
+counts = {"motif1.txt": 0, "motif2.txt": 0, "nonMotif.txt": 0}
+
+# The with-block closes all four files even if an error occurs part-way.
+with open("motifsort.fasta", "r") as sequences, \
+        open("motif1.txt", "w") as motif1file, \
+        open("motif2.txt", "w") as motif2file, \
+        open("nonMotif.txt", "w") as others:
+    for line in sequences:
+        # Lines read from a file already end in "\n" (except possibly the
+        # last); normalise to exactly one so no extra blank lines are written.
+        record = line.rstrip("\n") + "\n"
+        has_motif1 = MOTIF1.search(line) is not None
+        has_motif2 = MOTIF2.search(line) is not None
+
+        if has_motif1:
+            motif1file.write(record)
+            counts["motif1.txt"] += 1
+        if has_motif2:
+            motif2file.write(record)
+            counts["motif2.txt"] += 1
+        if not (has_motif1 or has_motif2):
+            others.write(record)
+            counts["nonMotif.txt"] += 1
+
+# Report line counts; printing the file objects themselves would only show
+# their repr.
+for name in ("motif1.txt", "motif2.txt", "nonMotif.txt"):
+    print("%s: %d lines" % (name, counts[name]))