diff --git a/Exercise11_files/Problem1/Problem1script.sh b/Exercise11_files/Problem1/Problem1script.sh
new file mode 100644
index 0000000..040d08d
--- /dev/null
+++ b/Exercise11_files/Problem1/Problem1script.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Align every *ref file, build an HMM from each alignment, search every
+# *fasta file with the three HMMs, then summarise the hits.
+
+# Tool locations, relative to the directory the script is run from.
+MUSCLE=../../../../muscle.exe
+HMMER_BIN=../../../../hmmer-3.1b2-cygwin64/binaries
+
+for i in *ref
+do
+    "$MUSCLE" -in "$i" -out "$i.aln"
+    "$HMMER_BIN/hmmbuild" "$i.hmm" "$i.aln"
+done
+
+# One hits table per (FASTA file, HMM) pair.
+for i in *fasta
+do
+    "$HMMER_BIN/hmmsearch" --tblout "$i.sigma.hits" sigma.ref.hmm "$i"
+    "$HMMER_BIN/hmmsearch" --tblout "$i.sporecoat.hits" sporecoat.ref.hmm "$i"
+    "$HMMER_BIN/hmmsearch" --tblout "$i.transporter.hits" transporter.ref.hmm "$i"
+done
+
+# Keep rows containing "tr|", strip the first run of ID characters ending in
+# "_9" from each row, and write columns 1, 3 and 5 of what remains.
+cat *.hits | grep "tr|" | sed -E 's/[tr|A-Z0-9]+\_9//' | awk '{print $1 " " $3 " " $5}' > bacteriahmmout.txt
diff --git a/Exercise11_files/Problem1/exercise11.py b/Exercise11_files/Problem1/exercise11.py
new file mode 100755
index 0000000..e3e9f85
--- /dev/null
+++ b/Exercise11_files/Problem1/exercise11.py
@@ -0,0 +1,18 @@
+#!/bin/bash
+###Question 1###
+# NOTE: despite the .py extension, this file contains shell commands.
+
+# Align every *ref file and build an HMM from each alignment.
+for i in *ref
+do
+    ../../../../muscle.exe -in "$i" -out "$i.aln"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild "$i.hmm" "$i.aln"
+done
+
+# Search every *fasta file with each HMM; one hits table per pair.
+for i in *fasta
+do
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.sigma.hits" sigma.ref.hmm "$i"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.sporecoat.hits" sporecoat.ref.hmm "$i"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.transporter.hits" transporter.ref.hmm "$i"
+done
diff --git a/Exercise11_files/exercise11.py b/Exercise11_files/exercise11.py
new file mode 100755
index 0000000..6443683
--- /dev/null
+++ b/Exercise11_files/exercise11.py
@@ -0,0 +1,19 @@
+#!/bin/bash
+###Question 1###
+# NOTE: despite the .py extension, this file contains shell commands.
+
+# Align every *ref file and build an HMM from the alignment muscle wrote.
+for i in *ref
+do
+    ../../../../muscle.exe -in "$i" -out "$i.aln"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild "$i.hmm" "$i.aln"
+done
+
+# Hits tables are named per input file so later iterations don't overwrite
+# earlier ones; the HMM names match the "$i.hmm" files built above.
+for i in *fasta
+do
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.sigma.hits" sigma.ref.hmm "$i"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.sporecoat.hits" sporecoat.ref.hmm "$i"
+    ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout "$i.transporter.hits" transporter.ref.hmm "$i"
+done
diff --git a/exercise11_problem2.py b/exercise11_problem2.py
new file mode 100644
index 0000000..aa27712
--- /dev/null
+++ b/exercise11_problem2.py
@@ -0,0 +1,51 @@
+import re
+
+
+def write_fasta(path, ids, seqs):
+    """Write paired headers and sequences to *path*, one item per line."""
+    with open(path, "w") as ofile:
+        for seqid, seq in zip(ids, seqs):
+            ofile.write(seqid + "\n" + seq + "\n")
+
+
+# unzip files in unix before running this script
+# search strings
+Motif1 = r'AKKPRVZE'
+Motif2 = r'AAQWWRNYGG'
+
+# lists! headers and sequences are kept in parallel, one pair per match
+m1_id = []
+m1_seq = []
+
+m2_id = []
+m2_seq = []
+
+other_id = []
+other_seq = []
+
+# loop over the file; each sequence line is filed under the latest header
+seqid = None
+with open('./Exercise11_files/Problem2/motifsort.fasta', "r") as thefile:
+    for line in thefile:
+        line = line.strip()
+        if not line:
+            # blank lines carry no sequence; don't emit empty records
+            continue
+        if line.startswith(">"):
+            seqid = line
+        elif seqid is None:
+            raise ValueError("sequence line found before any '>' header")
+        elif re.search(Motif1, line):
+            m1_id.append(seqid)
+            m1_seq.append(line)
+        elif re.search(Motif2, line):
+            m2_id.append(seqid)
+            m2_seq.append(line)
+        else:
+            other_id.append(seqid)
+            other_seq.append(line)
+
+# write fasta files from lists :)
+write_fasta("m1_motif.txt", m1_id, m1_seq)
+write_fasta("m2_motif.txt", m2_id, m2_seq)
+write_fasta("other.txt", other_id, other_seq)