From eaf1101b91e94f4d748f6efe36dc67ffdc456fd4 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 10 Nov 2017 10:47:53 -0500 Subject: [PATCH 01/11] Made Python script, added pseudocode for Q1 --- Exercise11_files/exercise11.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100755 Exercise11_files/exercise11.py diff --git a/Exercise11_files/exercise11.py b/Exercise11_files/exercise11.py new file mode 100755 index 0000000..0b18e0f --- /dev/null +++ b/Exercise11_files/exercise11.py @@ -0,0 +1,16 @@ +###Question 1### + +for i in *ref +do + muscle -in 4i -out $i.aln + hmmbuild ________ +done + +for i in *fasta +do + hmmsearch #model1 + hmmsearch #model2 + hmmsearch #model3 +done + +####Question 2#### From a6306590186a3825b3e92fdcdfd3237b09de18f4 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 10 Nov 2017 10:51:43 -0500 Subject: [PATCH 02/11] problem2.py file --- exercise11_problem2.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 exercise11_problem2.py diff --git a/exercise11_problem2.py b/exercise11_problem2.py new file mode 100644 index 0000000..e69de29 From 0318b69aee28411a0007364afe8311b137463d92 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 10 Nov 2017 10:52:23 -0500 Subject: [PATCH 03/11] added Q2 pseudocode --- Exercise11_files/exercise11.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Exercise11_files/exercise11.py b/Exercise11_files/exercise11.py index 0b18e0f..0d4ac3a 100755 --- a/Exercise11_files/exercise11.py +++ b/Exercise11_files/exercise11.py @@ -14,3 +14,9 @@ done ####Question 2#### +for line in infile: + if ">" + seqid=line + elif "/tkk" + print seqid + print line \ No newline at end of file From 1213eac9229d290f1432df2bc71354aa8331d9b8 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 10 Nov 2017 11:06:22 -0500 Subject: [PATCH 04/11] Creates alignments and HMMs, searches each protein type in bacterial fastas --- Exercise11_files/exercise11.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/Exercise11_files/exercise11.py b/Exercise11_files/exercise11.py index 0d4ac3a..6443683 100755 --- a/Exercise11_files/exercise11.py +++ b/Exercise11_files/exercise11.py @@ -2,21 +2,13 @@ for i in *ref do - muscle -in 4i -out $i.aln - hmmbuild ________ + ../../../../muscle.exe -in $i -out $i.aln + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild $i.hmm $i.align done for i in *fasta do - hmmsearch #model1 - hmmsearch #model2 - hmmsearch #model3 + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sigma.hits sigma.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sporecoat.hits sporecoat.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout transporter.hits transporter.hmm $i done - -####Question 2#### -for line in infile: - if ">" - seqid=line - elif "/tkk" - print seqid - print line \ No newline at end of file From ea4ecd75af6315d7fd89a22da189c028fc903ac0 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 10 Nov 2017 11:08:32 -0500 Subject: [PATCH 05/11] made line strip! --- exercise11_problem2.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/exercise11_problem2.py b/exercise11_problem2.py index e69de29..5c0639e 100644 --- a/exercise11_problem2.py +++ b/exercise11_problem2.py @@ -0,0 +1,14 @@ +import os +import pandas + +cwd = os.getcwd() +print cwd + +thefile=open('./Exercise11_files/Problem2/motifsort.fasta',"r") + +for line in thefile: + line = line.strip() + if ">" in line: + print line + else: + print "harold" \ No newline at end of file From f5b303a20ad5dba96f6ada59ec252e69e10bfe03 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 10 Nov 2017 11:20:16 -0500 Subject: [PATCH 06/11] 2 cloooooose! --- exercise11_problem2.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/exercise11_problem2.py b/exercise11_problem2.py index 5c0639e..1cffd41 100644 --- a/exercise11_problem2.py +++ b/exercise11_problem2.py @@ -1,14 +1,31 @@ import os import pandas +import re cwd = os.getcwd() print cwd +Motif1= r'AKKPRVZE' +Motif2 = r'AAQWWRNYGG' + thefile=open('./Exercise11_files/Problem2/motifsort.fasta',"r") for line in thefile: line = line.strip() if ">" in line: - print line - else: - print "harold" \ No newline at end of file + seqid = line + elif re.search(Motif1, line): + m1 = line + print seqid + print m1 + elif re.search(Motif2, line): + m2 = line + print seqid + print m2 + + + + + + + From bd6b908b5d153735fe19d09336aece41ab425635 Mon Sep 17 00:00:00 2001 From: Katherine Date: Fri, 10 Nov 2017 11:20:33 -0500 Subject: [PATCH 07/11] Everything up to making file is working --- Exercise11_files/Problem1/exercise11.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 Exercise11_files/Problem1/exercise11.py diff --git a/Exercise11_files/Problem1/exercise11.py b/Exercise11_files/Problem1/exercise11.py new file mode 100755 index 0000000..6443683 --- /dev/null +++ b/Exercise11_files/Problem1/exercise11.py @@ -0,0 +1,14 @@ +###Question 1### + +for i in *ref +do + ../../../../muscle.exe -in $i -out $i.aln + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild $i.hmm $i.align +done + +for i in *fasta +do + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sigma.hits sigma.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sporecoat.hits sporecoat.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout transporter.hits transporter.hmm $i +done From 6c4fa71af5de5f007472d3db3e4685d2ff7b6827 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Fri, 10 Nov 2017 11:56:18 -0500 Subject: [PATCH 08/11] FINISED QUESTION 2 WOOOOO WOOOOOOOOOO WOOOOOOOOOOOOOOOOOOO --- exercise11_problem2.py | 53 ++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 17 deletions(-) diff --git a/exercise11_problem2.py b/exercise11_problem2.py index 1cffd41..91f1769 100644 --- a/exercise11_problem2.py +++ b/exercise11_problem2.py @@ -1,31 +1,50 @@ -import os -import pandas import re +# open fastafile +thefile=open('./Exercise11_files/Problem2/motifsort.fasta',"r") -cwd = os.getcwd() -print cwd - +# search strings Motif1= r'AKKPRVZE' Motif2 = r'AAQWWRNYGG' -thefile=open('./Exercise11_files/Problem2/motifsort.fasta',"r") +# lists! +m1_id = [] +m1_seq = [] +m2_id = [] +m2_seq = [] + +other_id = [] +other_seq = [] + +# loop over the file for line in thefile: line = line.strip() if ">" in line: seqid = line elif re.search(Motif1, line): m1 = line - print seqid - print m1 + m1_id.append(seqid) + m1_seq.append(m1) elif re.search(Motif2, line): m2 = line - print seqid - print m2 - - - - - - - + m2_id.append(seqid) + m2_seq.append(m2) + else: + other_id.append(seqid) + other_seq.append(line) + +# write fasta files from lists :) +ofile = open("m1_motif.txt", "w") +for i in range(len(m1_id)): + ofile.write(m1_id[i] + "\n" +m1_seq[i] + "\n") +ofile.close() + +ofile = open("m2_motif.txt", "w") +for i in range(len(m2_id)): + ofile.write(m2_id[i] + "\n" +m2_seq[i] + "\n") +ofile.close() + +ofile = open("other.txt", "w") +for i in range(len(m1_id)): + ofile.write(other_id[i] + "\n" +other_seq[i] + "\n") +ofile.close() \ No newline at end of file From 446f643ee62e59ef447aaa757b77e920677b12e0 Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Mon, 13 Nov 2017 16:41:12 -0500 Subject: [PATCH 09/11] finished problem 1 script --- Exercise11_files/Problem1/Problem1script.sh | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 Exercise11_files/Problem1/Problem1script.sh diff --git a/Exercise11_files/Problem1/Problem1script.sh b/Exercise11_files/Problem1/Problem1script.sh new file mode 100644 index 0000000..040d08d --- /dev/null +++ b/Exercise11_files/Problem1/Problem1script.sh @@ -0,0 +1,16 @@ +for i in *ref +do + ../../../../muscle.exe -in $i -out $i.aln + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild $i.hmm $i.aln +done + + +for i in *fasta +do +/home/Mati/hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.sigma.hits sigma.ref.hmm $i +/home/Mati/hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.sporecoat.hits sporecoat.ref.hmm $i +/home/Mati/hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.transporter.hits transporter.ref.hmm $i +done + +cat *.hits | grep "tr|" | sed -E 's/[tr|A-Z0-9]+\_9//' | awk '{print $1 " " $3 " " $5}' > bacteriahmmout.txt + From 0d81defcef97ca9b2e4f94875beaf9c4eb8273da Mon Sep 17 00:00:00 2001 From: Mati Nemera Date: Mon, 13 Nov 2017 16:41:33 -0500 Subject: [PATCH 10/11] changed python for problem 1 a little --- Exercise11_files/Problem1/exercise11.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Exercise11_files/Problem1/exercise11.py b/Exercise11_files/Problem1/exercise11.py index 6443683..e3e9f85 100755 --- a/Exercise11_files/Problem1/exercise11.py +++ b/Exercise11_files/Problem1/exercise11.py @@ -3,12 +3,12 @@ for i in *ref do ../../../../muscle.exe -in $i -out $i.aln - ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild $i.hmm $i.align + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmbuild $i.hmm $i.aln done for i in *fasta do - ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sigma.hits sigma.hmm $i - ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout sporecoat.hits sporecoat.hmm $i - ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout transporter.hits transporter.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.sigma.hits sigma.ref.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.sporecoat.hits sporecoat.ref.hmm $i + ../../../../hmmer-3.1b2-cygwin64/binaries/hmmsearch --tblout $i.transporter.hits transporter.ref.hmm $i done From f776ba74716a424efe2b5770cd8c409ac44d6c60 Mon Sep 17 00:00:00 2001 From: omegadan01 Date: Mon, 13 Nov 2017 21:39:05 -0500 Subject: [PATCH 11/11] Caught a mistake I made in question 2, it now works --- exercise11_problem2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/exercise11_problem2.py b/exercise11_problem2.py index 91f1769..aa27712 100644 --- a/exercise11_problem2.py +++ b/exercise11_problem2.py @@ -1,4 +1,5 @@ import re +# unzip files in unix # open fastafile thefile=open('./Exercise11_files/Problem2/motifsort.fasta',"r") @@ -45,6 +46,6 @@ ofile.close() ofile = open("other.txt", "w") -for i in range(len(m1_id)): +for i in range(len(other_id)): ofile.write(other_id[i] + "\n" +other_seq[i] + "\n") ofile.close() \ No newline at end of file