Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Exercise11_files/Problem1/Problem1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash
#
# Build a profile HMM from every reference file (*.ref) in the current
# directory and search every proteome (*.fasta) in the current directory
# with each profile.
#
# Files written to the current directory:
#   ref_filenames.txt    base names of the *.ref files, one per line
#   fasta_filenames.txt  base names of the *.fasta files, one per line
#   <ref>.align          muscle alignment of <ref>.ref
#   <ref>.hmm            profile built by hmmbuild from <ref>.align
#   <ref>_<org>.hits     hmmsearch --tblout table for <ref>.hmm vs <org>.fasta
#   results.txt          one CSV row per hit (rows are appended):
#                        organism,<ref>.hmm,target name,query name,E-value
#
# Requires muscle (called with -in/-out), hmmbuild and hmmsearch on PATH.
# Exit status is non-zero if any of those tools failed.

# Print a diagnostic message on stderr.
warn() {
  printf 'Problem1.sh: %s\n' "$*" >&2
}

# Print the name, minus its final extension, of every file in the current
# directory that ends in ".$1", one per line.
list_basenames() {
  local ext=$1 path
  for path in *."${ext}"; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "${path}" ]] || continue
    printf '%s\n' "${path%.*}"
  done
}

# Append one CSV row per hit found in an hmmsearch --tblout table to
# results.txt.
#   $1 - organism name (base name of the searched .fasta file)
#   $2 - reference name (base name of the .hmm profile)
#   $3 - path of the --tblout table
append_hits() {
  local organism=$1 ref=$2 table=$3
  # In --tblout output, column 1 is the target name, column 3 the query
  # name and column 5 the full-sequence E-value (column 6 is the bit score).
  # Only lines that START with '#' are comments; a '#' further along a line
  # is part of the free-text description and must not discard the hit.
  awk -v org="${organism}" -v model="${ref}.hmm" '
    BEGIN { OFS = "," }
    /^#/ || NF == 0 { next }
    { print org, model, $1, $3, $5 }
  ' "${table}" >> results.txt
}

# Align, build and search for every reference/organism combination.
main() {
  local ref organism hits
  local status=0

  # Truncate ('>') rather than append so that re-running the script does
  # not duplicate list entries and repeat every search.
  list_basenames ref > ref_filenames.txt
  list_basenames fasta > fasta_filenames.txt

  if [[ ! -s ref_filenames.txt ]]; then
    warn 'no *.ref files in the current directory; nothing to do'
    return 0
  fi

  # The lists are read on file descriptors 3 and 4 so that the tools run
  # inside the loops cannot consume them through stdin.
  while IFS= read -r ref <&3; do
    printf '%s\n' "${ref}"

    # Multiple sequence alignment of the reference sequences.
    if ! muscle -in "${ref}.ref" -out "${ref}.align"; then
      warn "muscle failed for ${ref}.ref; skipping this reference"
      status=1
      continue
    fi

    # Profile HMM built from that alignment.
    if ! hmmbuild "${ref}.hmm" "${ref}.align"; then
      warn "hmmbuild failed for ${ref}.align; skipping this reference"
      status=1
      continue
    fi

    # Search every organism's proteome with the profile just built.
    while IFS= read -r organism <&4; do
      hits="${ref}_${organism}.hits"
      if ! hmmsearch --tblout "${hits}" "${ref}.hmm" "${organism}.fasta"; then
        warn "hmmsearch failed for ${ref}.hmm vs ${organism}.fasta"
        status=1
        continue
      fi
      append_hits "${organism}" "${ref}" "${hits}" || status=1
    done 4< fasta_filenames.txt
  done 3< ref_filenames.txt

  return "${status}"
}

main "$@"

@lyy005 lyy005 Dec 12, 2017

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

e-value should be $5
Other parts look good

104 changes: 104 additions & 0 deletions Exercise11_files/Problem1/results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
Arthrobacter,sigma.hmm,tr|A0A0P7HFJ4|A0A0P7HFJ4_9MICC,sigma,78.3
Arthrobacter,sigma.hmm,tr|A0A0P7FPV6|A0A0P7FPV6_9MICC,sigma,72.0
Arthrobacter,sigma.hmm,tr|A0A0P7GDV0|A0A0P7GDV0_9MICC,sigma,55.2
Arthrobacter,sigma.hmm,tr|A0A0P7FY19|A0A0P7FY19_9MICC,sigma,53.3
Arthrobacter,sigma.hmm,tr|A0A0P7HEI0|A0A0P7HEI0_9MICC,sigma,50.0
Arthrobacter,sigma.hmm,tr|A0A0P7GE65|A0A0P7GE65_9MICC,sigma,46.4
Arthrobacter,sigma.hmm,tr|A0A0P7HL11|A0A0P7HL11_9MICC,sigma,42.5
Arthrobacter,sigma.hmm,tr|A0A0P7FHT1|A0A0P7FHT1_9MICC,sigma,40.8
Arthrobacter,sigma.hmm,tr|A0A0N8HX11|A0A0N8HX11_9MICC,sigma,40.8
Arthrobacter,sigma.hmm,tr|A0A0P7GAX7|A0A0P7GAX7_9MICC,sigma,22.9
Bacillus,sigma.hmm,tr|A6CNQ6|A6CNQ6_9BACI,sigma,83.6
Bacillus,sigma.hmm,tr|A6CSJ0|A6CSJ0_9BACI,sigma,78.1
Bacillus,sigma.hmm,tr|A6CJY3|A6CJY3_9BACI,sigma,75.3
Bacillus,sigma.hmm,tr|A6CU12|A6CU12_9BACI,sigma,69.6
Bacillus,sigma.hmm,tr|A6CJR5|A6CJR5_9BACI,sigma,65.9
Bacillus,sigma.hmm,tr|A6CHU4|A6CHU4_9BACI,sigma,64.1
Bacillus,sigma.hmm,tr|A6CMX3|A6CMX3_9BACI,sigma,63.2
Bacillus,sigma.hmm,tr|A6CS17|A6CS17_9BACI,sigma,62.8
Bacillus,sigma.hmm,tr|A6CU10|A6CU10_9BACI,sigma,62.3
Bacillus,sigma.hmm,tr|A6CJX5|A6CJX5_9BACI,sigma,59.8
Bacillus,sigma.hmm,tr|A6CSN9|A6CSN9_9BACI,sigma,58.3
Bacillus,sigma.hmm,tr|A6CTI9|A6CTI9_9BACI,sigma,57.7
Bacillus,sigma.hmm,tr|A6CPD2|A6CPD2_9BACI,sigma,57.7
Bacillus,sigma.hmm,tr|A6CIS3|A6CIS3_9BACI,sigma,57.3
Bacillus,sigma.hmm,tr|A6CII5|A6CII5_9BACI,sigma,56.5
Bacillus,sigma.hmm,tr|A6CI15|A6CI15_9BACI,sigma,56.3
Bacillus,sigma.hmm,tr|A6CU83|A6CU83_9BACI,sigma,55.0
Bacillus,sigma.hmm,tr|A6CI19|A6CI19_9BACI,sigma,51.8
Bacillus,sigma.hmm,tr|A6CQB8|A6CQB8_9BACI,sigma,50.3
Bacillus,sigma.hmm,tr|A6CK70|A6CK70_9BACI,sigma,47.1
Bacillus,sigma.hmm,tr|A6CIW8|A6CIW8_9BACI,sigma,42.5
Bacillus,sigma.hmm,tr|A6CIF2|A6CIF2_9BACI,sigma,41.1
Bacillus,sigma.hmm,tr|A6CM50|A6CM50_9BACI,sigma,35.4
Bacillus,sigma.hmm,tr|A6CL73|A6CL73_9BACI,sigma,33.6
Clostridium,sigma.hmm,tr|R7MFB1|R7MFB1_9CLOT,sigma,79.7
Clostridium,sigma.hmm,tr|R7MJY2|R7MJY2_9CLOT,sigma,79.0
Clostridium,sigma.hmm,tr|R7MGY2|R7MGY2_9CLOT,sigma,70.7
Clostridium,sigma.hmm,tr|R7MFJ3|R7MFJ3_9CLOT,sigma,48.3
Clostridium,sigma.hmm,tr|R7MFY1|R7MFY1_9CLOT,sigma,46.9
Clostridium,sigma.hmm,tr|R7MAD7|R7MAD7_9CLOT,sigma,44.6
Clostridium,sigma.hmm,tr|R7MHN3|R7MHN3_9CLOT,sigma,40.4
Clostridium,sigma.hmm,tr|R7MCS9|R7MCS9_9CLOT,sigma,39.9
Flavobacterium,sigma.hmm,tr|A0A1S1J8W1|A0A1S1J8W1_9FLAO,sigma,61.9
Flavobacterium,sigma.hmm,tr|A0A1S1J0A5|A0A1S1J0A5_9FLAO,sigma,61.8
Flavobacterium,sigma.hmm,tr|A0A1S1J9H9|A0A1S1J9H9_9FLAO,sigma,61.4
Flavobacterium,sigma.hmm,tr|A0A1S1JAH2|A0A1S1JAH2_9FLAO,sigma,61.2
Flavobacterium,sigma.hmm,tr|A0A1S1J7G0|A0A1S1J7G0_9FLAO,sigma,57.5
Flavobacterium,sigma.hmm,tr|A0A1S1J0Z8|A0A1S1J0Z8_9FLAO,sigma,56.6
Flavobacterium,sigma.hmm,tr|A0A1S1JF58|A0A1S1JF58_9FLAO,sigma,54.3
Flavobacterium,sigma.hmm,tr|A0A1S1JAU9|A0A1S1JAU9_9FLAO,sigma,52.5
Flavobacterium,sigma.hmm,tr|A0A1S1JAK2|A0A1S1JAK2_9FLAO,sigma,48.2
Flavobacterium,sigma.hmm,tr|A0A1S1J180|A0A1S1J180_9FLAO,sigma,47.8
Flavobacterium,sigma.hmm,tr|A0A1S1J4G5|A0A1S1J4G5_9FLAO,sigma,46.1
Flavobacterium,sigma.hmm,tr|A0A1S1J5F7|A0A1S1J5F7_9FLAO,sigma,45.8
Flavobacterium,sigma.hmm,tr|A0A1S1J7E0|A0A1S1J7E0_9FLAO,sigma,45.1
Flavobacterium,sigma.hmm,tr|A0A1S1J430|A0A1S1J430_9FLAO,sigma,42.6
Flavobacterium,sigma.hmm,tr|A0A1S1J2T6|A0A1S1J2T6_9FLAO,sigma,42.3
Flavobacterium,sigma.hmm,tr|A0A1S1JDD3|A0A1S1JDD3_9FLAO,sigma,38.2
Flavobacterium,sigma.hmm,tr|A0A1S1J6K9|A0A1S1J6K9_9FLAO,sigma,37.7
Flavobacterium,sigma.hmm,tr|A0A1S1J9G5|A0A1S1J9G5_9FLAO,sigma,36.5
Flavobacterium,sigma.hmm,tr|A0A1S1J5L5|A0A1S1J5L5_9FLAO,sigma,35.8
Flavobacterium,sigma.hmm,tr|A0A1S1J6X1|A0A1S1J6X1_9FLAO,sigma,31.1
Flavobacterium,sigma.hmm,tr|A0A1S1JCL3|A0A1S1JCL3_9FLAO,sigma,30.5
Flavobacterium,sigma.hmm,tr|A0A1S1J6M2|A0A1S1J6M2_9FLAO,sigma,22.2
Flavobacterium,sigma.hmm,tr|A0A1S1J022|A0A1S1J022_9FLAO,sigma,21.0
Flavobacterium,sigma.hmm,tr|A0A1S1J312|A0A1S1J312_9FLAO,sigma,18.1
Limnohabitans,sigma.hmm,tr|A0A0P0LD52|A0A0P0LD52_9BURK,sigma,74.9
Limnohabitans,sigma.hmm,tr|A0A0P0LKP5|A0A0P0LKP5_9BURK,sigma,60.9
Limnohabitans,sigma.hmm,tr|A0A0P0M8A4|A0A0P0M8A4_9BURK,sigma,58.7
Limnohabitans,sigma.hmm,tr|A0A0P0MBT4|A0A0P0MBT4_9BURK,sigma,49.5
Limnohabitans,sigma.hmm,tr|A0A0P0M9M9|A0A0P0M9M9_9BURK,sigma,47.6
Limnohabitans,sigma.hmm,tr|A0A0P0MA37|A0A0P0MA37_9BURK,sigma,46.7
Limnohabitans,sigma.hmm,tr|A0A0P0M9Z4|A0A0P0M9Z4_9BURK,sigma,21.2
Rhizobium,sigma.hmm,tr|A0A1Q9ANZ6|A0A1Q9ANZ6_9RHIZ,sigma,75.3
Rhizobium,sigma.hmm,tr|A0A1Q9ANQ8|A0A1Q9ANQ8_9RHIZ,sigma,75.1
Rhizobium,sigma.hmm,tr|A0A1Q9AL94|A0A1Q9AL94_9RHIZ,sigma,65.5
Rhizobium,sigma.hmm,tr|A0A1Q9ALF0|A0A1Q9ALF0_9RHIZ,sigma,61.3
Rhizobium,sigma.hmm,tr|A0A1Q9AF84|A0A1Q9AF84_9RHIZ,sigma,61.2
Rhizobium,sigma.hmm,tr|A0A1Q9AII6|A0A1Q9AII6_9RHIZ,sigma,59.6
Rhizobium,sigma.hmm,tr|A0A1Q9ACE9|A0A1Q9ACE9_9RHIZ,sigma,56.9
Rhizobium,sigma.hmm,tr|A0A1Q9AIU3|A0A1Q9AIU3_9RHIZ,sigma,55.8
Rhizobium,sigma.hmm,tr|A0A1Q9AEH2|A0A1Q9AEH2_9RHIZ,sigma,54.1
Rhizobium,sigma.hmm,tr|A0A1Q9ANU6|A0A1Q9ANU6_9RHIZ,sigma,44.9
Rhizobium,sigma.hmm,tr|A0A1Q9AQ99|A0A1Q9AQ99_9RHIZ,sigma,43.5
Rhizobium,sigma.hmm,tr|A0A1Q9AQ73|A0A1Q9AQ73_9RHIZ,sigma,43.4
Rhizobium,sigma.hmm,tr|A0A1Q9ADJ9|A0A1Q9ADJ9_9RHIZ,sigma,32.2
Rhizobium,sigma.hmm,tr|A0A1Q9APD8|A0A1Q9APD8_9RHIZ,sigma,24.5
Roseobacter,sigma.hmm,tr|B7RH33|B7RH33_9RHOB,sigma,75.3
Roseobacter,sigma.hmm,tr|B7RGX5|B7RGX5_9RHOB,sigma,64.7
Roseobacter,sigma.hmm,tr|B7RH17|B7RH17_9RHOB,sigma,63.8
Roseobacter,sigma.hmm,tr|B7RI57|B7RI57_9RHOB,sigma,62.7
Roseobacter,sigma.hmm,tr|B7RJL9|B7RJL9_9RHOB,sigma,62.7
Roseobacter,sigma.hmm,tr|B7RH51|B7RH51_9RHOB,sigma,56.6
Roseobacter,sigma.hmm,tr|B7RSA6|B7RSA6_9RHOB,sigma,49.2
Verrucomicrobia,sigma.hmm,tr|A0A1W9LF71|A0A1W9LF71_9BACT,sigma,75.1
Verrucomicrobia,sigma.hmm,tr|A0A1W9LBX6|A0A1W9LBX6_9BACT,sigma,68.4
Verrucomicrobia,sigma.hmm,tr|A0A1W9LH48|A0A1W9LH48_9BACT,sigma,61.3
Verrucomicrobia,sigma.hmm,tr|A0A1W9LHW5|A0A1W9LHW5_9BACT,sigma,49.8
Verrucomicrobia,sigma.hmm,tr|A0A1W9LA94|A0A1W9LA94_9BACT,sigma,48.6
Bacillus,sporecoat.hmm,tr|A6BZD2|A6CT85_9BACI,sporecoat,118.6
Bacillus,transporter.hmm,tr|A6CMF6|A6CMF6_9BACI,transporter,24.9
Flavobacterium,transporter.hmm,tr|A0A1S1J3R7|A0A1S1J3R7_9FLAO,transporter,33.9
Limnohabitans,transporter.hmm,tr|A0A0P0M9B4|A0A0P0M9B4_9BURK,transporter,34.2
Roseobacter,transporter.hmm,tr|B7RKP8|B7RKP8_9RHOB,transporter,10.3
31 changes: 31 additions & 0 deletions Exercise11_files/Problem2/problem2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

import re

# Regular expressions for the two motifs sequences are sorted by.
motif1 = r'AKKPRVZE'
motif2 = r'AAQWWRNYGG'


def sort_by_motif(in_path="motifsort.fasta",
                  motif1_path="motif1.fasta",
                  motif2_path="motif2.fasta",
                  nomotif_path="nomotif.fasta"):
    """Split a FASTA file into three files according to which motif matches.

    Every sequence line is written, preceded by the most recent header line,
    to exactly one output file: ``motif1_path`` if it matches ``motif1``,
    otherwise ``motif2_path`` if it matches ``motif2``, otherwise
    ``nomotif_path``. Each sequence line is classified on its own, so the
    input is assumed to hold one sequence line per record.

    Blank lines are ignored. All four files are closed when the function
    returns, including when an error occurs.

    Raises:
        ValueError: if a sequence line appears before any ``>`` header line.
    """
    with open(in_path, "r") as fasta_in, \
            open(motif1_path, "w") as motif1_out, \
            open(motif2_path, "w") as motif2_out, \
            open(nomotif_path, "w") as nomotif_out:
        seqid = None
        for line in fasta_in:
            line = line.strip()
            if not line:
                # A blank line is not a sequence; writing it would emit an
                # empty record under the previous header.
                continue
            if line.startswith(">"):
                seqid = line
                continue
            if seqid is None:
                raise ValueError(
                    "sequence line found before any FASTA header: %r" % line)
            # motif1 takes precedence when a sequence contains both motifs.
            if re.search(motif1, line):
                target = motif1_out
            elif re.search(motif2, line):
                target = motif2_out
            else:
                target = nomotif_out
            target.write(seqid + "\n" + line + "\n")


if __name__ == "__main__":
    sort_by_motif()


Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

motifsortin.close()
motif1out.close()
motif2out.close()
nomotifout.close()

Good job