diff --git a/Marton_Sarah_Madeline.sh b/Marton_Sarah_Madeline.sh
new file mode 100644
index 0000000..7073cef
--- /dev/null
+++ b/Marton_Sarah_Madeline.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Find candidate proteomes: count mcrA and HSP70 gene hits in every proteome.
+#
+# contributors:
+#   Sarah Nicholls
+#   Madeline Laporte
+#   Marton Barta
+#
+# Usage: bash Marton_Sarah_Madeline.sh
+# Run it from the directory that holds ./ref_sequences, ./proteomes and the
+# ./muscle, ./hmmbuild and ./hmmsearch executables.
+#
+# Output files (current directory):
+#   final.tbl       one "Proteome <id> <n> HSP <n> mcrA" row per proteome
+#   candidates.txt  final.tbl without the rows that have a zero count
+
+# Stop at the first failing step instead of building on missing output.
+set -euo pipefail
+
+# count_hits TEXT FILE
+# Print how many distinct non-comment lines of FILE contain TEXT.
+count_hits() {
+  awk -v text="$1" '
+    /^#/ { next }
+    index($0, text) && !seen[$0]++ { hits++ }
+    END { print hits + 0 }
+  ' "$2"
+}
+
+## Putting all our mcrAgenes and hsp70genes into their own files
+# '>' instead of '>>': a rerun must not append the sequences a second time.
+cat ./ref_sequences/mcr*.fasta > mcrAgene.fasta
+cat ./ref_sequences/hsp*.fasta > hsp70gene.fasta
+
+## Aligning reference sequences (with muscle)
+./muscle -in mcrAgene.fasta -out MCRA_Muscle.afasta
+./muscle -in hsp70gene.fasta -out HSP_Muscle.afasta
+
+## Building hmm profiles for the mcrAgene & hsp70gene
+./hmmbuild mcrAgene.hmm MCRA_Muscle.afasta
+./hmmbuild hsp70gene.hmm HSP_Muscle.afasta
+
+## Initializing the final table (blank line, title, blank line)
+printf '\n%s\n\n' 'HSP70 & mcrA output' > final.tbl
+
+## Searching each proteome for both genes (with hmmsearch) and counting the hits
+for proteome in ./proteomes/*.fasta; do
+  hsp_tbl="${proteome}.hsp70gene.tbl"
+  mcra_tbl="${proteome}.mcrAgene.tbl"
+
+  ./hmmsearch --tblout "$hsp_tbl" hsp70gene.hmm "$proteome"
+  ./hmmsearch --tblout "$mcra_tbl" mcrAgene.hmm "$proteome"
+
+  # Proteome id = the digits of the file name.
+  file_name="${proteome##*/}"
+  id="${file_name//[^0-9]/}"
+
+  hsp="$(count_hits 'HSP_Muscle' "$hsp_tbl")"
+  mcra="$(count_hits 'MCRA_Muscle' "$mcra_tbl")"
+  printf 'Proteome %s %s HSP %s mcrA\n' "$id" "$hsp" "$mcra" >> final.tbl
+done
+
+## Saving the candidates: every line except proteome rows with a zero count
+awk '$1 != "Proteome" || ($3 > 0 && $5 > 0)' final.tbl > candidates.txt
+
+## Thank you :)))
+
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..581da7f
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# Project from Sarah Nicholls (TA: Tim Burton), Madeline Laporte (TA: Tim Burton), Marton Barta (TA: Ceara J. Talbot)
diff --git a/candidates.txt b/candidates.txt
new file mode 100644
index 0000000..1358546
--- /dev/null
+++ b/candidates.txt
@@ -0,0 +1,18 @@
+
+HSP70 & mcrA output
+
+Proteome 03 3 HSP 1 mcrA
+Proteome 05 2 HSP 1 mcrA
+Proteome 07 2 HSP 1 mcrA
+Proteome 15 1 HSP 1 mcrA
+Proteome 16 1 HSP 1 mcrA
+Proteome 19 1 HSP 2 mcrA
+Proteome 23 2 HSP 2 mcrA
+Proteome 24 2 HSP 1 mcrA
+Proteome 38 1 HSP 1 mcrA
+Proteome 39 1 HSP 1 mcrA
+Proteome 42 3 HSP 1 mcrA
+Proteome 44 1 HSP 1 mcrA
+Proteome 45 3 HSP 1 mcrA
+Proteome 48 1 HSP 1 mcrA
+Proteome 50 3 HSP 1 mcrA