From dc5cceae064817036a855ae921a62858ae30cf97 Mon Sep 17 00:00:00 2001 From: Marton Barta Date: Thu, 14 Oct 2021 20:50:12 -0400 Subject: [PATCH 1/5] Uploading the shell script! lets gooo --- Marton_Sarah_Madeline.sh | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 Marton_Sarah_Madeline.sh diff --git a/Marton_Sarah_Madeline.sh b/Marton_Sarah_Madeline.sh new file mode 100644 index 0000000..2b00a8d --- /dev/null +++ b/Marton_Sarah_Madeline.sh @@ -0,0 +1,52 @@ +## Putting all our mcrAgenes and hsp70gene into their own files (with for loop) + + # mcrAgene +for i in ./ref_sequences/mcr*.fasta +do +cat $i >> mcrAgene.fasta +done + # hsp70gene +for i in ./ref_sequences/hsp*.fasta +do +cat $i >> hsp70gene.fasta +done + +## Aligning reference sequences (with muscle) + + # mcrAgene +./muscle -in mcrAgene.fasta -out MCRA_Muscle.afasta + + # hsp70gene +./muscle -in hsp70gene.fasta -out HSP_Muscle.afasta + +## Building hmm profiles for the mcrAgene & hsp70gene + + # mcrAgene +./hmmbuild mcrAgene.hmm MCRA_Muscle.afasta + + # hsp70gene +./hmmbuild hsp70gene.hmm HSP_Muscle.afasta + +## Searching the proteomes for mcrAgene and hsp70gen (with hmmsearch) + + # mcrAgene & hsp70gene +echo " +HSP70 & mcrA output +" > final.tbl + +for genes in ./proteomes/*.fasta +do +./hmmsearch --tblout $genes.hsp70gene.tbl hsp70gene.hmm $genes +./hmmsearch --tblout $genes.mcrAgene.tbl mcrAgene.hmm $genes +done + +for genes in ./proteomes/*.fasta +do +x=$(echo $genes | sed -E 's/[^0-9]//g') +hsp=$(cat $genes.hsp70gene.tbl | grep "HSP_Muscle" | uniq | wc -l) +mcrA=$(cat $genes.mcrAgene.tbl | grep "MCRA_Muscle" | uniq | wc -l) +echo "Proteome" $x $hsp "HSP" $mcrA "mcrA" >> final.tbl +done + +grep -v ' 0 ' final.tbl > candidates.txt + From 9f38f51ed6ce5fb8ed45a0206068a59064484e17 Mon Sep 17 00:00:00 2001 From: Marton Barta Date: Thu, 14 Oct 2021 20:55:01 -0400 Subject: [PATCH 2/5] There are the candidates xoxo --- candidates.txt | 18 ++++++++++++++++++ 1 
file changed, 18 insertions(+) create mode 100644 candidates.txt diff --git a/candidates.txt b/candidates.txt new file mode 100644 index 0000000..1358546 --- /dev/null +++ b/candidates.txt @@ -0,0 +1,18 @@ + +HSP70 & mcrA output + +Proteome 03 3 HSP 1 mcrA +Proteome 05 2 HSP 1 mcrA +Proteome 07 2 HSP 1 mcrA +Proteome 15 1 HSP 1 mcrA +Proteome 16 1 HSP 1 mcrA +Proteome 19 1 HSP 2 mcrA +Proteome 23 2 HSP 2 mcrA +Proteome 24 2 HSP 1 mcrA +Proteome 38 1 HSP 1 mcrA +Proteome 39 1 HSP 1 mcrA +Proteome 42 3 HSP 1 mcrA +Proteome 44 1 HSP 1 mcrA +Proteome 45 3 HSP 1 mcrA +Proteome 48 1 HSP 1 mcrA +Proteome 50 3 HSP 1 mcrA From 6d875405d638b1b2e0c72c072908a3a0db6cb540 Mon Sep 17 00:00:00 2001 From: bmarci99 <81930058+bmarci99@users.noreply.github.com> Date: Thu, 14 Oct 2021 21:00:13 -0400 Subject: [PATCH 3/5] Update Marton_Sarah_Madeline.sh --- Marton_Sarah_Madeline.sh | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Marton_Sarah_Madeline.sh b/Marton_Sarah_Madeline.sh index 2b00a8d..dae8824 100644 --- a/Marton_Sarah_Madeline.sh +++ b/Marton_Sarah_Madeline.sh @@ -27,19 +27,20 @@ done # hsp70gene ./hmmbuild hsp70gene.hmm HSP_Muscle.afasta -## Searching the proteomes for mcrAgene and hsp70gen (with hmmsearch) - # mcrAgene & hsp70gene +## initializing the final table echo " HSP70 & mcrA output " > final.tbl - +## Searching the proteomes for mcrAgene and hsp70gene (with hmmsearch) for genes in ./proteomes/*.fasta do ./hmmsearch --tblout $genes.hsp70gene.tbl hsp70gene.hmm $genes ./hmmsearch --tblout $genes.mcrAgene.tbl mcrAgene.hmm $genes done - +## counting all the times we found gene matches for genes in ./proteomes/*.fasta do x=$(echo $genes | sed -E 's/[^0-9]//g') @@ -48,5 +49,8 @@ mcrA=$(cat $genes.mcrAgene.tbl | grep "MCRA_Muscle" | uniq | wc -l) echo "Proteome" $x $hsp "HSP" $mcrA "mcrA" >> final.tbl done +## saving all the proteomes that are considered candidates (rows with no zero count) grep -v ' 0 ' final.tbl > candidates.txt +## Thank you :))) + From 
56181f46eb1e4cae6024917a010ae841565531a8 Mon Sep 17 00:00:00 2001 From: bmarci99 <81930058+bmarci99@users.noreply.github.com> Date: Thu, 14 Oct 2021 21:02:47 -0400 Subject: [PATCH 4/5] Update Marton_Sarah_Madeline.sh --- Marton_Sarah_Madeline.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Marton_Sarah_Madeline.sh b/Marton_Sarah_Madeline.sh index dae8824..7073cef 100644 --- a/Marton_Sarah_Madeline.sh +++ b/Marton_Sarah_Madeline.sh @@ -1,3 +1,8 @@ +# contributors: + # Sarah Nicholls + # Madeline Laporte + # Marton Barta + ## Putting all our mcrAgenes and hsp70gene into their own files (with for loop) # mcrAgene From e6cf4bf92c892ac2d6bfe56f62fe0751b819fa4d Mon Sep 17 00:00:00 2001 From: bmarci99 <81930058+bmarci99@users.noreply.github.com> Date: Thu, 14 Oct 2021 21:04:02 -0400 Subject: [PATCH 5/5] Create README.md --- README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..581da7f --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Project from Sarah Nicholls (TA: Tim Burton), Madeline Laporte (TA: Tim Burton), Marton Barta (TA: Ceara J. Talbot)