From d331db1734e0ec5e20388d27e53e8dfecafa9a61 Mon Sep 17 00:00:00 2001
From: Emily Chen
Date: Thu, 14 Oct 2021 08:49:06 -0400
Subject: [PATCH 1/2] This is the script for our final project (Emily Chen and
 Sanjna Chetan).

---
Reviewer note: the script body below was revised during review, so the blob
id on the "index" line still names the originally submitted file.

 .../ref_sequences/finalscript.sh | 95 +++++++++++++++++++
 1 file changed, 95 insertions(+)
 create mode 100644 bioinformaticsProject/ref_sequences/finalscript.sh

diff --git a/bioinformaticsProject/ref_sequences/finalscript.sh b/bioinformaticsProject/ref_sequences/finalscript.sh
new file mode 100644
index 0000000..faa1ba3
--- /dev/null
+++ b/bioinformaticsProject/ref_sequences/finalscript.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+# This script is for the final project of Biocomputing
+# (Emily Chen and Sanjna Chetan).
+# Usage: bash finalscript.sh
+#
+# Run it from the directory that holds the hsp*.fasta and mcrA*.fasta
+# reference sequences and the proteome files (e.g. proteome_01.fasta).
+# It prints one row per proteome: "Proteome NN HSP_matches MCRA_matches".
+#
+# The tools default to ./muscle and ~/Private/bin/bin/{hmmbuild,hmmsearch};
+# set the MUSCLE, HMMBUILD or HMMSEARCH environment variable to override.
+
+set -euo pipefail
+shopt -s nullglob # a glob with no matches expands to nothing
+
+muscle_bin=${MUSCLE:-./muscle}
+hmmbuild_bin=${HMMBUILD:-$HOME/Private/bin/bin/hmmbuild}
+hmmsearch_bin=${HMMSEARCH:-$HOME/Private/bin/bin/hmmsearch}
+
+# Print an error message to stderr and stop the script.
+die() {
+  printf 'finalscript.sh: %s\n' "$*" >&2
+  exit 1
+}
+
+# combine_refs PREFIX OUTFILE
+# Concatenate every PREFIX*.fasta file into OUTFILE. OUTFILE is rebuilt from
+# scratch and is never read as its own input, so running the script again
+# does not duplicate sequences.
+combine_refs() {
+  local prefix=$1 outfile=$2 f
+  local -a inputs=()
+  for f in "$prefix"*.fasta; do
+    # mcrA_file.fasta itself matches mcrA*.fasta once it exists; skip it.
+    if [[ "$f" != "$outfile" ]]; then
+      inputs+=("$f")
+    fi
+  done
+  (( ${#inputs[@]} > 0 )) || die "no ${prefix}*.fasta reference files found"
+  cat -- "${inputs[@]}" > "${outfile}.tmp"
+  mv -- "${outfile}.tmp" "$outfile"
+}
+
+# build_profile COMBINED ALIGNMENT PROFILE
+# Align the sequences in COMBINED with muscle (written to ALIGNMENT), then
+# build an hmm profile from that alignment with hmmbuild (written to PROFILE).
+build_profile() {
+  local combined=$1 alignment=$2 profile=$3
+  "$muscle_bin" -in "$combined" -out "$alignment"
+  # -o /dev/null keeps hmmbuild's summary out of the results table on stdout.
+  "$hmmbuild_bin" -o /dev/null "$profile" "$alignment"
+}
+
+# count_matches TBLOUT
+# Print how many lines of an hmmsearch --tblout file do not start with "#"
+# (lines starting with "#" are comments in that format).
+count_matches() {
+  # grep -c prints 0 but exits 1 when no line is selected; that is not an
+  # error here, so keep set -e from stopping the script.
+  grep -vc '^#' -- "$1" || true
+}
+
+main() {
+  local proteome name hsp_out mcra_out
+  local -a proteomes=(proteome*.fasta)
+
+  # STEP 1: combine all hsp files into HSP_file.fasta and all mcrA files
+  # into mcrA_file.fasta
+  combine_refs hsp HSP_file.fasta
+  combine_refs mcrA mcrA_file.fasta
+
+  # STEP 2 and 3: align each combined file with muscle and build its hmm
+  # profile
+  build_profile HSP_file.fasta HSP_Muscle.afasta HSP_hmm.txt
+  build_profile mcrA_file.fasta MCRA_Muscle.afasta MCRA_hmm.txt
+
+  # STEP 4 and 5: search each proteome with both profiles and print a table
+  # with the proteome number in the first column, the number of HSP matches
+  # in the second column, and the number of MCRA matches in the third column
+  (( ${#proteomes[@]} > 0 )) || die "no proteome*.fasta files found"
+  for proteome in "${proteomes[@]}"; do
+    # proteome_01.fasta -> 01 (second "_" field, up to the first ".")
+    name=${proteome#*_}
+    name=${name%%_*}
+    name=${name%%.*}
+    hsp_out="match${name}resultsHSP.search"
+    mcra_out="match${name}resultsMCRA.search"
+    "$hmmsearch_bin" -o /dev/null --tblout "$hsp_out" HSP_hmm.txt "$proteome"
+    "$hmmsearch_bin" -o /dev/null --tblout "$mcra_out" MCRA_hmm.txt "$proteome"
+    printf 'Proteome %s %s %s\n' "$name" \
+      "$(count_matches "$hsp_out")" "$(count_matches "$mcra_out")"
+  done
+}
+
+main "$@"

From 9d798f88996812e22e5b7b0a81112b48395e54c2 Mon Sep 17 00:00:00 2001
From: Emily Chen
Date: Thu, 14 Oct 2021 08:59:11 -0400
Subject: [PATCH 2/2] This is the text file of final candidates that are pH
 resistant methanogens.

---
 .../ref_sequences/finalcandidates.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 bioinformaticsProject/ref_sequences/finalcandidates.txt

diff --git a/bioinformaticsProject/ref_sequences/finalcandidates.txt b/bioinformaticsProject/ref_sequences/finalcandidates.txt
new file mode 100644
index 0000000..784dad6
--- /dev/null
+++ b/bioinformaticsProject/ref_sequences/finalcandidates.txt
@@ -0,0 +1,15 @@
+Proteome 03 = 3 HSP, 1 MCRA
+Proteome 05 = 2 HSP, 1 MCRA
+Proteome 07 = 2 HSP, 1 MCRA
+Proteome 15 = 1 HSP, 1 MCRA
+Proteome 16 = 1 HSP, 1 MCRA
+Proteome 19 = 1 HSP, 2 MCRA
+Proteome 23 = 2 HSP, 2 MCRA
+Proteome 24 = 2 HSP, 1 MCRA
+Proteome 38 = 1 HSP, 1 MCRA
+Proteome 39 = 1 HSP, 1 MCRA
+Proteome 42 = 3 HSP, 1 MCRA
+Proteome 44 = 1 HSP, 1 MCRA
+Proteome 45 = 3 HSP, 1 MCRA
+Proteome 48 = 1 HSP, 1 MCRA
+Proteome 50 = 3 HSP, 1 MCRA