diff --git a/bioinformaticsProject/ref_sequences/finalcandidates.txt b/bioinformaticsProject/ref_sequences/finalcandidates.txt new file mode 100644 index 0000000..784dad6 --- /dev/null +++ b/bioinformaticsProject/ref_sequences/finalcandidates.txt @@ -0,0 +1,15 @@ +Proteome 03 = 3 HSP, 1 MCRA +Proteome 05 = 2 HSP, 1 MCRA +Proteome 07 = 2 HSP, 1 MCRA +Proteome 15 = 1 HSP, 1 MCRA +Proteome 16 = 1 HSP, 1 MCRA +Proteome 19 = 1 HSP, 2 MCRA +Proteome 23 = 2 HSP, 2 MCRA +Proteome 24 = 2 HSP, 1 MCRA +Proteome 38 = 1 HSP, 1 MCRA +Proteome 39 = 1 HSP, 1 MCRA +Proteome 42 = 3 HSP, 1 MCRA +Proteome 44 = 1 HSP, 1 MCRA +Proteome 45 = 3 HSP, 1 MCRA +Proteome 48 = 1 HSP, 1 MCRA +Proteome 50 = 3 HSP, 1 MCRA diff --git a/bioinformaticsProject/ref_sequences/finalscript.sh b/bioinformaticsProject/ref_sequences/finalscript.sh new file mode 100644 index 0000000..faa1ba3 --- /dev/null +++ b/bioinformaticsProject/ref_sequences/finalscript.sh @@ -0,0 +1,34 @@ +#This script is for the final project of Biocomputing +#Usage: bash finalscript.sh + +#STEP 1a: combine all hsp files in ref_sequences into a total file called HSP_file.fasta +cat hsp*.fasta >> HSP_file.fasta +#STEP 1b: combine all mcrA files in ref_sequences into a total file called mcrA_file.fasta +cat mcrA*.fasta >> mcrA_file.fasta + +#STEP 2a: Search for HSP in each proteome +./muscle -in HSP_file.fasta -out HSP_Muscle.afasta +#STEP 2b: Search for mcrA in each proteome +./muscle -in mcrA_file.fasta -out MCRA_Muscle.afasta + +#STEP 3a: Build hmm profile +~/Private/bin/bin/hmmbuild HSP_hmm.txt HSP_Muscle.afasta +#STEP 3b: Build mcrA profile +~/Private/bin/bin/hmmbuild MCRA_hmm.txt MCRA_Muscle.afasta + +#STEP 4: Count matches for HSP and MCRA (against each proteome) +for number in proteome*.fasta +do +name=$(echo $number | cut -d_ -f 2| cut -d. -f 1) +~/Private/bin/bin/hmmsearch --tblout match${name}resultsHSP.search HSP_hmm.txt $number +~/Private/bin/bin/hmmsearch --tblout match${name}resultsMCRA.search MCRA_hmm.txt $number +done + +#STEP 5: Make table with proteome number in first column, number of HSP matches in second column, and number of MCRA matches in third column +for i in {01..50} +do +coltwo=$(cat match"$i"resultsHSP.search | grep -v "#" | wc -l) +colthree=$(cat match"$i"resultsMCRA.search | grep -v "#" | wc -l) +echo "Proteome $i $coltwo" "$colthree" +done +