From 2dfad88d29be89875d7e9ac48126a120de0c9004 Mon Sep 17 00:00:00 2001
From: Johanna1403 <johannaolesk99@gmail.com>
Date: Thu, 14 Oct 2021 14:26:54 -0400
Subject: [PATCH 1/7] First steps of project.

---
 bioinformaticsProject/Project1.sh | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 bioinformaticsProject/Project1.sh

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
new file mode 100644
index 0000000..8b5f39c
--- /dev/null
+++ b/bioinformaticsProject/Project1.sh
@@ -0,0 +1,31 @@
+## Identifying candidate pH-resistant methanogenic Archaea
+
+# The aim of this project is to 
+
+# USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path
+
+# 1. concatenating the reference sequences:
+
+#cat $1 > geneA.fasta
+#cat $2 > geneB.fasta
+cat $1/mcrAgene_*.fasta > mcrAgene.fasta
+cat $1/hsp70gene_*.fasta > hsp70gene.fasta
+
+# 2. aligning the concatenated ref sequence files using muscle
+../muscle3.8.31_i86linux64 -in mcrAgene.fasta -out mcrAgene_aligned.fasta
+../muscle3.8.31_i86linux64 -in hsp70gene.fasta -out hsp70gene_aligned.fasta
+
+# 3. build HMM profile
+../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta
+../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta
+
+# 4. search for gene in each proteome
+for proteome in $2/proteome_*.fasta
+do
+../hmmer/bin/hmmsearch --tblout mcrA_search.txt mcrA_profile.hmm $proteome
+../hmmer/bin/hmmsearch --tblout hsp70_search.txt hsp70_profile.hmm $proteome
+mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
+hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
+# make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
+
+done

From aab9783d9bd5c4016459dff988401f9114e31baa Mon Sep 17 00:00:00 2001
From: Nihat Aliyev <naliyev@crcfe01.crc.nd.edu>
Date: Thu, 14 Oct 2021 17:26:39 -0400
Subject: [PATCH 2/7] results table

---
 bioinformaticsProject/Project1.sh           |  5 +-
 bioinformaticsProject/match_count_table.csv | 51 +++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)
 create mode 100644 bioinformaticsProject/match_count_table.csv

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
index 8b5f39c..1edd914 100644
--- a/bioinformaticsProject/Project1.sh
+++ b/bioinformaticsProject/Project1.sh
@@ -19,6 +19,8 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta
 ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta
 ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta
 
+echo "proteome_ID,mcrA_gene,hsp70_gene" > match_count_table.csv
+
 # 4. search for gene in each proteome
 for proteome in $2/proteome_*.fasta
 do
@@ -27,5 +29,6 @@ do
 mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
 hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
 # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
-
+echo "$proteome,$mcrA_match,$hsp70_match" >> match_count_table.csv
 done
+
diff --git a/bioinformaticsProject/match_count_table.csv b/bioinformaticsProject/match_count_table.csv
new file mode 100644
index 0000000..7e57b95
--- /dev/null
+++ b/bioinformaticsProject/match_count_table.csv
@@ -0,0 +1,51 @@
+proteome_ID,mcrA_gene,hsp70_gene
+proteomes//proteome_01.fasta,0,4
+proteomes//proteome_02.fasta,0,2
+proteomes//proteome_03.fasta,1,3
+proteomes//proteome_04.fasta,0,4
+proteomes//proteome_05.fasta,1,2
+proteomes//proteome_06.fasta,0,0
+proteomes//proteome_07.fasta,1,2
+proteomes//proteome_08.fasta,0,5
+proteomes//proteome_09.fasta,0,1
+proteomes//proteome_10.fasta,0,3
+proteomes//proteome_11.fasta,0,6
+proteomes//proteome_12.fasta,0,6
+proteomes//proteome_13.fasta,0,3
+proteomes//proteome_14.fasta,0,2
+proteomes//proteome_15.fasta,1,1
+proteomes//proteome_16.fasta,1,1
+proteomes//proteome_17.fasta,0,4
+proteomes//proteome_18.fasta,0,8
+proteomes//proteome_19.fasta,2,1
+proteomes//proteome_20.fasta,0,3
+proteomes//proteome_21.fasta,0,5
+proteomes//proteome_22.fasta,0,9
+proteomes//proteome_23.fasta,2,2
+proteomes//proteome_24.fasta,1,2
+proteomes//proteome_25.fasta,0,5
+proteomes//proteome_26.fasta,0,1
+proteomes//proteome_27.fasta,0,1
+proteomes//proteome_28.fasta,0,1
+proteomes//proteome_29.fasta,1,0
+proteomes//proteome_30.fasta,0,1
+proteomes//proteome_31.fasta,0,7
+proteomes//proteome_32.fasta,0,4
+proteomes//proteome_33.fasta,0,0
+proteomes//proteome_34.fasta,0,2
+proteomes//proteome_35.fasta,0,1
+proteomes//proteome_36.fasta,0,3
+proteomes//proteome_37.fasta,0,1
+proteomes//proteome_38.fasta,1,1
+proteomes//proteome_39.fasta,1,1
+proteomes//proteome_40.fasta,0,2
+proteomes//proteome_41.fasta,0,1
+proteomes//proteome_42.fasta,1,3
+proteomes//proteome_43.fasta,0,3
+proteomes//proteome_44.fasta,1,1
+proteomes//proteome_45.fasta,1,3
+proteomes//proteome_46.fasta,0,2
+proteomes//proteome_47.fasta,0,1
+proteomes//proteome_48.fasta,1,1
+proteomes//proteome_49.fasta,0,3
+proteomes//proteome_50.fasta,1,3

From 2c7f217447c45bdf8ac3bc954d49c8649e4e38e6 Mon Sep 17 00:00:00 2001
From: Johanna1403 <johannaolesk99@gmail.com>
Date: Thu, 14 Oct 2021 19:58:46 -0400
Subject: [PATCH 3/7] Extracted proteomes of interest. Created a folder for the
 results. Added some small changes/comments.

---
 bioinformaticsProject/Project1.sh             | 29 ++++++++---
 .../Results/match_count_table.csv             | 51 +++++++++++++++++++
 .../Results/pHresistant_methanogens.txt       |  8 +++
 3 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 bioinformaticsProject/Results/match_count_table.csv
 create mode 100644 bioinformaticsProject/Results/pHresistant_methanogens.txt

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
index 1edd914..020aa1b 100644
--- a/bioinformaticsProject/Project1.sh
+++ b/bioinformaticsProject/Project1.sh
@@ -1,13 +1,15 @@
 ## Identifying candidate pH-resistant methanogenic Archaea
 
-# The aim of this project is to 
+# The aim of this project is to identify which of the 50 isolated microbe proteomes belong to pH-resistant methanogens. This can be done by looking at the presence of the methyl-coenzyme M reductase (mcrA) gene catalyzing the last step of the methanogenesis, and the number of copies of HSP70 gene which is involved in protein biogenesis and refolding for stress resistance.
+
+# The inputs of the code are the reference sequences together with the proteome sequences.
+# The outputs of the code are a table showing a match count for mcrA gene and hsp70 gene for each proteome, and a text file with the names of the candidate pH-resistant methanogens based on the results.
 
 # USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path
 
-# 1. concatenating the reference sequences:
+mkdir Results
 
-#cat $1 > geneA.fasta
-#cat $2 > geneB.fasta
+# 1. concatenating the reference sequences:
 cat $1/mcrAgene_*.fasta > mcrAgene.fasta
 cat $1/hsp70gene_*.fasta > hsp70gene.fasta
 
@@ -19,7 +21,7 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta
 ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta
 ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta
 
-echo "proteome_ID,mcrA_gene,hsp70_gene" > match_count_table.csv
+echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv
 
 # 4. search for gene in each proteome
 for proteome in $2/proteome_*.fasta
@@ -29,6 +31,21 @@ do
 mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
 hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
 # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
-echo "$proteome,$mcrA_match,$hsp70_match" >> match_count_table.csv
+echo "$proteome,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv
 done
 
+echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt
+
+# 5. choose the pH-resistant methanogens
+cat Results/match_count_table.csv | sort -t , -k 2 | awk -F , '$3>"1"' | awk -F , '$2=="1"' > chosen_methanogens.txt
+cut -d , -f 1 chosen_methanogens.txt >> Results/pHresistant_methanogens.txt
+
+rm mcrAgene.fasta
+rm hsp70gene.fasta
+rm mcrAgene_aligned.fasta
+rm hsp70gene_aligned.fasta
+rm mcrA_profile.hmm
+rm hsp70_profile.hmm
+rm mcrA_search.txt
+rm hsp70_search.txt
+rm chosen_methanogens.txt
diff --git a/bioinformaticsProject/Results/match_count_table.csv b/bioinformaticsProject/Results/match_count_table.csv
new file mode 100644
index 0000000..7e57b95
--- /dev/null
+++ b/bioinformaticsProject/Results/match_count_table.csv
@@ -0,0 +1,51 @@
+proteome_ID,mcrA_gene,hsp70_gene
+proteomes//proteome_01.fasta,0,4
+proteomes//proteome_02.fasta,0,2
+proteomes//proteome_03.fasta,1,3
+proteomes//proteome_04.fasta,0,4
+proteomes//proteome_05.fasta,1,2
+proteomes//proteome_06.fasta,0,0
+proteomes//proteome_07.fasta,1,2
+proteomes//proteome_08.fasta,0,5
+proteomes//proteome_09.fasta,0,1
+proteomes//proteome_10.fasta,0,3
+proteomes//proteome_11.fasta,0,6
+proteomes//proteome_12.fasta,0,6
+proteomes//proteome_13.fasta,0,3
+proteomes//proteome_14.fasta,0,2
+proteomes//proteome_15.fasta,1,1
+proteomes//proteome_16.fasta,1,1
+proteomes//proteome_17.fasta,0,4
+proteomes//proteome_18.fasta,0,8
+proteomes//proteome_19.fasta,2,1
+proteomes//proteome_20.fasta,0,3
+proteomes//proteome_21.fasta,0,5
+proteomes//proteome_22.fasta,0,9
+proteomes//proteome_23.fasta,2,2
+proteomes//proteome_24.fasta,1,2
+proteomes//proteome_25.fasta,0,5
+proteomes//proteome_26.fasta,0,1
+proteomes//proteome_27.fasta,0,1
+proteomes//proteome_28.fasta,0,1
+proteomes//proteome_29.fasta,1,0
+proteomes//proteome_30.fasta,0,1
+proteomes//proteome_31.fasta,0,7
+proteomes//proteome_32.fasta,0,4
+proteomes//proteome_33.fasta,0,0
+proteomes//proteome_34.fasta,0,2
+proteomes//proteome_35.fasta,0,1
+proteomes//proteome_36.fasta,0,3
+proteomes//proteome_37.fasta,0,1
+proteomes//proteome_38.fasta,1,1
+proteomes//proteome_39.fasta,1,1
+proteomes//proteome_40.fasta,0,2
+proteomes//proteome_41.fasta,0,1
+proteomes//proteome_42.fasta,1,3
+proteomes//proteome_43.fasta,0,3
+proteomes//proteome_44.fasta,1,1
+proteomes//proteome_45.fasta,1,3
+proteomes//proteome_46.fasta,0,2
+proteomes//proteome_47.fasta,0,1
+proteomes//proteome_48.fasta,1,1
+proteomes//proteome_49.fasta,0,3
+proteomes//proteome_50.fasta,1,3
diff --git a/bioinformaticsProject/Results/pHresistant_methanogens.txt b/bioinformaticsProject/Results/pHresistant_methanogens.txt
new file mode 100644
index 0000000..9526170
--- /dev/null
+++ b/bioinformaticsProject/Results/pHresistant_methanogens.txt
@@ -0,0 +1,8 @@
+These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more).
+proteomes//proteome_05.fasta
+proteomes//proteome_07.fasta
+proteomes//proteome_24.fasta
+proteomes//proteome_03.fasta
+proteomes//proteome_42.fasta
+proteomes//proteome_45.fasta
+proteomes//proteome_50.fasta

From 5f0c38ada6e7862fad6207c1a281178e00e3705e Mon Sep 17 00:00:00 2001
From: Johanna1403 <johannaolesk99@gmail.com>
Date: Fri, 15 Oct 2021 13:18:24 -0400
Subject: [PATCH 4/7] Final changes: added code to include a proteome name
 instead of the whole path in the table and text file.

---
 bioinformaticsProject/Project1.sh             |   4 +-
 .../Results/match_count_table.csv             | 100 +++++++++---------
 .../Results/pHresistant_methanogens.txt       |  14 +--
 3 files changed, 60 insertions(+), 58 deletions(-)

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
index 020aa1b..7945a9d 100644
--- a/bioinformaticsProject/Project1.sh
+++ b/bioinformaticsProject/Project1.sh
@@ -1,4 +1,5 @@
 ## Identifying candidate pH-resistant methanogenic Archaea
+# Project by Johanna Olesk and Nihat Aliyev
 
 # The aim of this project is to identify which of the 50 isolated microbe proteomes belong to pH-resistant methanogens. This can be done by looking at the presence of the methyl-coenzyme M reductase (mcrA) gene catalyzing the last step of the methanogenesis, and the number of copies of HSP70 gene which is involved in protein biogenesis and refolding for stress resistance.
 
@@ -31,7 +32,8 @@ do
 mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
 hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
 # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
-echo "$proteome,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv
+proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/')
+echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv
 done
 
 echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt
diff --git a/bioinformaticsProject/Results/match_count_table.csv b/bioinformaticsProject/Results/match_count_table.csv
index 7e57b95..2cd51ca 100644
--- a/bioinformaticsProject/Results/match_count_table.csv
+++ b/bioinformaticsProject/Results/match_count_table.csv
@@ -1,51 +1,51 @@
 proteome_ID,mcrA_gene,hsp70_gene
-proteomes//proteome_01.fasta,0,4
-proteomes//proteome_02.fasta,0,2
-proteomes//proteome_03.fasta,1,3
-proteomes//proteome_04.fasta,0,4
-proteomes//proteome_05.fasta,1,2
-proteomes//proteome_06.fasta,0,0
-proteomes//proteome_07.fasta,1,2
-proteomes//proteome_08.fasta,0,5
-proteomes//proteome_09.fasta,0,1
-proteomes//proteome_10.fasta,0,3
-proteomes//proteome_11.fasta,0,6
-proteomes//proteome_12.fasta,0,6
-proteomes//proteome_13.fasta,0,3
-proteomes//proteome_14.fasta,0,2
-proteomes//proteome_15.fasta,1,1
-proteomes//proteome_16.fasta,1,1
-proteomes//proteome_17.fasta,0,4
-proteomes//proteome_18.fasta,0,8
-proteomes//proteome_19.fasta,2,1
-proteomes//proteome_20.fasta,0,3
-proteomes//proteome_21.fasta,0,5
-proteomes//proteome_22.fasta,0,9
-proteomes//proteome_23.fasta,2,2
-proteomes//proteome_24.fasta,1,2
-proteomes//proteome_25.fasta,0,5
-proteomes//proteome_26.fasta,0,1
-proteomes//proteome_27.fasta,0,1
-proteomes//proteome_28.fasta,0,1
-proteomes//proteome_29.fasta,1,0
-proteomes//proteome_30.fasta,0,1
-proteomes//proteome_31.fasta,0,7
-proteomes//proteome_32.fasta,0,4
-proteomes//proteome_33.fasta,0,0
-proteomes//proteome_34.fasta,0,2
-proteomes//proteome_35.fasta,0,1
-proteomes//proteome_36.fasta,0,3
-proteomes//proteome_37.fasta,0,1
-proteomes//proteome_38.fasta,1,1
-proteomes//proteome_39.fasta,1,1
-proteomes//proteome_40.fasta,0,2
-proteomes//proteome_41.fasta,0,1
-proteomes//proteome_42.fasta,1,3
-proteomes//proteome_43.fasta,0,3
-proteomes//proteome_44.fasta,1,1
-proteomes//proteome_45.fasta,1,3
-proteomes//proteome_46.fasta,0,2
-proteomes//proteome_47.fasta,0,1
-proteomes//proteome_48.fasta,1,1
-proteomes//proteome_49.fasta,0,3
-proteomes//proteome_50.fasta,1,3
+proteome_01,0,4
+proteome_02,0,2
+proteome_03,1,3
+proteome_04,0,4
+proteome_05,1,2
+proteome_06,0,0
+proteome_07,1,2
+proteome_08,0,5
+proteome_09,0,1
+proteome_10,0,3
+proteome_11,0,6
+proteome_12,0,6
+proteome_13,0,3
+proteome_14,0,2
+proteome_15,1,1
+proteome_16,1,1
+proteome_17,0,4
+proteome_18,0,8
+proteome_19,2,1
+proteome_20,0,3
+proteome_21,0,5
+proteome_22,0,9
+proteome_23,2,2
+proteome_24,1,2
+proteome_25,0,5
+proteome_26,0,1
+proteome_27,0,1
+proteome_28,0,1
+proteome_29,1,0
+proteome_30,0,1
+proteome_31,0,7
+proteome_32,0,4
+proteome_33,0,0
+proteome_34,0,2
+proteome_35,0,1
+proteome_36,0,3
+proteome_37,0,1
+proteome_38,1,1
+proteome_39,1,1
+proteome_40,0,2
+proteome_41,0,1
+proteome_42,1,3
+proteome_43,0,3
+proteome_44,1,1
+proteome_45,1,3
+proteome_46,0,2
+proteome_47,0,1
+proteome_48,1,1
+proteome_49,0,3
+proteome_50,1,3
diff --git a/bioinformaticsProject/Results/pHresistant_methanogens.txt b/bioinformaticsProject/Results/pHresistant_methanogens.txt
index 9526170..b155ce1 100644
--- a/bioinformaticsProject/Results/pHresistant_methanogens.txt
+++ b/bioinformaticsProject/Results/pHresistant_methanogens.txt
@@ -1,8 +1,8 @@
 These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more).
-proteomes//proteome_05.fasta
-proteomes//proteome_07.fasta
-proteomes//proteome_24.fasta
-proteomes//proteome_03.fasta
-proteomes//proteome_42.fasta
-proteomes//proteome_45.fasta
-proteomes//proteome_50.fasta
+proteome_05
+proteome_07
+proteome_24
+proteome_03
+proteome_42
+proteome_45
+proteome_50

From 3202e5f0908ec3f67400469d870b79dd968ec7c9 Mon Sep 17 00:00:00 2001
From: Johanna1403 <johannaolesk99@gmail.com>
Date: Fri, 15 Oct 2021 13:28:53 -0400
Subject: [PATCH 5/7] Added some comments.

---
 bioinformaticsProject/Project1.sh | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
index 7945a9d..034a0cb 100644
--- a/bioinformaticsProject/Project1.sh
+++ b/bioinformaticsProject/Project1.sh
@@ -22,7 +22,7 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta
 ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta
 ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta
 
-echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv
+echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv # create an empty table with column names
 
 # 4. search for gene in each proteome
 for proteome in $2/proteome_*.fasta
@@ -32,14 +32,14 @@ do
 mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
 hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
 # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
-proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/')
+proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the ptoteome ID only, instead of the whole file path
 echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv
 done
 
-echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt
+echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt # make an empty text file with an introductory sentence
 
 # 5. choose the pH-resistant methanogens
-cat Results/match_count_table.csv | sort -t , -k 2 | awk -F , '$3>"1"' | awk -F , '$2=="1"' > chosen_methanogens.txt
+awk -F , 'NR>1 && $2+0>=1 && $3+0>=2' Results/match_count_table.csv > chosen_methanogens.txt # chooses the proteomes that have at least one mcrA match and 2 or more copies of hsp70 gene (NR>1 skips the header row; +0 forces a numeric comparison, so proteomes with several mcrA matches are kept)
 cut -d , -f 1 chosen_methanogens.txt >> Results/pHresistant_methanogens.txt
 
 rm mcrAgene.fasta

From 11fc8fed2cebd29765b4abdd41888e57c06a0c50 Mon Sep 17 00:00:00 2001
From: Johanna1403 <johannaolesk99@gmail.com>
Date: Fri, 15 Oct 2021 13:37:47 -0400
Subject: [PATCH 6/7] Added an NB! message: If a / (slash) is automatically
 added after the reference_sequence_path and proteome_sequence_path when using
 tab autocomplete, be sure to remove it before running the script, otherwise
 it will result in a double // (slash) in between the directory and the file
 name, which will result in an error in the script.

---
 bioinformaticsProject/Project1.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh
index 034a0cb..431f9eb 100644
--- a/bioinformaticsProject/Project1.sh
+++ b/bioinformaticsProject/Project1.sh
@@ -7,6 +7,7 @@
 # The outputs of the code are a table showing a match count for mcrA gene and hsp70 gene for each proteome, and a text file with the names of the candidate pH-resistant methanogens based on the results.
 
 # USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path
+# NB! Make sure that reference_sequence_path and proteome_sequence_path do not end with a / (slash), which tab autocompletion may add automatically; otherwise there will be a double // (slash) between the directory and the file name, resulting in an error.
 
 mkdir Results
 
@@ -32,7 +33,7 @@ do
 mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l)
 hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l)
 # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count)
-proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the ptoteome ID only, instead of the whole file path
+proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the proteome ID only, instead of the whole file path
 echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv
 done
 

From 74d740abd689317c59695ac3d079cced3d3194ad Mon Sep 17 00:00:00 2001
From: Johanna1403 <69310080+Johanna1403@users.noreply.github.com>
Date: Fri, 15 Oct 2021 13:42:52 -0400
Subject: [PATCH 7/7] Deleted the old match_count_table.csv

---
 bioinformaticsProject/match_count_table.csv | 51 ---------------------
 1 file changed, 51 deletions(-)
 delete mode 100644 bioinformaticsProject/match_count_table.csv

diff --git a/bioinformaticsProject/match_count_table.csv b/bioinformaticsProject/match_count_table.csv
deleted file mode 100644
index 7e57b95..0000000
--- a/bioinformaticsProject/match_count_table.csv
+++ /dev/null
@@ -1,51 +0,0 @@
-proteome_ID,mcrA_gene,hsp70_gene
-proteomes//proteome_01.fasta,0,4
-proteomes//proteome_02.fasta,0,2
-proteomes//proteome_03.fasta,1,3
-proteomes//proteome_04.fasta,0,4
-proteomes//proteome_05.fasta,1,2
-proteomes//proteome_06.fasta,0,0
-proteomes//proteome_07.fasta,1,2
-proteomes//proteome_08.fasta,0,5
-proteomes//proteome_09.fasta,0,1
-proteomes//proteome_10.fasta,0,3
-proteomes//proteome_11.fasta,0,6
-proteomes//proteome_12.fasta,0,6
-proteomes//proteome_13.fasta,0,3
-proteomes//proteome_14.fasta,0,2
-proteomes//proteome_15.fasta,1,1
-proteomes//proteome_16.fasta,1,1
-proteomes//proteome_17.fasta,0,4
-proteomes//proteome_18.fasta,0,8
-proteomes//proteome_19.fasta,2,1
-proteomes//proteome_20.fasta,0,3
-proteomes//proteome_21.fasta,0,5
-proteomes//proteome_22.fasta,0,9
-proteomes//proteome_23.fasta,2,2
-proteomes//proteome_24.fasta,1,2
-proteomes//proteome_25.fasta,0,5
-proteomes//proteome_26.fasta,0,1
-proteomes//proteome_27.fasta,0,1
-proteomes//proteome_28.fasta,0,1
-proteomes//proteome_29.fasta,1,0
-proteomes//proteome_30.fasta,0,1
-proteomes//proteome_31.fasta,0,7
-proteomes//proteome_32.fasta,0,4
-proteomes//proteome_33.fasta,0,0
-proteomes//proteome_34.fasta,0,2
-proteomes//proteome_35.fasta,0,1
-proteomes//proteome_36.fasta,0,3
-proteomes//proteome_37.fasta,0,1
-proteomes//proteome_38.fasta,1,1
-proteomes//proteome_39.fasta,1,1
-proteomes//proteome_40.fasta,0,2
-proteomes//proteome_41.fasta,0,1
-proteomes//proteome_42.fasta,1,3
-proteomes//proteome_43.fasta,0,3
-proteomes//proteome_44.fasta,1,1
-proteomes//proteome_45.fasta,1,3
-proteomes//proteome_46.fasta,0,2
-proteomes//proteome_47.fasta,0,1
-proteomes//proteome_48.fasta,1,1
-proteomes//proteome_49.fasta,0,3
-proteomes//proteome_50.fasta,1,3