From 2dfad88d29be89875d7e9ac48126a120de0c9004 Mon Sep 17 00:00:00 2001 From: Johanna1403 Date: Thu, 14 Oct 2021 14:26:54 -0400 Subject: [PATCH 1/7] First steps of project. --- bioinformaticsProject/Project1.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 bioinformaticsProject/Project1.sh diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh new file mode 100644 index 0000000..8b5f39c --- /dev/null +++ b/bioinformaticsProject/Project1.sh @@ -0,0 +1,31 @@ +## Identifying candidate pH-resistant methanogenic Archaea + +# The aim of this project is to + +# USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path + +# 1. concatenating the reference sequences: + +#cat $1 > geneA.fasta +#cat $2 > geneB.fasta +cat $1/mcrAgene_*.fasta > mcrAgene.fasta +cat $1/hsp70gene_*.fasta > hsp70gene.fasta + +# 2. aligning the concatenated ref sequence files using muscle +../muscle3.8.31_i86linux64 -in mcrAgene.fasta -out mcrAgene_aligned.fasta +../muscle3.8.31_i86linux64 -in hsp70gene.fasta -out hsp70gene_aligned.fasta + +# 3. build HMM profile +../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta +../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta + +# 4. search for gene in each proteome +for proteome in $2/proteome_*.fasta +do +../hmmer/bin/hmmsearch --tblout mcrA_search.txt mcrA_profile.hmm $proteome +../hmmer/bin/hmmsearch --tblout hsp70_search.txt hsp70_profile.hmm $proteome +mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) +hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) +# make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) + +done From aab9783d9bd5c4016459dff988401f9114e31baa Mon Sep 17 00:00:00 2001 From: Nihat Aliyev Date: Thu, 14 Oct 2021 17:26:39 -0400 Subject: [PATCH 2/7] results table --- bioinformaticsProject/Project1.sh | 5 +- bioinformaticsProject/match_count_table.csv | 51 +++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 bioinformaticsProject/match_count_table.csv diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh index 8b5f39c..1edd914 100644 --- a/bioinformaticsProject/Project1.sh +++ b/bioinformaticsProject/Project1.sh @@ -19,6 +19,8 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta +echo "proteome_ID,mcrA_gene,hsp70_gene" > match_count_table.csv + # 4. search for gene in each proteome for proteome in $2/proteome_*.fasta do @@ -27,5 +29,6 @@ do mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) - +echo "$proteome,$mcrA_match,$hsp70_match" >> match_count_table.csv done + diff --git a/bioinformaticsProject/match_count_table.csv b/bioinformaticsProject/match_count_table.csv new file mode 100644 index 0000000..7e57b95 --- /dev/null +++ b/bioinformaticsProject/match_count_table.csv @@ -0,0 +1,51 @@ +proteome_ID,mcrA_gene,hsp70_gene +proteomes//proteome_01.fasta,0,4 +proteomes//proteome_02.fasta,0,2 +proteomes//proteome_03.fasta,1,3 +proteomes//proteome_04.fasta,0,4 +proteomes//proteome_05.fasta,1,2 +proteomes//proteome_06.fasta,0,0 +proteomes//proteome_07.fasta,1,2 +proteomes//proteome_08.fasta,0,5 +proteomes//proteome_09.fasta,0,1 +proteomes//proteome_10.fasta,0,3 +proteomes//proteome_11.fasta,0,6 +proteomes//proteome_12.fasta,0,6 +proteomes//proteome_13.fasta,0,3 +proteomes//proteome_14.fasta,0,2 +proteomes//proteome_15.fasta,1,1 +proteomes//proteome_16.fasta,1,1 +proteomes//proteome_17.fasta,0,4 +proteomes//proteome_18.fasta,0,8 +proteomes//proteome_19.fasta,2,1 +proteomes//proteome_20.fasta,0,3 +proteomes//proteome_21.fasta,0,5 +proteomes//proteome_22.fasta,0,9 +proteomes//proteome_23.fasta,2,2 +proteomes//proteome_24.fasta,1,2 +proteomes//proteome_25.fasta,0,5 +proteomes//proteome_26.fasta,0,1 +proteomes//proteome_27.fasta,0,1 +proteomes//proteome_28.fasta,0,1 +proteomes//proteome_29.fasta,1,0 +proteomes//proteome_30.fasta,0,1 +proteomes//proteome_31.fasta,0,7 +proteomes//proteome_32.fasta,0,4 +proteomes//proteome_33.fasta,0,0 +proteomes//proteome_34.fasta,0,2 +proteomes//proteome_35.fasta,0,1 +proteomes//proteome_36.fasta,0,3 +proteomes//proteome_37.fasta,0,1 +proteomes//proteome_38.fasta,1,1 +proteomes//proteome_39.fasta,1,1 +proteomes//proteome_40.fasta,0,2 +proteomes//proteome_41.fasta,0,1 +proteomes//proteome_42.fasta,1,3 +proteomes//proteome_43.fasta,0,3 +proteomes//proteome_44.fasta,1,1 +proteomes//proteome_45.fasta,1,3 +proteomes//proteome_46.fasta,0,2 +proteomes//proteome_47.fasta,0,1 +proteomes//proteome_48.fasta,1,1 +proteomes//proteome_49.fasta,0,3 +proteomes//proteome_50.fasta,1,3 From 2c7f217447c45bdf8ac3bc954d49c8649e4e38e6 Mon Sep 17 00:00:00 2001 From: Johanna1403 Date: Thu, 14 Oct 2021 19:58:46 -0400 Subject: [PATCH 3/7] Extracted proteomes of interest. Created a folder for the results. Added some small changes/comments. --- bioinformaticsProject/Project1.sh | 29 ++++++++--- .../Results/match_count_table.csv | 51 +++++++++++++++++++ .../Results/pHresistant_methanogens.txt | 8 +++ 3 files changed, 82 insertions(+), 6 deletions(-) create mode 100644 bioinformaticsProject/Results/match_count_table.csv create mode 100644 bioinformaticsProject/Results/pHresistant_methanogens.txt diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh index 1edd914..020aa1b 100644 --- a/bioinformaticsProject/Project1.sh +++ b/bioinformaticsProject/Project1.sh @@ -1,13 +1,15 @@ ## Identifying candidate pH-resistant methanogenic Archaea -# The aim of this project is to +# The aim of this project is to identify which of the 50 isolated microbe proteomes belong to pH-resistant methanogens. This can be done by looking at the presence of the methyl-coenzyme M reductase (mcrA) gene catalyzing the last step of the methanogenesis, and the number of copies of HSP70 gene which is involved in protein biogenesis and refolding for stress resistance. + +# The inputs of the code are the reference sequences together with the proteome sequences. +# The outputs of the code are a table showing a match count for mcrA gene and hsp70 gene for each proteome, and a text file with the names of the candidate pH-resistant methanogens based on the results. # USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path -# 1. concatenating the reference sequences: +mkdir Results -#cat $1 > geneA.fasta -#cat $2 > geneB.fasta +# 1. concatenating the reference sequences: cat $1/mcrAgene_*.fasta > mcrAgene.fasta cat $1/hsp70gene_*.fasta > hsp70gene.fasta @@ -19,7 +21,7 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta -echo "proteome_ID,mcrA_gene,hsp70_gene" > match_count_table.csv +echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv # 4. search for gene in each proteome for proteome in $2/proteome_*.fasta @@ -29,6 +31,21 @@ do mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) -echo "$proteome,$mcrA_match,$hsp70_match" >> match_count_table.csv +echo "$proteome,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv done +echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt + +# 5. choose the pH-resistant methanogens +cat Results/match_count_table.csv | sort -t , -k 2 | awk -F , '$3>"1"' | awk -F , '$2=="1"' > chosen_methanogens.txt +cut -d , -f 1 chosen_methanogens.txt >> Results/pHresistant_methanogens.txt + +rm mcrAgene.fasta +rm hsp70gene.fasta +rm mcrAgene_aligned.fasta +rm hsp70gene_aligned.fasta +rm mcrA_profile.hmm +rm hsp70_profile.hmm +rm mcrA_search.txt +rm hsp70_search.txt +rm chosen_methanogens.txt diff --git a/bioinformaticsProject/Results/match_count_table.csv b/bioinformaticsProject/Results/match_count_table.csv new file mode 100644 index 0000000..7e57b95 --- /dev/null +++ b/bioinformaticsProject/Results/match_count_table.csv @@ -0,0 +1,51 @@ +proteome_ID,mcrA_gene,hsp70_gene +proteomes//proteome_01.fasta,0,4 +proteomes//proteome_02.fasta,0,2 +proteomes//proteome_03.fasta,1,3 +proteomes//proteome_04.fasta,0,4 +proteomes//proteome_05.fasta,1,2 +proteomes//proteome_06.fasta,0,0 +proteomes//proteome_07.fasta,1,2 +proteomes//proteome_08.fasta,0,5 +proteomes//proteome_09.fasta,0,1 +proteomes//proteome_10.fasta,0,3 +proteomes//proteome_11.fasta,0,6 +proteomes//proteome_12.fasta,0,6 +proteomes//proteome_13.fasta,0,3 +proteomes//proteome_14.fasta,0,2 +proteomes//proteome_15.fasta,1,1 +proteomes//proteome_16.fasta,1,1 +proteomes//proteome_17.fasta,0,4 +proteomes//proteome_18.fasta,0,8 +proteomes//proteome_19.fasta,2,1 +proteomes//proteome_20.fasta,0,3 +proteomes//proteome_21.fasta,0,5 +proteomes//proteome_22.fasta,0,9 +proteomes//proteome_23.fasta,2,2 +proteomes//proteome_24.fasta,1,2 +proteomes//proteome_25.fasta,0,5 +proteomes//proteome_26.fasta,0,1 +proteomes//proteome_27.fasta,0,1 +proteomes//proteome_28.fasta,0,1 +proteomes//proteome_29.fasta,1,0 +proteomes//proteome_30.fasta,0,1 +proteomes//proteome_31.fasta,0,7 +proteomes//proteome_32.fasta,0,4 +proteomes//proteome_33.fasta,0,0 +proteomes//proteome_34.fasta,0,2 +proteomes//proteome_35.fasta,0,1 +proteomes//proteome_36.fasta,0,3 +proteomes//proteome_37.fasta,0,1 +proteomes//proteome_38.fasta,1,1 +proteomes//proteome_39.fasta,1,1 +proteomes//proteome_40.fasta,0,2 +proteomes//proteome_41.fasta,0,1 +proteomes//proteome_42.fasta,1,3 +proteomes//proteome_43.fasta,0,3 +proteomes//proteome_44.fasta,1,1 +proteomes//proteome_45.fasta,1,3 +proteomes//proteome_46.fasta,0,2 +proteomes//proteome_47.fasta,0,1 +proteomes//proteome_48.fasta,1,1 +proteomes//proteome_49.fasta,0,3 +proteomes//proteome_50.fasta,1,3 diff --git a/bioinformaticsProject/Results/pHresistant_methanogens.txt b/bioinformaticsProject/Results/pHresistant_methanogens.txt new file mode 100644 index 0000000..9526170 --- /dev/null +++ b/bioinformaticsProject/Results/pHresistant_methanogens.txt @@ -0,0 +1,8 @@ +These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more). +proteomes//proteome_05.fasta +proteomes//proteome_07.fasta +proteomes//proteome_24.fasta +proteomes//proteome_03.fasta +proteomes//proteome_42.fasta +proteomes//proteome_45.fasta +proteomes//proteome_50.fasta From 5f0c38ada6e7862fad6207c1a281178e00e3705e Mon Sep 17 00:00:00 2001 From: Johanna1403 Date: Fri, 15 Oct 2021 13:18:24 -0400 Subject: [PATCH 4/7] Final changes: added code to include a proteome name instead of the whole path in the table and text file. --- bioinformaticsProject/Project1.sh | 4 +- .../Results/match_count_table.csv | 100 +++++++++--------- .../Results/pHresistant_methanogens.txt | 14 +-- 3 files changed, 60 insertions(+), 58 deletions(-) diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh index 020aa1b..7945a9d 100644 --- a/bioinformaticsProject/Project1.sh +++ b/bioinformaticsProject/Project1.sh @@ -1,4 +1,5 @@ ## Identifying candidate pH-resistant methanogenic Archaea +# Project by Johanna Olesk and Nihat Aliyev # The aim of this project is to identify which of the 50 isolated microbe proteomes belong to pH-resistant methanogens. This can be done by looking at the presence of the methyl-coenzyme M reductase (mcrA) gene catalyzing the last step of the methanogenesis, and the number of copies of HSP70 gene which is involved in protein biogenesis and refolding for stress resistance. @@ -31,7 +32,8 @@ do mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) -echo "$proteome,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv +proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') +echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv done echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt diff --git a/bioinformaticsProject/Results/match_count_table.csv b/bioinformaticsProject/Results/match_count_table.csv index 7e57b95..2cd51ca 100644 --- a/bioinformaticsProject/Results/match_count_table.csv +++ b/bioinformaticsProject/Results/match_count_table.csv @@ -1,51 +1,51 @@ proteome_ID,mcrA_gene,hsp70_gene -proteomes//proteome_01.fasta,0,4 -proteomes//proteome_02.fasta,0,2 -proteomes//proteome_03.fasta,1,3 -proteomes//proteome_04.fasta,0,4 -proteomes//proteome_05.fasta,1,2 -proteomes//proteome_06.fasta,0,0 -proteomes//proteome_07.fasta,1,2 -proteomes//proteome_08.fasta,0,5 -proteomes//proteome_09.fasta,0,1 -proteomes//proteome_10.fasta,0,3 -proteomes//proteome_11.fasta,0,6 -proteomes//proteome_12.fasta,0,6 -proteomes//proteome_13.fasta,0,3 -proteomes//proteome_14.fasta,0,2 -proteomes//proteome_15.fasta,1,1 -proteomes//proteome_16.fasta,1,1 -proteomes//proteome_17.fasta,0,4 -proteomes//proteome_18.fasta,0,8 -proteomes//proteome_19.fasta,2,1 -proteomes//proteome_20.fasta,0,3 -proteomes//proteome_21.fasta,0,5 -proteomes//proteome_22.fasta,0,9 -proteomes//proteome_23.fasta,2,2 -proteomes//proteome_24.fasta,1,2 -proteomes//proteome_25.fasta,0,5 -proteomes//proteome_26.fasta,0,1 -proteomes//proteome_27.fasta,0,1 -proteomes//proteome_28.fasta,0,1 -proteomes//proteome_29.fasta,1,0 -proteomes//proteome_30.fasta,0,1 -proteomes//proteome_31.fasta,0,7 -proteomes//proteome_32.fasta,0,4 -proteomes//proteome_33.fasta,0,0 -proteomes//proteome_34.fasta,0,2 -proteomes//proteome_35.fasta,0,1 -proteomes//proteome_36.fasta,0,3 -proteomes//proteome_37.fasta,0,1 -proteomes//proteome_38.fasta,1,1 -proteomes//proteome_39.fasta,1,1 -proteomes//proteome_40.fasta,0,2 -proteomes//proteome_41.fasta,0,1 -proteomes//proteome_42.fasta,1,3 -proteomes//proteome_43.fasta,0,3 -proteomes//proteome_44.fasta,1,1 -proteomes//proteome_45.fasta,1,3 -proteomes//proteome_46.fasta,0,2 -proteomes//proteome_47.fasta,0,1 -proteomes//proteome_48.fasta,1,1 -proteomes//proteome_49.fasta,0,3 -proteomes//proteome_50.fasta,1,3 +proteome_01,0,4 +proteome_02,0,2 +proteome_03,1,3 +proteome_04,0,4 +proteome_05,1,2 +proteome_06,0,0 +proteome_07,1,2 +proteome_08,0,5 +proteome_09,0,1 +proteome_10,0,3 +proteome_11,0,6 +proteome_12,0,6 +proteome_13,0,3 +proteome_14,0,2 +proteome_15,1,1 +proteome_16,1,1 +proteome_17,0,4 +proteome_18,0,8 +proteome_19,2,1 +proteome_20,0,3 +proteome_21,0,5 +proteome_22,0,9 +proteome_23,2,2 +proteome_24,1,2 +proteome_25,0,5 +proteome_26,0,1 +proteome_27,0,1 +proteome_28,0,1 +proteome_29,1,0 +proteome_30,0,1 +proteome_31,0,7 +proteome_32,0,4 +proteome_33,0,0 +proteome_34,0,2 +proteome_35,0,1 +proteome_36,0,3 +proteome_37,0,1 +proteome_38,1,1 +proteome_39,1,1 +proteome_40,0,2 +proteome_41,0,1 +proteome_42,1,3 +proteome_43,0,3 +proteome_44,1,1 +proteome_45,1,3 +proteome_46,0,2 +proteome_47,0,1 +proteome_48,1,1 +proteome_49,0,3 +proteome_50,1,3 diff --git a/bioinformaticsProject/Results/pHresistant_methanogens.txt b/bioinformaticsProject/Results/pHresistant_methanogens.txt index 9526170..b155ce1 100644 --- a/bioinformaticsProject/Results/pHresistant_methanogens.txt +++ b/bioinformaticsProject/Results/pHresistant_methanogens.txt @@ -1,8 +1,8 @@ These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more). -proteomes//proteome_05.fasta -proteomes//proteome_07.fasta -proteomes//proteome_24.fasta -proteomes//proteome_03.fasta -proteomes//proteome_42.fasta -proteomes//proteome_45.fasta -proteomes//proteome_50.fasta +proteome_05 +proteome_07 +proteome_24 +proteome_03 +proteome_42 +proteome_45 +proteome_50 From 3202e5f0908ec3f67400469d870b79dd968ec7c9 Mon Sep 17 00:00:00 2001 From: Johanna1403 Date: Fri, 15 Oct 2021 13:28:53 -0400 Subject: [PATCH 5/7] Added some comments. --- bioinformaticsProject/Project1.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh index 7945a9d..034a0cb 100644 --- a/bioinformaticsProject/Project1.sh +++ b/bioinformaticsProject/Project1.sh @@ -22,7 +22,7 @@ cat $1/hsp70gene_*.fasta > hsp70gene.fasta ../hmmer/bin/hmmbuild mcrA_profile.hmm mcrAgene_aligned.fasta ../hmmer/bin/hmmbuild hsp70_profile.hmm hsp70gene_aligned.fasta -echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv +echo "proteome_ID,mcrA_gene,hsp70_gene" > Results/match_count_table.csv # create an empty table with column names # 4. search for gene in each proteome for proteome in $2/proteome_*.fasta @@ -32,14 +32,14 @@ do mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) -proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') +proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the ptoteome ID only, instead of the whole file path echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv done -echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt +echo "These are the pH-resistant methanogens chosen according to the presence of the mcrA gene and the number of copies of hsp70 gene (we chose the copy number 2 or more)." > Results/pHresistant_methanogens.txt # make an empty text file with an introductory sentence # 5. choose the pH-resistant methanogens -cat Results/match_count_table.csv | sort -t , -k 2 | awk -F , '$3>"1"' | awk -F , '$2=="1"' > chosen_methanogens.txt +cat Results/match_count_table.csv | awk -F , '$3>"1"' | awk -F , '$2=="1"' > chosen_methanogens.txt # chooses the proteomes that have mcrA gene and 2 or more copies of hsp70 gene cut -d , -f 1 chosen_methanogens.txt >> Results/pHresistant_methanogens.txt rm mcrAgene.fasta From 11fc8fed2cebd29765b4abdd41888e57c06a0c50 Mon Sep 17 00:00:00 2001 From: Johanna1403 Date: Fri, 15 Oct 2021 13:37:47 -0400 Subject: [PATCH 6/7] Added an NB! message: If a / (slash) is automatically added after the reference_sequence_path and proteome_sequence_path when using tab autocomplete, be sure to remove it before running the script, otherwise it will result in a double // (slash) in between the directory and the file name, which will result an error in the script. --- bioinformaticsProject/Project1.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bioinformaticsProject/Project1.sh b/bioinformaticsProject/Project1.sh index 034a0cb..431f9eb 100644 --- a/bioinformaticsProject/Project1.sh +++ b/bioinformaticsProject/Project1.sh @@ -7,6 +7,7 @@ # The outputs of the code are a table showing a match count for mcrA gene and hsp70 gene for each proteome, and a text file with the names of the candidate pH-resistant methanogens based on the results. # USAGE: bash Project1.sh reference_sequence_path proteome_sequence_path +# NB! be sure that after entering the reference_sequence_path and proteome_sequence_path there is no / (slash) automatically added at the end, otherwise it will result in double // (slash) in between the directory and the file, resulting in an error mkdir Results @@ -32,7 +33,7 @@ do mcrA_match=$(cat mcrA_search.txt | grep -v "#" | wc -l) hsp70_match=$(cat hsp70_search.txt | grep -v "#" | wc -l) # make a table with 3 columns (proteome name, mcrA match count, hsp70 match count) -proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the ptoteome ID only, instead of the whole file path +proteomeID=$(echo $proteome | sed 's/.*\/\(.*\)\..*/\1/') # extract the proteome ID only, instead of the whole file path echo "$proteomeID,$mcrA_match,$hsp70_match" >> Results/match_count_table.csv done From 74d740abd689317c59695ac3d079cced3d3194ad Mon Sep 17 00:00:00 2001 From: Johanna1403 <69310080+Johanna1403@users.noreply.github.com> Date: Fri, 15 Oct 2021 13:42:52 -0400 Subject: [PATCH 7/7] Deleted the old match_count_table.csv --- bioinformaticsProject/match_count_table.csv | 51 --------------------- 1 file changed, 51 deletions(-) delete mode 100644 bioinformaticsProject/match_count_table.csv diff --git a/bioinformaticsProject/match_count_table.csv b/bioinformaticsProject/match_count_table.csv deleted file mode 100644 index 7e57b95..0000000 --- a/bioinformaticsProject/match_count_table.csv +++ /dev/null @@ -1,51 +0,0 @@ -proteome_ID,mcrA_gene,hsp70_gene -proteomes//proteome_01.fasta,0,4 -proteomes//proteome_02.fasta,0,2 -proteomes//proteome_03.fasta,1,3 -proteomes//proteome_04.fasta,0,4 -proteomes//proteome_05.fasta,1,2 -proteomes//proteome_06.fasta,0,0 -proteomes//proteome_07.fasta,1,2 -proteomes//proteome_08.fasta,0,5 -proteomes//proteome_09.fasta,0,1 -proteomes//proteome_10.fasta,0,3 -proteomes//proteome_11.fasta,0,6 -proteomes//proteome_12.fasta,0,6 -proteomes//proteome_13.fasta,0,3 -proteomes//proteome_14.fasta,0,2 -proteomes//proteome_15.fasta,1,1 -proteomes//proteome_16.fasta,1,1 -proteomes//proteome_17.fasta,0,4 -proteomes//proteome_18.fasta,0,8 -proteomes//proteome_19.fasta,2,1 -proteomes//proteome_20.fasta,0,3 -proteomes//proteome_21.fasta,0,5 -proteomes//proteome_22.fasta,0,9 -proteomes//proteome_23.fasta,2,2 -proteomes//proteome_24.fasta,1,2 -proteomes//proteome_25.fasta,0,5 -proteomes//proteome_26.fasta,0,1 -proteomes//proteome_27.fasta,0,1 -proteomes//proteome_28.fasta,0,1 -proteomes//proteome_29.fasta,1,0 -proteomes//proteome_30.fasta,0,1 -proteomes//proteome_31.fasta,0,7 -proteomes//proteome_32.fasta,0,4 -proteomes//proteome_33.fasta,0,0 -proteomes//proteome_34.fasta,0,2 -proteomes//proteome_35.fasta,0,1 -proteomes//proteome_36.fasta,0,3 -proteomes//proteome_37.fasta,0,1 -proteomes//proteome_38.fasta,1,1 -proteomes//proteome_39.fasta,1,1 -proteomes//proteome_40.fasta,0,2 -proteomes//proteome_41.fasta,0,1 -proteomes//proteome_42.fasta,1,3 -proteomes//proteome_43.fasta,0,3 -proteomes//proteome_44.fasta,1,1 -proteomes//proteome_45.fasta,1,3 -proteomes//proteome_46.fasta,0,2 -proteomes//proteome_47.fasta,0,1 -proteomes//proteome_48.fasta,1,1 -proteomes//proteome_49.fasta,0,3 -proteomes//proteome_50.fasta,1,3