Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions bioinformaticsProject/biocomp_projectupdated.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
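#usage: bash biocomp_projectupdated.sh <ref_sequences dir> <muscle> <hmmbuild> <proteomes dir> <hmmsearch>
#purpose: To determine candidates based on hsp70 (more than two copies) and mcrA (one copy). Looking for organisms with both.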
# Absolute Path to ref_sequences: Private/Biocomputing_Project/bioinformaticsProject/ref_sequences/
#Direct path to muscle tool: ~/Private/bin/muscle. Absolute path to hmmbuild: ~/Private/bin/hmmbuild
#Direct path to proteomes cd Private/Biocomputing_Project/bioinformaticsProject/proteomes/
# Direct path to hmm search ~/Private/bin/hmmsearch

# Enter the reference-sequence directory passed as the first argument.
# Abort if the argument is missing or the directory cannot be entered, so the
# combined FASTA files below are never written into some other directory.
cd "${1:?first argument must be the ref_sequences directory}" || exit 1

# Merge the raw hsp70 reference files into one FASTA for alignment.
# Lines containing "fasta" are dropped. -h keeps file names out of the output,
# and letting grep read the files directly (instead of through cat) keeps the
# last line of one file separate from the first line of the next even when a
# file has no trailing newline.
grep -hv fasta -- hsp70g* > hsp70.fasta

# Same merge for the raw mcrA reference files.
grep -hv fasta -- mcrAgene_* > mcrA.fasta

# Align the combined hsp70 sequences with the aligner passed as $2 (invoked
# muscle-style with -in/-out). The output keeps its ".hmm" name even though
# it is the alignment that the profile build below takes as input.
"$2" -in hsp70.fasta -out hsp70.hmm

# Align the combined mcrA sequences the same way.
"$2" -in mcrA.fasta -out mcrAgene.hmm

# Build the hsp70 profile HMM (written to hsp70results) from its alignment,
# using the builder passed as $3.
"$3" hsp70results hsp70.hmm

# Build the mcrA profile HMM (written to mcrAresults). The input has to be
# the file produced by the mcrA alignment step above, i.e. mcrAgene.hmm.
"$3" mcrAresults mcrAgene.hmm

# Proteome directory, passed as the fourth argument. A relative path is
# resolved from the directory the script is in at this point.
proteome_dir=${4:?fourth argument must be the proteomes directory}

# Move both profile HMMs next to the proteomes so the searches can refer to
# them by bare file name. The trailing slash makes mv fail when the target is
# not an existing directory, instead of renaming one profile to that path and
# then overwriting it with the other.
mv -- ./mcrAresults ./hsp70results "$proteome_dir"/

# Continue in the proteome directory; stop if it cannot be entered so that no
# search output lands somewhere else.
cd "$proteome_dir" || exit 1

# Search every proteome in the current directory with one profile HMM and
# record how many matching lines each search produced.
#   $1 - hmmsearch executable
#   $2 - profile HMM file to search with
#   $3 - gene label; the table for a proteome is written to <label>search<proteome>
#   $4 - prefix for the filtered copy of each table
#   $5 - tally file; receives one "wc -l" row per proteome
# For each proteome the table is filtered down to the "# Target" line plus the
# lines containing "WP", and the filtered file is counted with wc -l.
# The tally is emptied first so that running the script again replaces the
# counts instead of appending a second copy of every row.
count_hmm_hits() {
  local hmmsearch_bin=$1 hmm_file=$2 gene=$3 filtered_prefix=$4 tally=$5
  local proteome table filtered

  : > "$tally"
  for proteome in proteome*; do
    # An unmatched glob stays literal; skip it rather than search a
    # non-existent file.
    [ -e "$proteome" ] || continue
    table="${gene}search${proteome}"
    filtered="${filtered_prefix}${table}"
    "$hmmsearch_bin" --tblout "$table" "$hmm_file" "$proteome"
    grep "# Target\|WP" "$table" > "$filtered"
    wc -l "$filtered" >> "$tally"
  done
}

# hsp70: tables in hsp70searchproteome_*, filtered copies in
# newcomposite2hsp70searchproteome_*, counts in CompiledWChsp70.
count_hmm_hits "$5" hsp70results hsp70 newcomposite2 CompiledWChsp70

# mcrA: tables in mcrAsearchproteome_*, filtered copies in
# newcomposite1mcrAsearchproteome_*, counts in CompiledWCmcrA.
count_hmm_hits "$5" mcrAresults mcrA newcomposite1 CompiledWCmcrA

# Print the rows of a "<line count> <file name>" tally whose count is at
# least the given minimum.
#   $1 - minimum line count (integer)
#   $2 - tally file to read
# The first column is compared as a number, so counts of 10 or more are
# handled and digits inside the proteome name cannot influence the result.
rows_with_min_count() {
  awk -v min="$1" '$1 + 0 >= min + 0' "$2"
}

# Print every name (second column) that occurs more than once in the file $1.
# Splitting on whitespace with awk also copes with wc output that is padded
# with leading spaces.
names_listed_twice() {
  awk '{ print $2 }' "$1" | sort | uniq -d
}

# Rename the hsp70 tally rows so they carry the plain proteome file name.
sed 's/newcomposite2hsp70searchproteome/proteome/' CompiledWChsp70 > abbreviatedhsp70

# hsp70 candidates need at least two copies of the gene. One line of every
# filtered search is the title line, so that means at least 3 lines.
rows_with_min_count 3 abbreviatedhsp70 > completehsp70

# Rename the mcrA tally rows the same way.
sed 's/newcomposite1mcrAsearchproteome/proteome/' CompiledWCmcrA > abbreviatedmcrA

# mcrA candidates need just one copy: more than the single title line,
# i.e. at least 2 lines.
rows_with_min_count 2 abbreviatedmcrA | sort -n > completemcrA

# Combine both lists. The file is overwritten, not appended to, so rows left
# over from an earlier run cannot make a proteome look like it is on both
# lists.
cat completehsp70 completemcrA > candidatesofficial

# Successful candidates: proteomes that appear in both lists.
names_listed_twice candidatesofficial

# A previous run found the successful candidates to be 3,5,7,23,24,42,45,50


Empty file.
Empty file.
50 changes: 50 additions & 0 deletions bioinformaticsProject/proteomes/CompiledWChsp70
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
5 newcomposite2hsp70searchproteome_01.fasta
3 newcomposite2hsp70searchproteome_02.fasta
4 newcomposite2hsp70searchproteome_03.fasta
5 newcomposite2hsp70searchproteome_04.fasta
3 newcomposite2hsp70searchproteome_05.fasta
1 newcomposite2hsp70searchproteome_06.fasta
3 newcomposite2hsp70searchproteome_07.fasta
6 newcomposite2hsp70searchproteome_08.fasta
2 newcomposite2hsp70searchproteome_09.fasta
4 newcomposite2hsp70searchproteome_10.fasta
7 newcomposite2hsp70searchproteome_11.fasta
7 newcomposite2hsp70searchproteome_12.fasta
4 newcomposite2hsp70searchproteome_13.fasta
3 newcomposite2hsp70searchproteome_14.fasta
2 newcomposite2hsp70searchproteome_15.fasta
2 newcomposite2hsp70searchproteome_16.fasta
5 newcomposite2hsp70searchproteome_17.fasta
9 newcomposite2hsp70searchproteome_18.fasta
2 newcomposite2hsp70searchproteome_19.fasta
4 newcomposite2hsp70searchproteome_20.fasta
1 newcomposite2hsp70searchproteome_21.fasta
1 newcomposite2hsp70searchproteome_22.fasta
3 newcomposite2hsp70searchproteome_23.fasta
3 newcomposite2hsp70searchproteome_24.fasta
1 newcomposite2hsp70searchproteome_25.fasta
2 newcomposite2hsp70searchproteome_26.fasta
2 newcomposite2hsp70searchproteome_27.fasta
2 newcomposite2hsp70searchproteome_28.fasta
1 newcomposite2hsp70searchproteome_29.fasta
2 newcomposite2hsp70searchproteome_30.fasta
8 newcomposite2hsp70searchproteome_31.fasta
5 newcomposite2hsp70searchproteome_32.fasta
1 newcomposite2hsp70searchproteome_33.fasta
3 newcomposite2hsp70searchproteome_34.fasta
2 newcomposite2hsp70searchproteome_35.fasta
4 newcomposite2hsp70searchproteome_36.fasta
2 newcomposite2hsp70searchproteome_37.fasta
2 newcomposite2hsp70searchproteome_38.fasta
2 newcomposite2hsp70searchproteome_39.fasta
3 newcomposite2hsp70searchproteome_40.fasta
2 newcomposite2hsp70searchproteome_41.fasta
4 newcomposite2hsp70searchproteome_42.fasta
4 newcomposite2hsp70searchproteome_43.fasta
2 newcomposite2hsp70searchproteome_44.fasta
4 newcomposite2hsp70searchproteome_45.fasta
3 newcomposite2hsp70searchproteome_46.fasta
2 newcomposite2hsp70searchproteome_47.fasta
2 newcomposite2hsp70searchproteome_48.fasta
4 newcomposite2hsp70searchproteome_49.fasta
4 newcomposite2hsp70searchproteome_50.fasta
50 changes: 50 additions & 0 deletions bioinformaticsProject/proteomes/CompiledWCmcrA
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
1 newcomposite1mcrAsearchproteome_01.fasta
1 newcomposite1mcrAsearchproteome_02.fasta
2 newcomposite1mcrAsearchproteome_03.fasta
1 newcomposite1mcrAsearchproteome_04.fasta
2 newcomposite1mcrAsearchproteome_05.fasta
1 newcomposite1mcrAsearchproteome_06.fasta
2 newcomposite1mcrAsearchproteome_07.fasta
1 newcomposite1mcrAsearchproteome_08.fasta
1 newcomposite1mcrAsearchproteome_09.fasta
1 newcomposite1mcrAsearchproteome_10.fasta
1 newcomposite1mcrAsearchproteome_11.fasta
1 newcomposite1mcrAsearchproteome_12.fasta
1 newcomposite1mcrAsearchproteome_13.fasta
1 newcomposite1mcrAsearchproteome_14.fasta
2 newcomposite1mcrAsearchproteome_15.fasta
2 newcomposite1mcrAsearchproteome_16.fasta
1 newcomposite1mcrAsearchproteome_17.fasta
1 newcomposite1mcrAsearchproteome_18.fasta
3 newcomposite1mcrAsearchproteome_19.fasta
1 newcomposite1mcrAsearchproteome_20.fasta
1 newcomposite1mcrAsearchproteome_21.fasta
1 newcomposite1mcrAsearchproteome_22.fasta
3 newcomposite1mcrAsearchproteome_23.fasta
2 newcomposite1mcrAsearchproteome_24.fasta
1 newcomposite1mcrAsearchproteome_25.fasta
1 newcomposite1mcrAsearchproteome_26.fasta
1 newcomposite1mcrAsearchproteome_27.fasta
1 newcomposite1mcrAsearchproteome_28.fasta
2 newcomposite1mcrAsearchproteome_29.fasta
1 newcomposite1mcrAsearchproteome_30.fasta
1 newcomposite1mcrAsearchproteome_31.fasta
1 newcomposite1mcrAsearchproteome_32.fasta
1 newcomposite1mcrAsearchproteome_33.fasta
1 newcomposite1mcrAsearchproteome_34.fasta
1 newcomposite1mcrAsearchproteome_35.fasta
1 newcomposite1mcrAsearchproteome_36.fasta
1 newcomposite1mcrAsearchproteome_37.fasta
2 newcomposite1mcrAsearchproteome_38.fasta
2 newcomposite1mcrAsearchproteome_39.fasta
1 newcomposite1mcrAsearchproteome_40.fasta
1 newcomposite1mcrAsearchproteome_41.fasta
2 newcomposite1mcrAsearchproteome_42.fasta
1 newcomposite1mcrAsearchproteome_43.fasta
2 newcomposite1mcrAsearchproteome_44.fasta
2 newcomposite1mcrAsearchproteome_45.fasta
1 newcomposite1mcrAsearchproteome_46.fasta
1 newcomposite1mcrAsearchproteome_47.fasta
2 newcomposite1mcrAsearchproteome_48.fasta
1 newcomposite1mcrAsearchproteome_49.fasta
2 newcomposite1mcrAsearchproteome_50.fasta
50 changes: 50 additions & 0 deletions bioinformaticsProject/proteomes/abbreviatedhsp70
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
5 proteome_01.fasta
3 proteome_02.fasta
4 proteome_03.fasta
5 proteome_04.fasta
3 proteome_05.fasta
1 proteome_06.fasta
3 proteome_07.fasta
6 proteome_08.fasta
2 proteome_09.fasta
4 proteome_10.fasta
7 proteome_11.fasta
7 proteome_12.fasta
4 proteome_13.fasta
3 proteome_14.fasta
2 proteome_15.fasta
2 proteome_16.fasta
5 proteome_17.fasta
9 proteome_18.fasta
2 proteome_19.fasta
4 proteome_20.fasta
1 proteome_21.fasta
1 proteome_22.fasta
3 proteome_23.fasta
3 proteome_24.fasta
1 proteome_25.fasta
2 proteome_26.fasta
2 proteome_27.fasta
2 proteome_28.fasta
1 proteome_29.fasta
2 proteome_30.fasta
8 proteome_31.fasta
5 proteome_32.fasta
1 proteome_33.fasta
3 proteome_34.fasta
2 proteome_35.fasta
4 proteome_36.fasta
2 proteome_37.fasta
2 proteome_38.fasta
2 proteome_39.fasta
3 proteome_40.fasta
2 proteome_41.fasta
4 proteome_42.fasta
4 proteome_43.fasta
2 proteome_44.fasta
4 proteome_45.fasta
3 proteome_46.fasta
2 proteome_47.fasta
2 proteome_48.fasta
4 proteome_49.fasta
4 proteome_50.fasta
50 changes: 50 additions & 0 deletions bioinformaticsProject/proteomes/abbreviatedmcrA
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
1 proteome_01.fasta
1 proteome_02.fasta
2 proteome_03.fasta
1 proteome_04.fasta
2 proteome_05.fasta
1 proteome_06.fasta
2 proteome_07.fasta
1 proteome_08.fasta
1 proteome_09.fasta
1 proteome_10.fasta
1 proteome_11.fasta
1 proteome_12.fasta
1 proteome_13.fasta
1 proteome_14.fasta
2 proteome_15.fasta
2 proteome_16.fasta
1 proteome_17.fasta
1 proteome_18.fasta
3 proteome_19.fasta
1 proteome_20.fasta
1 proteome_21.fasta
1 proteome_22.fasta
3 proteome_23.fasta
2 proteome_24.fasta
1 proteome_25.fasta
1 proteome_26.fasta
1 proteome_27.fasta
1 proteome_28.fasta
2 proteome_29.fasta
1 proteome_30.fasta
1 proteome_31.fasta
1 proteome_32.fasta
1 proteome_33.fasta
1 proteome_34.fasta
1 proteome_35.fasta
1 proteome_36.fasta
1 proteome_37.fasta
2 proteome_38.fasta
2 proteome_39.fasta
1 proteome_40.fasta
1 proteome_41.fasta
2 proteome_42.fasta
1 proteome_43.fasta
2 proteome_44.fasta
2 proteome_45.fasta
1 proteome_46.fasta
1 proteome_47.fasta
2 proteome_48.fasta
1 proteome_49.fasta
2 proteome_50.fasta
41 changes: 41 additions & 0 deletions bioinformaticsProject/proteomes/candidatesofficial
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
5 proteome_01.fasta
3 proteome_02.fasta
4 proteome_03.fasta
5 proteome_04.fasta
3 proteome_05.fasta
3 proteome_07.fasta
6 proteome_08.fasta
4 proteome_10.fasta
7 proteome_11.fasta
7 proteome_12.fasta
4 proteome_13.fasta
3 proteome_14.fasta
5 proteome_17.fasta
9 proteome_18.fasta
4 proteome_20.fasta
3 proteome_23.fasta
3 proteome_24.fasta
8 proteome_31.fasta
5 proteome_32.fasta
3 proteome_34.fasta
4 proteome_36.fasta
3 proteome_40.fasta
4 proteome_42.fasta
4 proteome_43.fasta
4 proteome_45.fasta
3 proteome_46.fasta
4 proteome_49.fasta
4 proteome_50.fasta
2 proteome_03.fasta
2 proteome_05.fasta
2 proteome_07.fasta
2 proteome_24.fasta
2 proteome_29.fasta
2 proteome_38.fasta
2 proteome_39.fasta
2 proteome_42.fasta
2 proteome_44.fasta
2 proteome_45.fasta
2 proteome_48.fasta
2 proteome_50.fasta
3 proteome_23.fasta
28 changes: 28 additions & 0 deletions bioinformaticsProject/proteomes/completehsp70
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
5 proteome_01.fasta
3 proteome_02.fasta
4 proteome_03.fasta
5 proteome_04.fasta
3 proteome_05.fasta
3 proteome_07.fasta
6 proteome_08.fasta
4 proteome_10.fasta
7 proteome_11.fasta
7 proteome_12.fasta
4 proteome_13.fasta
3 proteome_14.fasta
5 proteome_17.fasta
9 proteome_18.fasta
4 proteome_20.fasta
3 proteome_23.fasta
3 proteome_24.fasta
8 proteome_31.fasta
5 proteome_32.fasta
3 proteome_34.fasta
4 proteome_36.fasta
3 proteome_40.fasta
4 proteome_42.fasta
4 proteome_43.fasta
4 proteome_45.fasta
3 proteome_46.fasta
4 proteome_49.fasta
4 proteome_50.fasta
Loading