From a0abd328061a5224e09c44a44715a36df5683317 Mon Sep 17 00:00:00 2001 From: Xiyuan Guan Date: Wed, 12 Oct 2022 13:12:48 -0400 Subject: [PATCH] add bash script and result files --- final_result.txt | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ orthogonal.sh | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 final_result.txt create mode 100644 orthogonal.sh diff --git a/final_result.txt b/final_result.txt new file mode 100644 index 0000000..7296426 --- /dev/null +++ b/final_result.txt @@ -0,0 +1,51 @@ +proteomes num of mcrA match num of hsp70 match +proteome_01 0 4 +proteome_02 0 2 +proteome_03 1 3 +proteome_04 0 4 +proteome_05 1 2 +proteome_06 0 0 +proteome_07 1 2 +proteome_08 0 5 +proteome_09 0 1 +proteome_10 0 3 +proteome_11 0 6 +proteome_12 0 6 +proteome_13 0 3 +proteome_14 0 2 +proteome_15 1 1 +proteome_16 1 1 +proteome_17 0 4 +proteome_18 0 8 +proteome_19 2 1 +proteome_20 0 3 +proteome_21 0 0 +proteome_22 0 0 +proteome_23 2 2 +proteome_24 1 2 +proteome_25 0 0 +proteome_26 0 1 +proteome_27 0 1 +proteome_28 0 1 +proteome_29 1 0 +proteome_30 0 1 +proteome_31 0 7 +proteome_32 0 4 +proteome_33 0 0 +proteome_34 0 2 +proteome_35 0 1 +proteome_36 0 3 +proteome_37 0 1 +proteome_38 1 1 +proteome_39 1 1 +proteome_40 0 2 +proteome_41 0 1 +proteome_42 1 3 +proteome_43 0 3 +proteome_44 1 1 +proteome_45 1 3 +proteome_46 0 2 +proteome_47 0 1 +proteome_48 1 1 +proteome_49 0 3 +proteome_50 1 3 diff --git a/orthogonal.sh b/orthogonal.sh new file mode 100644 index 0000000..d418445 --- /dev/null +++ b/orthogonal.sh @@ -0,0 +1,35 @@ +# Identifying candidate pH-resistant methanogenic Archaea +# Usage: bash orthogonal.sh $1 $2 $3 $4 +# $1 and $2 are the names of the reference genes, $3 is the name of the genomes to be searched, $4 is the result file. 
# Example: bash orthogonal.sh mcrA hsp70 proteome final_result.txt
#
# Pipeline:
#   1. For each reference gene ($1 and $2): concatenate ref_sequences/<gene>*,
#      align the result with muscle, and build an HMM profile with hmmbuild.
#   2. For proteomes/<$3>_01.fasta ... proteomes/<$3>_50.fasta: run hmmsearch
#      with both profiles, writing the hit tables under befMerged/.
#   3. Write one row per proteome with both hit counts to the result file ($4).
#
# Environment:
#   TOOLS_DIR  directory containing the muscle, hmmbuild and hmmsearch
#              executables (default: ~/Private/BioComputing2022/tools)

# -e is deliberately not enabled: every step whose failure matters is checked
# explicitly below, which avoids the surprising corner cases of errexit.
set -uo pipefail

if [[ -z "${TOOLS_DIR:-}" ]]; then
  TOOLS_DIR=~/Private/BioComputing2022/tools
fi

# Directory that receives the per-proteome hmmsearch --tblout tables.
readonly MATCH_DIR="befMerged"

# Print an error message to stderr and abort the script.
die() {
  printf 'orthogonal.sh: %s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: bash orthogonal.sh <gene1> <gene2> <proteome_name> <result_file>\n' >&2
}

#######################################
# Combine, align and profile the reference sequences of one gene.
# Globals:   TOOLS_DIR (read)
# Arguments: $1 - reference gene name (prefix of files in ref_sequences/)
# Outputs:   writes tot_<gene>.fasta, <gene>_aligned.fasta and <gene>.hmm
#            into the current directory
#######################################
build_profile() {
  local gene=$1

  # The glob stays outside the quotes so it still expands; if nothing
  # matches, cat receives the literal pattern and fails, which is reported.
  cat -- ref_sequences/"${gene}"* > "tot_${gene}.fasta" \
    || die "cannot read reference sequences ref_sequences/${gene}*"

  "${TOOLS_DIR}/muscle" -in "tot_${gene}.fasta" -out "${gene}_aligned.fasta" \
    || die "muscle failed for '${gene}'"

  "${TOOLS_DIR}/hmmbuild" "${gene}.hmm" "${gene}_aligned.fasta" \
    || die "hmmbuild failed for '${gene}'"
}

#######################################
# Count the hits recorded in an hmmsearch --tblout table.
# A hit is a non-comment line containing "WP". Lines starting with '#' are
# skipped because they are table comments, not hits, and may themselves
# contain "WP" (e.g. in an echoed path).
# Arguments: $1 - path to the tblout file
# Outputs:   the count on stdout (0 when there are no hits)
# Returns:   0 on success, non-zero if the table cannot be read
#######################################
count_hits() {
  local table=$1

  [[ -r "$table" ]] || return 1
  # awk exits 0 even with zero hits, unlike `grep -c`, which exits 1.
  awk '!/^#/ && /WP/ { hits++ } END { print hits + 0 }' "$table"
}

#######################################
# Search one proteome with one gene profile.
# Globals:   TOOLS_DIR (read)
# Arguments: $1 - gene name (profile <gene>.hmm must exist)
#            $2 - proteome FASTA path
#            $3 - path of the tblout table to write
#######################################
search_proteome() {
  local gene=$1 proteome=$2 table=$3

  "${TOOLS_DIR}/hmmsearch" --tblout "$table" "${gene}.hmm" "$proteome" \
    || die "hmmsearch failed for '${gene}' against '${proteome}'"
}

#######################################
# Entry point.
# Arguments: $1, $2 - reference gene names
#            $3     - proteome name (files are proteomes/<$3>_NN.fasta)
#            $4     - result file to (over)write
# Returns:   2 on a usage error; aborts with status 1 on any failed step
#######################################
main() {
  if (( $# < 4 )); then
    usage
    return 2
  fi

  local gene_a=$1 gene_b=$2 prefix=$3 result=$4
  local idx proteome table_a table_b count_a count_b

  build_profile "$gene_a"
  build_profile "$gene_b"

  # hmmsearch does not create the directory for --tblout itself.
  mkdir -p -- "$MATCH_DIR" || die "cannot create directory ${MATCH_DIR}/"

  # The ' <TAB> ' separator (a tab padded with spaces) is kept on purpose so
  # the output stays byte-compatible with previously generated result files.
  printf 'proteomes \t num of %s match \t num of %s match\n' \
    "$gene_a" "$gene_b" > "$result" \
    || die "cannot write result file '${result}'"

  for idx in {01..50}; do
    proteome="proteomes/${prefix}_${idx}.fasta"
    [[ -r "$proteome" ]] || die "missing proteome file '${proteome}'"

    table_a="${MATCH_DIR}/${gene_a}_match_${idx}"
    table_b="${MATCH_DIR}/${gene_b}_match_${idx}"

    search_proteome "$gene_a" "$proteome" "$table_a"
    search_proteome "$gene_b" "$proteome" "$table_b"

    count_a=$(count_hits "$table_a") || die "cannot read '${table_a}'"
    count_b=$(count_hits "$table_b") || die "cannot read '${table_b}'"

    printf '%s \t %s \t %s\n' "${prefix}_${idx}" "$count_a" "$count_b" \
      >> "$result" || die "cannot write result file '${result}'"
  done
}

main "$@"