From eaafe61154b9e9acf9225165eb040a95a09c12dc Mon Sep 17 00:00:00 2001
From: Benedicte-C
Date: Wed, 19 Aug 2020 14:52:35 +0200
Subject: [PATCH] Add script to check update.

---
Reviewer notes (not part of the commit; text between the three-dash line and
the first file diff is ignored when the patch is applied).

What the patch does
  * check/check.py (new): its main section deletes an existing res_check
    directory, runs ../clermonTyping.sh with "--fastafile fastafile --name
    res_check", loads ref_resultat through List_to_dic and
    res_check/res_check.html through Html_to_dic, then calls comparaison.
    comparaison writes one line per mismatching sample to check.log and
    prints it; an empty check.log is reported as success.
  * check/fastafile (new): 49 paths of the form db/<name>.fasta.
  * check/ref_resultat (new): a header line, 49 expected
    sample / Clermontyping / Mash rows and one trailing empty line.
  * clermonTyping.sh: version text changed from 20.03 (Mar. 2020) to
    20.08 (Aug. 2020).

Points to check before merging
  * check_file calls PARSER.print_help(), but nothing named PARSER is defined
    in check.py as shown (the "Global Variables" section is empty), so the
    missing-file path would raise NameError instead of exiting with status 1.
  * comparaison reads dico_new[sample] for every key of dico_old; a sample
    that is absent from the HTML report would raise KeyError rather than be
    logged as a mismatch.
  * List_to_dic skips the first line and silently ignores every line that
    does not split into exactly three tab-separated fields.
  * The return value of subprocess.call(cmd) is not inspected.
  * argparse, Popen and PIPE are imported but not used in check.py as shown.
  * All paths ('fastafile', 'ref_resultat', 'res_check',
    '../clermonTyping.sh') are relative; presumably the script must be
    started from inside check/ - confirm.
  * In Html_to_dic the literals tested with "in" and passed to replace() are
    empty strings in this copy; presumably HTML tags were stripped in
    transit - confirm against the repository before relying on this text.
  * The docstring of Html_to_dic repeats the one of List_to_dic, and both say
    "to a list" although the functions build and return a dict.
  * The columns of check/ref_resultat are written tab-separated below,
    matching split("\t") in List_to_dic; the separators could not be read
    from the copy under review - confirm.
  * The clermonTyping.sh hunk header announces 8 lines per side but only 6
    are present in this copy; the missing context lines are not
    reconstructed here.

 check/check.py     | 127 +++++++++++++++++++++++++++++++++++++++++++++
 check/fastafile    |  49 +++++++++++++++++
 check/ref_resultat |  51 ++++++++++++++++++
 clermonTyping.sh   |   4 +-
 4 files changed, 229 insertions(+), 2 deletions(-)
 create mode 100755 check/check.py
 create mode 100755 check/fastafile
 create mode 100755 check/ref_resultat

diff --git a/check/check.py b/check/check.py
new file mode 100755
index 0000000..8fa5163
--- /dev/null
+++ b/check/check.py
@@ -0,0 +1,127 @@
+#!/usr/local/bin/python3
+# coding: utf-8
+
+############################
+# Import
+############################
+import os
+import sys
+import argparse
+import subprocess
+from subprocess import Popen, PIPE
+import shutil
+
+############################
+# Global Variables
+############################
+
+############################
+# Functions
+############################
+
+
+def check_file(file):
+    """
+    Check if a file exist and is readable
+    """
+    if not os.path.isfile(file) or not os.access(file, os.R_OK):
+        print("the file " + file + " is missing or not readable.\n")
+        PARSER.print_help()
+        sys.exit(1)
+
+def List_to_dic(filename):
+    """
+    Send a file (three columns : sample, phylogroup and mashgroup with header) to a list
+    """
+
+    dico={}
+    with open(filename, "r") as f:
+        f.readline()
+        for line in f:
+            try:
+                sample, phylo, mash=line.strip().split("\t")
+                dico[sample]={}
+                dico[sample]['phylo']=phylo
+                dico[sample]['mash']=mash
+            except ValueError:
+                pass
+
+    return dico
+
+def Html_to_dic(filename):
+    """
+    Send a file (three columns : sample, phylogroup and mashgroup with header) to a list
+    """
+
+    dico={}
+    with open(filename, "r", encoding="utf-8") as f:
+        for line in f:
+            if '' in line or '' in line:
+                sample=f.readline().strip()
+                sample_tmp=sample.replace('', '')
+                sample=sample_tmp.replace('', '')
+                f.readline()
+                f.readline()
+                phylo=f.readline().strip()
+                phylo_tmp=phylo.replace('', '')
+                phylo=phylo_tmp.replace('', '')
+                mash=f.readline().strip()
+                mash_tmp=mash.replace('', '')
+                mash=mash_tmp.replace('', '')
+                dico[sample]={}
+                dico[sample]['phylo']=phylo
+                dico[sample]['mash']=mash
+
+    return dico
+
+def comparaison(dico_old, dico_new):
+
+    with open('check.log', "w") as w:
+
+        for sample in dico_old.keys():
+            if dico_old[sample]['phylo'] != dico_new[sample]['phylo'] or dico_old[sample]['mash'] != dico_new[sample]['mash']:
+                sentence='Error with '+ sample
+                sentence+="\tWaited :" + dico_old[sample]['phylo'] + '/' + dico_old[sample]['mash']
+                sentence+="\tNew : " + dico_new[sample]['phylo'] + '/' + dico_new[sample]['mash']
+
+                print(sentence)
+                w.write(sentence+"\n")
+
+
+
+
+############################
+# Main
+############################
+if __name__ == "__main__":
+
+    fastafile='fastafile'
+
+    # Clermontyping
+    if os.path.isdir('res_check'):
+        abso_dir = os.path.abspath('res_check')
+        shutil.rmtree(abso_dir)
+    cmd = ["../clermonTyping.sh", "--fastafile", fastafile, "--name", "res_check"]
+    subprocess.call(cmd)
+
+    ##
+    inputList='ref_resultat'
+    check_file(inputList)
+    res_waited=List_to_dic(inputList)
+
+    inputHtml='res_check/res_check.html'
+    check_file(inputHtml)
+    res_new=Html_to_dic(inputHtml)
+
+    print('%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%')
+    comparaison(res_waited, res_new)
+    if os.stat('check.log').st_size == 0:
+        print("Congratulation! Database is updated succesfull.")
+    else:
+        print("ERROR!!! Look at the check.log file.")
+
+
+
+
+
+
diff --git a/check/fastafile b/check/fastafile
new file mode 100755
index 0000000..e0ed38a
--- /dev/null
+++ b/check/fastafile
@@ -0,0 +1,49 @@
+db/042.fasta
+db/101-1.fasta
+db/327-20.fasta
+db/536.fasta
+db/53638.fasta
+db/55989.fasta
+db/APECO78.fasta
+db/ATCC35469T.fasta
+db/B1147.fasta
+db/B156.fasta
+db/B253.fasta
+db/B49.fasta
+db/B646.fasta
+db/B691.fasta
+db/B7A.fasta
+db/CFT073.fasta
+db/E1118.fasta
+db/E1426.fasta
+db/E1492.fasta
+db/E2348-69.fasta
+db/E24377A.fasta
+db/ECOR01.fasta
+db/ECOR31.fasta
+db/ECOR37.fasta
+db/ECOR42.fasta
+db/ECOR44.fasta
+db/ECOR46.fasta
+db/ECOR47.fasta
+db/ESC_BA9053AA_AS.fasta
+db/ESC_CA3818AA_AS.fasta
+db/ESC_FA7277AA_AS.fasta
+db/ESC_GA0630AA_AS.fasta
+db/ESC_GA5165AA_AS.fasta
+db/ESC_NA3019AA_AS.fasta
+db/ESC_NA8746AA_AS.fasta
+db/ESC_OA1838AA_AS.fasta
+db/H442.fasta
+db/H605.fasta
+db/H95.fasta
+db/IAI17.fasta
+db/IAI18.fasta
+db/IAI24.fasta
+db/IAI32.fasta
+db/IAI39.fasta
+db/IAI41.fasta
+db/IAI42.fasta
+db/ROAR8.fasta
+db/SMS35.fasta
+db/TA004.fasta
diff --git a/check/ref_resultat b/check/ref_resultat
new file mode 100755
index 0000000..f034a45
--- /dev/null
+++ b/check/ref_resultat
@@ -0,0 +1,51 @@
+sample	Clermontyping	Mash
+E1492.fasta	cladeI	cladeI
+H442.fasta	cladeI	cladeI
+IAI32.fasta	cladeI	cladeI
+B1147.fasta	cladeII	cladeII
+ESC_NA3019AA_AS.fasta	cladeII	cladeII
+ESC_OA1838AA_AS.fasta	cladeII	cladeII
+ESC_GA5165AA_AS.fasta	cladeIII	cladeIII
+TA004.fasta	cladeIII	cladeIII
+ESC_BA9053AA_AS.fasta	cladeIII	cladeIII
+B49.fasta	cladeIV	cladeIV
+ESC_FA7277AA_AS.fasta	cladeIV	cladeIV
+H605.fasta	cladeIV	cladeIV
+B646.fasta	cladeV	cladeV
+E1118.fasta	cladeV	cladeV
+ESC_CA3818AA_AS.fasta	cladeV	cladeV
+53638.fasta	A	A
+101-1.fasta	A	A
+ECOR01.fasta	A	A
+B156.fasta	albertii	albertii
+ESC_GA0630AA_AS.fasta	albertii	albertii
+ESC_NA8746AA_AS.fasta	albertii	albertii
+55989.fasta	B1	B1
+B7A.fasta	B1	B1
+E24377A.fasta	B1	B1
+536.fasta	B2	B2
+CFT073.fasta	B2	B2
+E2348-69.fasta	B2	B2
+APECO78.fasta	C	C
+IAI18.fasta	C	C
+IAI41.fasta	C	C
+042.fasta	D	D
+ECOR46.fasta	D	D
+ECOR47.fasta	D	D
+IAI17.fasta	D	A
+ECOR31.fasta	E	E
+ECOR37.fasta	E	E
+ECOR42.fasta	E	E
+ECOR44.fasta	E	D
+IAI24.fasta	E	D
+IAI39.fasta	F	F
+SMS35.fasta	F	F
+ROAR8.fasta	F	F
+ATCC35469T.fasta	fergusonii	fergusonii
+B253.fasta	fergusonii	fergusonii
+B691.fasta	fergusonii	fergusonii
+327-20.fasta	G	G
+E1426.fasta	G	G
+H95.fasta	G	G
+IAI42.fasta	Unknown	C
+
diff --git a/clermonTyping.sh b/clermonTyping.sh
index 785940e..0fbfcbd 100755
--- a/clermonTyping.sh
+++ b/clermonTyping.sh
@@ -10,8 +10,8 @@
 # 4] Launch in silicco PCR for getting phylogroup
 # 5] Reportings tools
 #
-# Current version : 20.03 (Mar. 2020)
-version="Clermont Typing Current version : 20.03 (Mar. 2020)"
+# Current version : 20.08 (Aug. 2020)
+version="Clermont Typing Current version : 20.08 (Aug. 2020)"
 # Contact: antoine.bridier-nahmias@inserm.fr