diff --git a/ChallengePart2.R b/ChallengePart2.R
new file mode 100755
index 0000000..c6e0251
--- /dev/null
+++ b/ChallengePart2.R
@@ -0,0 +1,47 @@
+rm(list=ls())
+
+# stringr supplies str_detect()/fixed(); seqinr supplies write.fasta()
+library(stringr)
+library(seqinr)
+
+# Scan in the FASTA file, one element per line
+fasta=scan(file="motifsort.fasta",what=character(),sep="\n")
+
+# Set motifs (literal amino-acid strings, matched with fixed() so that no
+# character is ever interpreted as a regular expression)
+motifOne='AKKPRVZE'
+motifTwo='AAQWWRNYGG'
+
+# Header lines start with ">"; every other line is sequence data that belongs
+# to the closest header above it.
+isHeader=startsWith(fasta,">")
+if (length(fasta)==0 || !isHeader[1]){
+  stop("motifsort.fasta must be non-empty and begin with a '>' header line")
+}
+
+# Number the records and glue the line(s) of each record into one string, so
+# sequences wrapped over several lines are handled as well. The factor levels
+# keep records that have a header but no sequence lines (they become "").
+recordId=factor(cumsum(isHeader)[!isHeader],levels=seq_len(sum(isHeader)))
+allSeqs=unname(vapply(split(fasta[!isHeader],recordId),paste,character(1),collapse=""))
+allNames=str_replace_all(fasta[isHeader],">","")
+
+# Sort every record into exactly one group; motif one takes precedence over
+# motif two, as in an if / else-if chain.
+hasMotif1=str_detect(allSeqs,fixed(motifOne))
+hasMotif2=!hasMotif1 & str_detect(allSeqs,fixed(motifTwo))
+hasNoMotif=!hasMotif1 & !hasMotif2
+
+# Lists sized by the actual number of matches (no hard-coded counts), each
+# sequence kept as a single string so write.fasta() puts it on one line.
+motif1=as.list(allSeqs[hasMotif1])
+motif2=as.list(allSeqs[hasMotif2])
+NoMotif=as.list(allSeqs[hasNoMotif])
+
+motif1Names=as.list(allNames[hasMotif1])
+motif2Names=as.list(allNames[hasMotif2])
+NoMotifNames=as.list(allNames[hasNoMotif])
+
+write.fasta(sequences=motif1, names=motif1Names,file.out="motif1.fasta")
+write.fasta(sequences=motif2, names=motif2Names,file.out="motif2.fasta")
+write.fasta(sequences=NoMotif, names=NoMotifNames,file.out="NoMotif.fasta")
\ No newline at end of file
diff --git a/NoMotif.fasta b/NoMotif.fasta
new file mode 100755
index 0000000..b9b1d8f
--- /dev/null
+++ b/NoMotif.fasta
@@ -0,0 +1,132 @@
+>sequence3
+GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY
+>sequence4
+GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY +>sequence5 +KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP +>sequence6 +KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY +>sequence7 +NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY +>sequence8 +YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY +>sequence9 +QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV +>sequence10 +AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY +>sequence13 +VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY +>sequence14 
+YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY +>sequence15 +GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY +>sequence16 +TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV +>sequence17 +DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY +>sequence18 +VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF +>sequence19 +HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS +>sequence20 +VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT +>sequence21 +FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY +>sequence23 
+SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY +>sequence25 +KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ +>sequence26 +ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY +>sequence27 +FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY +>sequence29 +GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY +>sequence30 +KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM +>sequence31 +NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY +>sequence32 +ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY +>sequence33 
+KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA +>sequence34 +KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP +>sequence36 +NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY +>sequence37 +VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY +>sequence39 +SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY +>sequence41 +YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY +>sequence43 +SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY +>sequence44 +KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP +>sequence45 
+SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY +>sequence47 +KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK +>sequence48 +AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY +>sequence49 +GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY +>sequence50 +NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS +>sequence51 +KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH +>sequence52 +RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT +>sequence53 +FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY +>sequence54 
+WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY +>sequence56 +VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT +>sequence57 +KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP +>sequence58 +SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY +>sequence60 +KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT +>sequence63 +IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY +>sequence65 +HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY +>sequence67 +CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ +>sequence68 
+GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY +>sequence70 +GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY +>sequence71 +GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY +>sequence72 +KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY +>sequence73 +EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY +>sequence74 +ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN +>sequence75 +FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY +>sequence76 +HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY +>sequence77 
+GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY +>sequence79 +FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY +>sequence82 +GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY +>sequence83 +SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY +>sequence85 +YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI +>sequence86 +ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS +>sequence87 +KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT +>sequence89 +GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY +>sequence90 
+SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY
diff --git a/challenge1.txt b/challenge1.txt
new file mode 100644
index 0000000..dd14e90
--- /dev/null
+++ b/challenge1.txt
@@ -0,0 +1,94 @@
+# Problem 1:
+#
+# align each of the reference files with muscle into a .align file
+# NOTE: all the files must be in the same place for convenience, or else the path
+# to each file has to be specified. Here all my files are in the current working directory
+
+./muscle3.8.31_i86win32.exe -in sigma.ref -out sigma.align
+./muscle3.8.31_i86win32.exe -in sporecoat.ref -out sporecoat.align
+./muscle3.8.31_i86win32.exe -in transporter.ref -out transporter.align
+
+# make HMM profiles from each alignment with hmmbuild
+./hmmbuild transporter.hmm transporter.align
+./hmmbuild sigma.hmm sigma.align
+./hmmbuild sporecoat.hmm sporecoat.align
+
+
+#
+# run hmmsearch with the --tblout option and give every
+# genome/profile combination its own unique output file
+# do the hmmsearch step 3 times per genome, one per HMM profile
+
+./hmmsearch --tblout Rosigma.hits sigma.hmm Roseobacter.fasta
+./hmmsearch --tblout Rosporecoat.hits sporecoat.hmm Roseobacter.fasta
+./hmmsearch --tblout Rotransporter.hits transporter.hmm Roseobacter.fasta
+
+./hmmsearch --tblout Rhsigma.hits sigma.hmm Rhizobium.fasta
+./hmmsearch --tblout Rhsporecoat.hits sporecoat.hmm Rhizobium.fasta
+./hmmsearch --tblout Rhtransporter.hits transporter.hmm Rhizobium.fasta
+
+./hmmsearch --tblout Lsigma.hits sigma.hmm Limnohabitans.fasta
+./hmmsearch --tblout Lsporecoat.hits sporecoat.hmm Limnohabitans.fasta
+./hmmsearch --tblout Ltransporter.hits transporter.hmm Limnohabitans.fasta
+
+./hmmsearch --tblout Csigma.hits sigma.hmm Clostridium.fasta
+./hmmsearch --tblout Csporecoat.hits sporecoat.hmm Clostridium.fasta
+./hmmsearch --tblout Ctransporter.hits transporter.hmm Clostridium.fasta
+
+./hmmsearch --tblout Bsigma.hits sigma.hmm Bacillus.fasta
+./hmmsearch --tblout Bsporecoat.hits sporecoat.hmm Bacillus.fasta
+./hmmsearch --tblout Btransporter.hits transporter.hmm Bacillus.fasta
+
+./hmmsearch --tblout Asigma.hits sigma.hmm Arthrobacter.fasta
+./hmmsearch --tblout Asporecoat.hits sporecoat.hmm Arthrobacter.fasta
+./hmmsearch --tblout Atransporter.hits transporter.hmm Arthrobacter.fasta
+
+./hmmsearch --tblout Vsigma.hits sigma.hmm Verrucomicrobia.fasta
+./hmmsearch --tblout Vsporecoat.hits sporecoat.hmm Verrucomicrobia.fasta
+./hmmsearch --tblout Vtransporter.hits transporter.hmm Verrucomicrobia.fasta
+
+# NOTE: the F*.hits files used in the next step are not created by any of the
+# hmmsearch commands above; run hmmsearch for that genome too, or cat cannot
+# find them and the genome will be missing from the tables
+
+
+#
+# cat all of the hmmsearch output files
+# remove the comment lines (grep -v "^#")
+# use cut or awk to get the columns you want (1st, 3rd, 5th)
+# the tables are appended to (>>), so remove old *.table files before a rerun
+
+cat Vsporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Rhsporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Rosporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Lsporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Fsporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Csporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Bsporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+cat Asporecoat.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sporecoat.table
+
+cat Vtransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Rhtransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Rotransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Ltransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Ftransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Ctransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Btransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+cat Atransporter.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> transporter.table
+
+cat Vsigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Rhsigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Rosigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Lsigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Fsigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Csigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Bsigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+cat Asigma.hits | grep -v "^#" | awk '{print $1,$3,$5}' >> sigma.table
+
+
+
+
+# trim the 1st column contents to just the last bit that is a "species" code
+cat transporter.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut
+cat sporecoat.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut
+cat sigma.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut
\ No newline at end of file
diff --git a/motif1.fasta b/motif1.fasta
new file mode 100755
index 0000000..d982f8f
--- /dev/null
+++ b/motif1.fasta
@@ -0,0 +1,22 @@
+>sequence2
+MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF
+>sequence11
+MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF
+>sequence12
+MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF
+>sequence35
+MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF +>sequence38 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF +>sequence46 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF +>sequence59 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF +>sequence64 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF +>sequence66 +MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF +>sequence78 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF +>sequence81 
+MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF diff --git a/motif2.fasta b/motif2.fasta new file mode 100755 index 0000000..94d29da --- /dev/null +++ b/motif2.fasta @@ -0,0 +1,26 @@ +>sequence1 +DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV +>sequence22 +LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA +>sequence24 
+SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV +>sequence28 +EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA +>sequence40 +EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK +>sequence42 
+NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL +>sequence55 +DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR +>sequence61 +EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ +>sequence62 
+DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI +>sequence69 +EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL +>sequence80 +DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL +>sequence84 
+NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL +>sequence88 +DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM diff --git a/motifsort.fasta b/motifsort.fasta new file mode 100644 index 0000000..df0ff18 --- /dev/null +++ b/motifsort.fasta @@ -0,0 +1,180 @@ +>sequence1 +DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV +>sequence2 
+MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF +>sequence3 +GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY +>sequence4 +GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY +>sequence5 +KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP +>sequence6 +KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY +>sequence7 +NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY +>sequence8 +YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY +>sequence9 +QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV +>sequence10 
+AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY +>sequence11 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF +>sequence12 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF +>sequence13 +VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY +>sequence14 +YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY +>sequence15 +GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY +>sequence16 +TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV +>sequence17 +DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY +>sequence18 
+VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF +>sequence19 +HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS +>sequence20 +VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT +>sequence21 +FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY +>sequence22 +LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA +>sequence23 +SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY +>sequence24 
+SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV +>sequence25 +KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ +>sequence26 +ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY +>sequence27 +FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY +>sequence28 +EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA +>sequence29 
+GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY +>sequence30 +KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM +>sequence31 +NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY +>sequence32 +ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY +>sequence33 +KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA +>sequence34 +KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP +>sequence35 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF +>sequence36 +NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY +>sequence37 
+VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY +>sequence38 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF +>sequence39 +SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY +>sequence40 +EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK +>sequence41 +YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY +>sequence42 
+NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL +>sequence43 +SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY +>sequence44 +KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP +>sequence45 +SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY +>sequence46 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF +>sequence47 +KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK +>sequence48 +AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY 
+>sequence49 +GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY +>sequence50 +NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS +>sequence51 +KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH +>sequence52 +RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT +>sequence53 +FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY +>sequence54 +WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY +>sequence55 
+DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR +>sequence56 +VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT +>sequence57 +KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP +>sequence58 +SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY +>sequence59 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF +>sequence60 +KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT +>sequence61 
+EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ +>sequence62 +DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI +>sequence63 +IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY +>sequence64 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF +>sequence65 +HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY +>sequence66 
+MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF +>sequence67 +CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ +>sequence68 +GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY +>sequence69 +EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL +>sequence70 +GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY +>sequence71 +GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY +>sequence72 +KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY 
+>sequence73 +EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY +>sequence74 +ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN +>sequence75 +FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY +>sequence76 +HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY +>sequence77 +GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY +>sequence78 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF +>sequence79 +FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY +>sequence80 
+DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL +>sequence81 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF +>sequence82 +GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY +>sequence83 +SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY +>sequence84 +NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL +>sequence85 
+YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI +>sequence86 +ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS +>sequence87 +KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT +>sequence88 +DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM +>sequence89 +GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY +>sequence90 +SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY