From 23449b23323afad6fe06df326c1338766af7a7f6 Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Tue, 14 Nov 2017 14:54:34 -0500 Subject: [PATCH 1/5] Moved file for ease of use --- motifsort.fasta | 180 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) create mode 100644 motifsort.fasta diff --git a/motifsort.fasta b/motifsort.fasta new file mode 100644 index 0000000..df0ff18 --- /dev/null +++ b/motifsort.fasta @@ -0,0 +1,180 @@ +>sequence1 +DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV +>sequence2 +MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF +>sequence3 +GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY +>sequence4 +GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY +>sequence5 
+KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP +>sequence6 +KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY +>sequence7 +NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY +>sequence8 +YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY +>sequence9 +QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV +>sequence10 +AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY +>sequence11 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF +>sequence12 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF +>sequence13 
+VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY +>sequence14 +YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY +>sequence15 +GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY +>sequence16 +TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV +>sequence17 +DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY +>sequence18 +VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF +>sequence19 +HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS +>sequence20 +VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT +>sequence21 
+FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY +>sequence22 +LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA +>sequence23 +SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY +>sequence24 +SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV +>sequence25 +KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ +>sequence26 
+ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY +>sequence27 +FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY +>sequence28 +EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA +>sequence29 +GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY +>sequence30 +KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM +>sequence31 +NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY +>sequence32 +ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY +>sequence33 
+KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA +>sequence34 +KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP +>sequence35 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF +>sequence36 +NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY +>sequence37 +VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY +>sequence38 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF +>sequence39 +SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY +>sequence40 
+EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK +>sequence41 +YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY +>sequence42 +NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL +>sequence43 +SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY +>sequence44 +KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP +>sequence45 
+SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY +>sequence46 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF +>sequence47 +KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK +>sequence48 +AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY +>sequence49 +GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY +>sequence50 +NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS +>sequence51 +KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH +>sequence52 +RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT +>sequence53 
+FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY +>sequence54 +WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY +>sequence55 +DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR +>sequence56 +VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT +>sequence57 +KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP +>sequence58 +SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY +>sequence59 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF +>sequence60 
+KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT +>sequence61 +EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ +>sequence62 +DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI +>sequence63 +IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY +>sequence64 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF +>sequence65 
+HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY +>sequence66 +MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF +>sequence67 +CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ +>sequence68 +GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY +>sequence69 +EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL +>sequence70 +GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY +>sequence71 
+GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY +>sequence72 +KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY +>sequence73 +EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY +>sequence74 +ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN +>sequence75 +FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY +>sequence76 +HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY +>sequence77 +GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY +>sequence78 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF +>sequence79 
+FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY +>sequence80 +DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL +>sequence81 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF +>sequence82 +GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY +>sequence83 +SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY +>sequence84 
+NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL +>sequence85 +YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI +>sequence86 +ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS +>sequence87 +KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT +>sequence88 +DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM +>sequence89 
+GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY +>sequence90 +SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY From 77667d1e1ec1f464e9e54bfbf4fd492682f55dab Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Tue, 14 Nov 2017 15:33:56 -0500 Subject: [PATCH 2/5] Almost completed - formatting slightly wrong on fasta files --- ChallengePart2.R | 42 ++++++++ NoMotif.fasta | 265 +++++++++++++++++++++++++++++++++++++++++++++++ motif1.fasta | 45 ++++++++ motif2.fasta | 53 ++++++++++ 4 files changed, 405 insertions(+) create mode 100755 ChallengePart2.R create mode 100755 NoMotif.fasta create mode 100755 motif1.fasta create mode 100755 motif2.fasta diff --git a/ChallengePart2.R b/ChallengePart2.R new file mode 100755 index 0000000..fcc8130 --- /dev/null +++ b/ChallengePart2.R @@ -0,0 +1,42 @@ +rm(list=ls()) + +#load string package +library(stringr) + +#Scan in files +fasta=scan(file="motifsort.fasta",what=character(),sep="\n") + +#Set mofits +motifOne='AKKPRVZE' +motifTwo='AAQWWRNYGG' + +motif1=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") +motif2=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") +NoMotif=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") + +a=2 +b=2 +c=2 + +for (i in 1:length(fasta)){ + if (str_detect(fasta[i],motifOne)==TRUE){ + motif1[a-1,1]=fasta[i-1] + motif1[a,1]=fasta[i] + a=a+2 + } + else if (str_detect(fasta[i],motifTwo)==TRUE){ + motif2[b-1,1]=fasta[i-1] + motif2[b,1]=fasta[i] + b=b+2 + } + else if (str_detect(fasta[i],'sequence')==FALSE){ + NoMotif[c-1,1]=fasta[i-1] + NoMotif[c,1]=fasta[i] + c=c+2 + } +} + +write.table(motif1, file='motif1.fasta', sep = "\n") +write.table(motif2, 
file='motif2.fasta', sep = "\n") +write.table(NoMotif, file='NoMotif.fasta', sep = "\n") + diff --git a/NoMotif.fasta b/NoMotif.fasta new file mode 100755 index 0000000..cdb2795 --- /dev/null +++ b/NoMotif.fasta @@ -0,0 +1,265 @@ +"V1" +"1" +">sequence3" +"2" +"GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY" +"3" +">sequence4" +"4" +"GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY" +"5" +">sequence5" +"6" +"KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP" +"7" +">sequence6" +"8" +"KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY" +"9" +">sequence7" +"10" +"NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY" +"11" +">sequence8" +"12" +"YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY" +"13" +">sequence9" +"14" +"QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV" +"15" 
+">sequence10" +"16" +"AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY" +"17" +">sequence13" +"18" +"VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY" +"19" +">sequence14" +"20" +"YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY" +"21" +">sequence15" +"22" +"GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY" +"23" +">sequence16" +"24" +"TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV" +"25" +">sequence17" +"26" +"DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY" +"27" +">sequence18" +"28" +"VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF" +"29" +">sequence19" +"30" 
+"HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS" +"31" +">sequence20" +"32" +"VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT" +"33" +">sequence21" +"34" +"FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY" +"35" +">sequence23" +"36" +"SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY" +"37" +">sequence25" +"38" +"KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ" +"39" +">sequence26" +"40" +"ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY" +"41" +">sequence27" +"42" +"FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY" +"43" +">sequence29" +"44" +"GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY" +"45" +">sequence30" +"46" 
+"KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM" +"47" +">sequence31" +"48" +"NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY" +"49" +">sequence32" +"50" +"ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY" +"51" +">sequence33" +"52" +"KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA" +"53" +">sequence34" +"54" +"KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP" +"55" +">sequence36" +"56" +"NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY" +"57" +">sequence37" +"58" +"VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY" +"59" +">sequence39" +"60" 
+"SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY" +"61" +">sequence41" +"62" +"YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY" +"63" +">sequence43" +"64" +"SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY" +"65" +">sequence44" +"66" +"KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP" +"67" +">sequence45" +"68" +"SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY" +"69" +">sequence47" +"70" +"KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK" +"71" +">sequence48" +"72" +"AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY" +"73" +">sequence49" +"74" 
+"GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY" +"75" +">sequence50" +"76" +"NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS" +"77" +">sequence51" +"78" +"KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH" +"79" +">sequence52" +"80" +"RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT" +"81" +">sequence53" +"82" +"FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY" +"83" +">sequence54" +"84" +"WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY" +"85" +">sequence56" +"86" +"VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT" +"87" +">sequence57" +"88" +"KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP" +"89" 
+">sequence58" +"90" +"SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY" +"91" +">sequence60" +"92" +"KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT" +"93" +">sequence63" +"94" +"IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY" +"95" +">sequence65" +"96" +"HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY" +"97" +">sequence67" +"98" +"CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ" +"99" +">sequence68" +"100" +"GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY" +"101" +">sequence70" +"102" +"GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY" +"103" +">sequence71" +"104" 
+"GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY" +"105" +">sequence72" +"106" +"KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY" +"107" +">sequence73" +"108" +"EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY" +"109" +">sequence74" +"110" +"ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN" +"111" +">sequence75" +"112" +"FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY" +"113" +">sequence76" +"114" +"HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY" +"115" +">sequence77" +"116" +"GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY" +"117" +">sequence79" +"118" 
+"FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY" +"119" +">sequence82" +"120" +"GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY" +"121" +">sequence83" +"122" +"SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY" +"123" +">sequence85" +"124" +"YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI" +"125" +">sequence86" +"126" +"ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS" +"127" +">sequence87" +"128" +"KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT" +"129" +">sequence89" +"130" +"GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY" +"131" +">sequence90" +"132" +"SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY" diff --git a/motif1.fasta b/motif1.fasta new file mode 100755 index 
0000000..5aaa491 --- /dev/null +++ b/motif1.fasta @@ -0,0 +1,45 @@ +"V1" +"1" +">sequence2" +"2" +"MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF" +"3" +">sequence11" +"4" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF" +"5" +">sequence12" +"6" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF" +"7" +">sequence35" +"8" +"MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF" +"9" +">sequence38" +"10" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF" +"11" +">sequence46" +"12" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF" +"13" +">sequence59" +"14" +"MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF" +"15" +">sequence64" +"16" 
+"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF" +"17" +">sequence66" +"18" +"MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF" +"19" +">sequence78" +"20" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF" +"21" +">sequence81" +"22" +"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF" diff --git a/motif2.fasta b/motif2.fasta new file mode 100755 index 0000000..dc57988 --- /dev/null +++ b/motif2.fasta @@ -0,0 +1,53 @@ +"V1" +"1" +">sequence1" +"2" +"DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV" +"3" +">sequence22" +"4" 
+"LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA" +"5" +">sequence24" +"6" +"SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV" +"7" +">sequence28" +"8" +"EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA" +"9" +">sequence40" +"10" 
+"EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK" +"11" +">sequence42" +"12" +"NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL" +"13" +">sequence55" +"14" +"DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR" +"15" +">sequence61" +"16" 
+"EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ" +"17" +">sequence62" +"18" +"DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI" +"19" +">sequence69" +"20" +"EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL" +"21" +">sequence80" +"22" 
+"DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL" +"23" +">sequence84" +"24" +"NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL" +"25" +">sequence88" +"26" +"DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM" From 2035b7c8aa32f4b865be439c10ed27145a5f0b9a Mon Sep 17 00:00:00 2001 From: Patrick LeBlanc Date: Tue, 14 Nov 2017 16:12:12 -0500 Subject: [PATCH 3/5] Challenge Part 2 Completed --- ChallengePart2.R | 52 ++++--- NoMotif.fasta | 397 
++++++++++++++++------------------------------- motif1.fasta | 67 +++----- motif2.fasta | 79 ++++------ 4 files changed, 214 insertions(+), 381 deletions(-) diff --git a/ChallengePart2.R b/ChallengePart2.R index fcc8130..c6e0251 100755 --- a/ChallengePart2.R +++ b/ChallengePart2.R @@ -2,6 +2,7 @@ rm(list=ls()) #load string package library(stringr) +library(seqinr) #Scan in files fasta=scan(file="motifsort.fasta",what=character(),sep="\n") @@ -10,33 +11,48 @@ fasta=scan(file="motifsort.fasta",what=character(),sep="\n") motifOne='AKKPRVZE' motifTwo='AAQWWRNYGG' -motif1=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") -motif2=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") -NoMotif=as.data.frame(matrix(ncol = 1, nrow = 10), sep = "\n") +motif1Names=vector(mode='list',length=11) +motif2Names=vector(mode='list',length=13) +NoMotifNames=vector(mode='list',length=66) -a=2 -b=2 -c=2 +motif1=vector(mode='list',length=11) +motif2=vector(mode='list',length=13) +NoMotif=vector(mode='list',length=66) + +a=1 +b=1 +c=1 for (i in 1:length(fasta)){ if (str_detect(fasta[i],motifOne)==TRUE){ - motif1[a-1,1]=fasta[i-1] - motif1[a,1]=fasta[i] - a=a+2 + motif1Names[a]=fasta[i-1] + motif1[a]=fasta[i] + a=a+1 } else if (str_detect(fasta[i],motifTwo)==TRUE){ - motif2[b-1,1]=fasta[i-1] - motif2[b,1]=fasta[i] - b=b+2 + motif2Names[b]=fasta[i-1] + motif2[b]=fasta[i] + b=b+1 } else if (str_detect(fasta[i],'sequence')==FALSE){ - NoMotif[c-1,1]=fasta[i-1] - NoMotif[c,1]=fasta[i] - c=c+2 + NoMotifNames[c]=fasta[i-1] + NoMotif[c]=fasta[i] + c=c+1 } } -write.table(motif1, file='motif1.fasta', sep = "\n") -write.table(motif2, file='motif2.fasta', sep = "\n") -write.table(NoMotif, file='NoMotif.fasta', sep = "\n") +for (i in 1:length(motif1Names)){ + motif1Names[i]=str_replace_all(motif1Names[i],">","") +} + +for (i in 1:length(motif2Names)){ + motif2Names[i]=str_replace_all(motif2Names[i],">","") +} + +for (i in 1:length(NoMotifNames)){ + 
NoMotifNames[i]=str_replace_all(NoMotifNames[i],">","") +} +write.fasta(sequences=motif1, names=motif1Names,file.out="motif1.fasta") +write.fasta(sequences=motif2, names=motif2Names,file.out="motif2.fasta") +write.fasta(sequences=NoMotif, names=NoMotifNames,file.out="NoMotif.fasta") \ No newline at end of file diff --git a/NoMotif.fasta b/NoMotif.fasta index cdb2795..b9b1d8f 100755 --- a/NoMotif.fasta +++ b/NoMotif.fasta @@ -1,265 +1,132 @@ -"V1" -"1" -">sequence3" -"2" -"GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY" -"3" -">sequence4" -"4" -"GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY" -"5" -">sequence5" -"6" -"KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP" -"7" -">sequence6" -"8" -"KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY" -"9" -">sequence7" -"10" -"NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY" -"11" -">sequence8" -"12" -"YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY" -"13" -">sequence9" -"14" 
-"QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV" -"15" -">sequence10" -"16" -"AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY" -"17" -">sequence13" -"18" -"VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY" -"19" -">sequence14" -"20" -"YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY" -"21" -">sequence15" -"22" -"GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY" -"23" -">sequence16" -"24" -"TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV" -"25" -">sequence17" -"26" -"DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY" -"27" -">sequence18" -"28" -"VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF" -"29" -">sequence19" 
-"30" -"HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS" -"31" -">sequence20" -"32" -"VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT" -"33" -">sequence21" -"34" -"FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY" -"35" -">sequence23" -"36" -"SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY" -"37" -">sequence25" -"38" -"KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ" -"39" -">sequence26" -"40" -"ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY" -"41" -">sequence27" -"42" -"FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY" -"43" -">sequence29" -"44" -"GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY" -"45" -">sequence30" -"46" 
-"KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM" -"47" -">sequence31" -"48" -"NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY" -"49" -">sequence32" -"50" -"ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY" -"51" -">sequence33" -"52" -"KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA" -"53" -">sequence34" -"54" -"KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP" -"55" -">sequence36" -"56" -"NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY" -"57" -">sequence37" -"58" -"VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY" -"59" -">sequence39" -"60" 
-"SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY" -"61" -">sequence41" -"62" -"YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY" -"63" -">sequence43" -"64" -"SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY" -"65" -">sequence44" -"66" -"KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP" -"67" -">sequence45" -"68" -"SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY" -"69" -">sequence47" -"70" -"KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK" -"71" -">sequence48" -"72" -"AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY" -"73" -">sequence49" -"74" 
-"GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY" -"75" -">sequence50" -"76" -"NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS" -"77" -">sequence51" -"78" -"KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH" -"79" -">sequence52" -"80" -"RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT" -"81" -">sequence53" -"82" -"FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY" -"83" -">sequence54" -"84" -"WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY" -"85" -">sequence56" -"86" -"VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT" -"87" -">sequence57" -"88" -"KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP" -"89" 
-">sequence58" -"90" -"SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY" -"91" -">sequence60" -"92" -"KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT" -"93" -">sequence63" -"94" -"IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY" -"95" -">sequence65" -"96" -"HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY" -"97" -">sequence67" -"98" -"CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ" -"99" -">sequence68" -"100" -"GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY" -"101" -">sequence70" -"102" -"GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY" -"103" -">sequence71" -"104" 
-"GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY" -"105" -">sequence72" -"106" -"KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY" -"107" -">sequence73" -"108" -"EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY" -"109" -">sequence74" -"110" -"ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN" -"111" -">sequence75" -"112" -"FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY" -"113" -">sequence76" -"114" -"HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY" -"115" -">sequence77" -"116" -"GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY" -"117" -">sequence79" -"118" 
-"FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY" -"119" -">sequence82" -"120" -"GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY" -"121" -">sequence83" -"122" -"SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY" -"123" -">sequence85" -"124" -"YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI" -"125" -">sequence86" -"126" -"ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS" -"127" -">sequence87" -"128" -"KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT" -"129" -">sequence89" -"130" -"GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY" -"131" -">sequence90" -"132" -"SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY" +>sequence3 
+GAFNAMFDYHGCWHKDAAATGSLCFDGRFIELYAVEAPRAHLALLDRVKRDVPPFWDPAALAEFIDKYGTHVIAGVKMGGKDVVCIKQLKGSNLTQSDVQSRLKKLSDDKLAQDSPESLTARDDKFLLGLNGSLLLGPGSAAWRSFRPSVVSHKDDILSIHIRRGGVDNGQGHSNWLSTISGSPDVISMAFVPITSLLTGVRGCGFLNHAVNLY +>sequence4 +GLFNSCFDFGSDSWASDAGDTRCLAFDGYFISLLDLRLDCRPLALAGHVVADVPAAWDPSAIASFIEKYGTHIIVGLSMGGQDVVYVKQDKSSPLSPSVIKEHLDKLGDQLFTGTCTLPPSHCKSRDHKFKVPEAFNVFDAQMTRQRIEGMTAPMSCKEGVTVIYSKRGGDTAASNHSEWLPTVPLMPDAINFKLVPITSLLKGVAGVGFLSHAINLY +>sequence5 +KAITNDIYIPAEFAACEFSLKSGKSSLYSSHINPGQLIFGQGSDTLHHTSNTHQLPLPPNALGEANIGKLYVSIVEYLRGCQDGAGQPNEPLVVFTSTELVPVVRGCFRYLESDSDELQENIEVYDIQYLFYVLKKEVMDIADLPNEHINKSITDNLFVNDFFEYHSGISCQFHEDNDRGKYCTQSKVARWCYMFSDYMCGDLAIKPLPGKHMPPKQEP +>sequence6 +KLAEYSMEKTKNDKFSFASQSTSCVFYRSYRLSSSPTLSQEFRKAVRGLPKTYSPENKLKFYRLIDTFGTHYITKVKLGGEVQSVTSIRQCQASLQGLSTEEVQMCLEAEASATIKATVKTELKHCKKDTEKMESKSSFSSLFNDRFTEIKGGQTTEPDLLFSSDKDPSAYKEWLNTLPLIPDIISYSLNSLHELLPTSCPVRKDLRSAIRHY +>sequence7 +NPFSASIPYKGYFTDLEIKKRKYIVAENTCLHSYATYSLRESIKNINSDFLLDTENLPILSKSITEKTCSKLIYMYNSKNDQCIKFIKPWIDFFRKYGTHVIVSAHFGGKTINTLEVPIHKFEELKIYNYKYPIENNRYLNVFKDRLLLQKILKIEKGEYAYRGGSQDNYMEDEQAEKNNDNLEKKANDVLNKYENSTSNKINLDIKGGTKLNEDWKQLTYEKWRNSIYTNIAPIYLDLFSLSSFMHIEKKESYNNALLYY +>sequence8 +YSFSASAGYKNALKKLKIQNSIIFMMKIYCLRYYTGISTTTNTWEFTNNFRNALNKLPNTFDGLKEDNECTYEYYITKSHSPQCEKNVNKWMTFFKLHGTHVAHEMYLGGKIIIKVNIEKEEYNKMKETNLDMKTVFDFYFHKMGLSARKNRRIQKFINKMHGSKTVSILGGHPGLNIDDPSFFEKWINSIDKNSMPIRTKLLPFSFFMDDPNMIKAYNDALMFY +>sequence9 +QFSEKIFPIEIGISSYSLKENKEIASYHKLLYPGKFKNVFARTQMIHGIDARDPRLEQNYSLVCIELIKYIEQFPGLAFFVSKEESLAGDKKCIDEIFLRGNVPIPKQIRFITHIQLFDYWCSIQHIELHEKSSFILNHIFKQLECAERCEYHKKINQKYHCALSDARHTSLMELICMKSYGATIIGSDTLPSVKFV +>sequence10 +AKYSKSVKKLRRVSGKSYSFVRAKAQLELAQYMLKSNDLMLHPEFLHRLRALPLSYVYGEYRQIFQDYGTHYITEAALGGEFEYTIILNKERLEQSDYTLEDYKKCAQAGLKVGANIYGVYVSAGVHGGSCNGLLNEMGENTARGSMVEDFVSVVRGGTSESITALLSKKLPTPELMRLWGEGVQFNPDFIRRTTQPLYELVTSRDFSQASTLKRNLKRALSEY +>sequence13 
+VLGGSRSDLAKFARSQHSVDKATFAIHEISCTYYSYRLADHPQLSAEFTKYLRRLPQRVQTKQDRGPYRRLIDTYGTHYIHQVQLGGKVRRITAFRTCLATLKGFAETDVKTCLNAELRMSLGFLPANVTLSNKCDNLLKGNMSMGFYQGFMTHKIEVIGGERYFPDILYQQDPSEAYDSWMNSLHDNPDVISYAIFPLHQLVPDSQIAANLRDAITEY +>sequence14 +YSKNKSVQRLRQYSETKDKTYMRVSGTVQLASFQMRTRGAMLSPTFIEDIKSLPRDYDKAEYFSILEMYGTHYTVSGTVGGKYDLVYVLDSIVMKSLDITTEDVTDCLKLNAGANIGGTENGAKVDVNPNVKTDICNKGGGETETEPRRTQKPVIESIISFVDGGSVEYVTALEEKLNKKEPVADVDDYIQWASSLKDSPTVINSKSNPIISLIPTDIKDAYIKTRNLERAIEEY +>sequence15 +GSQESEFFHNVTHYKSTDLGFVRLWSKVETAHFKMRSDKLMLHEDFYISLMDLPEQYDFGMYSRFFNTYGTHYVTQGTMGGTLEYALVLNKTKMAESKLQGEQAGRCFSASIGLSYPIGQGASVDLKLGVNPCSKDGTFNQGSDASSVMVEDIITLVKGGILDSTSGVMVVRNPETYRTWGASLKYNPTLIEHEIMPIYELVRFSTAADHVGARLANLRRAWDEY +>sequence16 +TDICTVPVEICIKPTLLNGTINIECFQTIINQPIPIQHFLNSKHYTDFEHGISQENNPVPQTDFDFLWKKINTFIKSNMSKYSDSSMLPIIICTPFISSVQCVEFLASQAKVSDVRRSIFNTMFSVDDFVECVNRFKEIIPNTNAIYNFYKPLVCWTCNNDFKCDFHKSNGTRTFCCSKTNSEYLASTLCDLYKTIKSKIFVASMPSQV +>sequence17 +DRRKFHKTVTESRAHRLIILKNKVELAQFQNTAPEYLTLAEGFWRALSSLPTTYDYAAYRQLFQTYGTHYFSEGSLGGEYQALLELTQHALATTSTTSREYERCWRKVKRRFLRKKVKTVCEKLTSSTAASYVTPWSPGTSMRNVPIKVDVVGGNPGLKRFLSILDLENPEENGRKYDDWASSVKDFPQIIEQKVRPLYELVKEVECAGLKKLHMKQALEEY +>sequence18 +VNYEHKLENKSLNKLLTKNNLSIKKINCSIHTSGMIISYQWKLKKSISILLNDIQNKLVKDSGHTSNSNPQKNQKNIEKDWYNIFNTYGTHVLTKITLGGKIIEINAVEGGQNITENTSIFGSKLDINFFKMSLNSNSKDKLHDLDKNKSEKIIILGGNAMTTDRKTTNNNGEINYDKKLDKQKWVETIKYNPVPIKFELTPLSYFIYQNFSDENLVNSFHYF +>sequence19 +HGDMPSLCEQRYVPCEIACVRYSLREGILGSFHDFIDPGELPRGFRYHCQSGSASTHQIPISGFELANSDYHNMFRKLCSFVCPTPCPVVPVYTKANDIYRVDWCLQWLANKAGMENHFRVQEVETLIIKFYQDKLQEEPSRPTVSRLLDVVQWDYSSNTRCKWHEDNDMWCCALASCKKIAYCISKALASVYGVTLTPAHLPNPERS +>sequence20 +VSLSGEYVPAELAIIKYSLNDGVMDSLNVLINPTDLPLGMALDAKTHSSSTHQLPVPPDALGEANYEKILRQILKFFKNTSGSKVVPPIFTWNKDIPMVDSILRGILEATDLDYVKFSILPLIDFFYNLKLATEDYGLDIKTFPSIHLAKALLEKDVYAYTAGIACDVHEQLNNQVACALSRVVRWAYVISDSCCLDVGIEMEKGRHLPHNMTT +>sequence21 
+FSFSASTGYKNFVKSTATNKVRTYITKTYCLRYVGGIVDYHSLDTTDEFKKAVEALPDKFDSHSCTIETFKSNEDDSICAETVLPWMQFIKMFGTHFTTIVHLGGKITHQVQIDKSDVLHMQQNGINVDAAVKASISPVMVDSLQGGFASTSEKASLSQSNNLKYDKQVLVIGGDGLVDSKNANSLNNWAKELYKRPMPIKIKLESIKSLLGKKRELFDEALKFY +>sequence23 +SHSRSSQFASSHSRKDKFSFTTHNLKCSYYTFRIHSRPPLSKEFEESLKNLPSTYDHKNTSAFTQFLSVYGTHFIRRVRLGGHVNSITAIRTCQASMSQMSVQTVSNCLSVEAQANIKGVTVSAATQFCKTKSSKLKTGATFRQAFSDRSIEVLGGDGDVGDVLFNSNGVAGFKKWLASIKRVPGLVWYQISPLHLLVPDNPVLQETLSKAISHY +>sequence25 +KAIGNDIYMPAEFAACKFSLRSGRGPVYSSHINPGQLIFGQASDAQHHTSTTHQLPLPPKAMGESNMGSLYVNIVKYLRDCQGAGNPLVVFTTAELMPVVSGCFRYLQSDSDEVGEQIHVYDILYLFYVLKKEVMDIADLPHANINKCITDNFFFNDFFEYYSNIACQFHEDNDRGKYCTHSMVSRWCYTFCDYMCGDLAIKPLAGKHMPPVQEQ +>sequence26 +ASFSASADFKQMKDTLSQKDTQCIQSHATCTAFDLSFYNDINSLPLLSLQLVDKIQQLYSYSNYTNEKEYYYDFFDSWGTHVATSVRLGSLFGYQFKMSSSSVQQQSSLGFDASVGASLYGVKGKVSTSYAQQQLNSFQQSLKSWSSYSLGATPNANLDAAQWATQTLDTPMPIKTELTPIYTFISQYQNNADIPLNSTTMAYVVNAMQNY +>sequence27 +FSASATNEFSDSSLRKSENEFSRCQQSFDLWSISIPADIARLQNYVSDDFIKLINAINPESKDSIATVFNVYGSHVLMSGVMGGKAHVSASANKLTLTQKFEMSTIVQAKYEQLTSQLSVEDKLKYSEAFDSFSESGSYTYDILGGSPSLGALVFKNNSQGSSDDNLKNWIQSISSMPVLTKFIDQTSLMPVWLLCEDKTKADALKKY +>sequence29 +GSFSASTGYKKFINEVSKRTSKTYFIKSNCIKYTIGLPPYVPWEQTTAYMNAVGILPKEFTGLNEDSCAPDVYEQKKMTKQCKNVHQWIQFFKTYGTHIIVEAQLGGKITKIINVSNTAVNQMKKDGVSVKAQIQAQFGFASVGGSTSVSSDNSSKNDNSSYDMSEKLVVIGGNPIKDVTKEENLYEWSKTVSSNPMPIHIKLLPIYKSFDSEELKESYEQAVLYY +>sequence30 +KYNINRLLCYPAEIAITTFNMKEGIIYSDSKFVEFDERWAFGQDERDHRTMSERVNENEDLDELMHQLSSTIGIDHLSTDHNPESPFGVFEWLRSRIDIYPYAKILVDMNQFRFVYNGLKNIAKYHGFTGQTYFNENIKFNMVSIQDFTDVLLDYCSLLVARRWSDQDINNQYLRPNLVPNRDKNTICEYHETVPCPTRYNCMKAHNSRLVHHFFTIMKAHRLQNFRYSPPVHEPCIEDM +>sequence31 +NGKFSTENQRMKIHQVKDSSVTTRVQIRNFIYKVKVFPDFSLDVRFAQQAKEIADAIENNQSRYADYLSERMVMDFGTHVITSVDAGASLVEEDYLNSKYVSDNVSQSSSISAQAGLNFFDKLKFDISSHNSQQSSTLQGYQSNIRYSLIQSHGGGIPFYPGMTLQKWQESTRNNLVAIDRSGLPLQYFISPNMLPDLPQPTVRKVSHLVRSAIERY +>sequence32 
+ISLGINHELDQFHQEITQNNKAVSVSQSYWAQYSLTTAPAFLMPLNPMFKQSLDALNRMAKEPTTDTQQTIYNQVINSFGTHYVTSAIMGGAAKIYTTLDQNYLKTVDIEQTKTQIGINFSYNVFQFKFGFNSTDLAQKLDENFKKNSNDIIIFSPEVDHISDPKAWSTWESTVPEKPQPVNTTVSYISDLAYEFPEVQAHLRKTIEFY +>sequence33 +KAVNTDIYIPAEFSACEFSLKTGVNSLYSTMIDPSQLIFGQTCDAMLYAAATHQLPLPPAALGESKMTKLYHSIQDYLRSRLERTDKNLKSLVVFTKTDDIDMVKSCFRFIKSGYHDEQSKRYDDDNDEENDQFKFFEAAASKFLPIVVYDIQYLFLALKLAAMDIGGLTLPKPNLYITDAFFSRDFYEFQDGIACWFHEDMDRSKYCTQSKVKRWAYTFCDYMCADLAIKMQPGKHMPPSYKA +>sequence34 +KALTGDIYVPAEFSACRYSLKGGISSNYSTMINPGHIIYGQSRDAQDHSKTTHKLPLPPQAFGETNMGKLYIDIFNWLSVRNEEKLDQDPVIVYTTPELMPVVKSCFRYLASEAEIDEDERKIMVFDIHHLFYTLKKSVLDVAGVTNDRINFHVTNNFFVKDFFEYTEGISCDYHEKIDRSKYCTNSMVKRWGFTFSDYMCADLAIPLQPGKHIPLKVKP +>sequence36 +NSFTGSLEYKNALMNFKSKRQKIYNKTEQCVRYQVGIPLNLKWGYTEYFNRTLSRLPILSSKVIKNCNIDNKLNLSDEECKSIKPWIKFFEVFGTHFNNQLTLGGKINQTMVFDSSTLEELKKKGIDIEAEVRTELGSGNVKLNLDMGGKKSRLDEIGQKKMSVLGGKMPNFPMDDNEFAHWAETVAENPMPIGVVSTSLKTLMHPAMHQSYDQALHQY +>sequence37 +VNGKFSTEFQRMKTLQVKDQAVTTRVQVRNRIYTVKTTPTSELSLGFTKALMDICDQLEKNQTKMATYLAELLILNYGTHVITSVDAGAALVQEDHVRSSFLLDNQNSQNTVTASAGIAFLNIVNFKVETDYISQTSLTKDYLSNRTNSRVQSFGGVPFYPGITLETWQKGITNHLVAIDRAGLPLHFFIKPDKLPGLPGPLVKKLSKTVETAVRHY +>sequence39 +SSHNSAFKQAIQASHKKDSSFIRIHKVIKVLNFTMKTKDLQLSDVFLKALNHLPLEYNAALYSRIFDDFGTHYFTSGSLGGVYDLLYQFSNEELKNSGLTQEEAKNCIRIETKKRYFIVTKTKVEHRCTTNRMSEKYEGSFLQGSEKSISLVKGGRSEYAAALAWEKGSSGPGEKTYSEWLESVKENPAVIDFELAPITDLVRNIPCAVTRRNNLRRAFREY +>sequence41 +YSGYNNDEYTHDDMLHNLNKHNKLLIKSYKCIVYKANLTSLNFLKNKNNDEIGLNFNGMLILNVLKKLNKNCNSEFDNQKCPISMFRNDPFDANCIRCIMPWMEFFKDYGTFMTKEITMGGVINKFYNIKKYEGSMRKEYKKKTIKQSSTFFHLSKSRSESLNEKKSGETNKEELEELYTLTIGPEPPGNVSNSKVISDWLEKVVHNPTPIDLELVPIKQIIPEKYLKIYENALKYY +>sequence43 +SGSRESAFLNKLSKYNEKKYSFIRIFTKVQTASFKMRRDNIMLDEVMLQSLMELPEQYNYGMYAKFIDDYGTHYITSGSMGGVYEYILVLNKENMTKSGVTSDDVTSCFGGSFGIDYDYTDNLQITGSLSGKHCKKLGGGHREDEESNMAVEDIISRVRGGSSGWGGGLTQNGSIITYRAWGRSLKYNPAVIDFEMKPIYEILRHTNLGPLEAKCQNLRRALDQY +>sequence44 
+KALTTDVYVPAEFSASEYSFNEGIMSVYSTLIDPGQIIFGQGSDAQHHSSTTHNLPLPPNALGEKNMGKLYRNILEYLSKIQEGKDATKPFVVFTKTDMVPVVKSCFRYLACENQDGSYENGDQIQVLDIQYLLFILKKEVLDIAGVSDEKINLYVTDAYFLKDFFEFTPEISCQYHEENDRSKYCTQSLVMRWAYTFSDYMCSDLAISVQPGKHIPPKTKP +>sequence45 +SVAGSHSKVANFAAEKTYQDQYNFNSDTVECRMYSFRLVQKPPLHLDFKKALRALPRNFNSSTEHAYHRLISSYGTHFITAVDLGGRISVLTALRTCQLTLNGLTADEVGDCLNVEAQVSIGAQASVSSEYKACEEKKKQHKMATSFHQTYRERHVEVLGGPLDSTHDLLFGNQATPEQFSTWTASLPSNPGLVDYSLEPLHTLLEEQNPKREALRQAISHY +>sequence47 +KALTTDVYVPAEFAACEYSLKEGIRSIYSTMIDPGQIIFGQGSDALLHSSTTHDLPLPPNALGEKNMTKLYRNIVDYLSKCQGKGKTLVVFTPAENITMVKSCFRYLECDDDFRDGGEKIQVFDIQYLLFILKKEVMNVADLNDEKINKFATDAFFKKDFFEFTAGIACQYHEDNDRTKYCTQSMVTRWAYTFTDFMCGDLAITVQPGKHIPAQTK +>sequence48 +AKFSLSTNYSEISDLLKNNDNKLYVDKSYCFLLEAALPIHNSLKMTRSFATAMSKLTRDFKKHTKDCNAIKYSINKNNKDCKEIKNWMELFDQFGTHFSYNIKLGGRITFITQEEGSKDERGNEKSVDVGVGGKFEKDNKGVGIEGNVKFVFGNKRGESKNLSFKYTNILGGLPVSDISKESEYVKWIKSVYKYPMPIRTQFAPISKIFKSKALKDSYDEAFRFY +>sequence49 +GSFSASVGYASASNTISKKKFRMFILKSYCFKYVASLSQYSQWKLSDQFLRAINLLPSYFNSLEHDGKYCNAEELRDNKTGMDSCGKSVESWLYFFKNFGTHVSTVIHLGGKITQQVKISKNEYKSLSESGLSTSVSASVGFGLFKANASSSTDSKESSNEESSNSSIEKETVIIGGTTIYDPNDPSNFEKWADSIKNNPMPIKGQYEPLSRILPERLTKIYDEALSFY +>sequence50 +NIDGECMLAEMAMNEFSLFSGIVEKFHAIVGPWMPESESHRRRASRHALETHRIPLQNNFATITKKRLVEEILGRVEPSIACHQGVKVGLYSDACNEKTKIDLNIKNNFKDPGMLCDKNDRRFILVLQSELDLMVDSMKHLANNVGFHYDGFPVTPNCFVIVEAFVEAISDIMNEKIDVETMRWFSLLGQKVDAEDSVSPWETGTDFHCARHSEPKSNFCASVTVGRTCCIVYHVIGSFFRRYHLKKIPTAHQPSSSNS +>sequence51 +KALNGDVYQPAELSACRFSLKGGISSNYSTMINPGHIIFGQTSDAQDHSRTTHKLPLPPNAMGEKNLGNLYSDTLKWLSASNDEEDEQYDHPVIVYTTPELMPVVKSCFRYLACEGDTDKHAKKIIVYDICYLFLTLKKTVLDLVGVPSDHMNIHVTNSFFRRDFFEFSSGIACDYHEEVDRTKYCTKSMVLRWGYMISHYICGDLAIPLQPRKHVPIEVKH +>sequence52 +RTNTGVHLPAELAVVRYSLEGGVKDKLHMFINPGRLPIGMAYDAQRHAEEDHQLPLPPNAMGVSDYGDVAMRLFSFLLQNDDMPLLFTDETDVPRVESMLEHILSDHLSEIELRICPLAELFFRLKQNVELYMMDQTTFPSVYIAQQIITKDVYDYTKGISCDYHEEKDNVLYCPLSRCIRWAYIISDNCCQDMGIEPIPGKHVPLNANT +>sequence53 
+FSGSLTCEFVKKSTQHAKNTVTCSTAAHSLYTLKEDDSSNPSEKRLDSCFRNWIENKLSANSPDSWSAFIQKFGTHYIASATFGGIGFQVLKLSFEQVEDLHSKKISLETAAANSLLKGSVSSSTESGYSSYSSTSSSHTVFLGGTVLPSVHDERLDFKDWSESVHLEPVPIQVSLQPITNLLVPLHFPNIGAAELSNKRESLQQAIRVY +>sequence54 +WAFTASSEFNHMQQKIEQTSATFVISMATCQIAQITQVPELAEFHQSFIDQLSALPVEYSAPQYLEFLSNFGTHYATDIILGSKVGYVYTLPPAIVDDFDQKKFKEIDLKQAATITSALLKGVIGQQILPKEQEAKAYSDVSKLSTQSFTIEIGPQSTENTPKDWLRETELEPTPIRYTLKSISELVSEGKGQLSSVKEYQKIGQNLKKALTDY +>sequence56 +VTSGGTYIPAEMGLVRYSLKDGVMDKLHMFIDPGKLPLGMAYDAKQHSESDHQLPIPPDAKGEKDNDEIILKLFSFLSQQEKMPPLFTETNDIRMVENILKGILNQGSMDENTLLVCPLSELFYQLKRATESFGLDIKTFPSVHIAQAIIQKDVYEYTKDISCEFHEDQGNGKYCPLSRCVRWAYIISDSCCLDLSIEMKPGRHLPMNADT +>sequence57 +KSMTNDIYIPAEFSACQFSLKSGICSMYSSHIDPGQLIFGQGSETMHHTKHTHQLPLPPNAMGESDIGRLYANIVEYLRACNPDAKPNDPLVVFATPEFMPIVKGCFRYLESDSEEPLATIHIYDIQYLLYVLKLEVLDSVDIRNVTVNRTATDSLFINDYFCYHLGISCQYHEDIDRCQYCTQSIISRWCYVFSDFMCGDLAITPLPGKHMPPKQEP +>sequence58 +SSSRSYTSHTNEIHKGKSYQLLVVENTVEVAQFINNNPEFLQLAEPFWKELSHLPSLYDYSAYRRLIDQYGTHYLQSGSLGGEYRVLFYVDSEKLKQNDFNSVEEKKCKSSGWHFVVKFSSHGCKELENALKAASGTQNNVLRGEPFIRGGGAGFISGLSYLELDNPAGNKRRYSAWAESVTNLPQVIKQKLTPLYELVKEVPCASVKKLYLKWALEEY +>sequence60 +KDACTPAELAVVQFTLKHGMRNIYHTLINPDGSQYATQEHVRATHQYPNALGNDDLEGILADLLEFVRLECGPEAELSPMFTLESQISVVNNALEFLNGGVASQLKVHPIEYLFYVLKKATCAAGILPPPASFHITNAQFNLDPHEFLSDIGCEFHKQRDLTAHCAKSYVTRWAFAFADYMCSDLAIKMLPNRHMPNRLDT +>sequence63 +IDLANEPLYREAVKASQQKDSVFYRVHQVIATSTFKVKSSDLYLSDPFLQFLNSLPLEYNYALYRHIFQLFGTHYFSSGTLGGKYDLLFQFDREELKTFGLKESDSEYCLSDDDTLVTFFYNRHKQRNTCGNISMKTKYEGSMVKASERCITSVQGGRTEFAAALAWEKKGVSPQSTVYTDWIKSTIENPVVINYELLPLVNLVRGISCAVTKRRHFHRALEEY +>sequence65 +HPFNDSNYYKMLVKRINRGDSIIIEKKLCSKYFSFINDINKNDLDTFFLTTLNELGDNYQNIKDDTYKCSLQYYKMNNMNKYSENCLKTITPWISFFNMYGTHVISGVYYGGKIIHNLYFENNNLKKKEYKIRMYKSRLNPFSTINSNLYFGSSLSKEKIIYIRERNLIMDGGVHINPYNINEVNMENKKKNIYVNNVEKNLYDQKKKYRNYYNFYELKDDVRKRNYYNSWKDTIEWEQAKPVKLNLVPLSEFINSEEGKSAYYMALEFY +>sequence67 
+CEQRFLPCEIGCVKYSLQEGIMADFHSFINPGEIPRGFRFHCQAASDSSHKIPISNFERGHNQATVLQNLYRFIHPNPGNWPPIYCKSDDRTRVNWCLKHMAKASEIRQDLQLLTVEDLVVGIYQQKFLKEPSKTWIRSLLDVAMWDYSSNTRCKWHEENDILFCALAVCKKIAYCISNSLATLFGIQLTEAHVPLQ +>sequence68 +GKFSEENTRMKIHQVRGNSVTTRVQVRNHLYTVNAYPDFTLDSRFSQQISELADAIENNQTRQAMYLSEKVILEYGTHVITSIDAGATLVQEDYIKRSYVSDTNSERSSVSASAGINFFNMVNFNFGSKETEQTSETLTYQQNITYSLVQSHGGALFYQGITMQKWQESTQNHLVAIDRSGLPIHYFLNPAVFPDLPVPTLHKLAFSVQKAAERY +>sequence70 +GMFNNMFAFSKCWPKDASSVKTLAYDGWFISLYSVEIVRKQLTLRDEVKREVPSSWDSAALAGFIEKYGTHVVVGVTMGGKDVIHVKQMRKSNHEPEEIQKMLKHWGDERFCVDPVESKSPASVYSGKPKEENLLQWGLQPFGTSVSSAVVMHTKNEEIMRVCIRRGGVDLGQSHERWLSTVSQAPNVISMCFVPITSLLSGLPGTGFLSHAVNLY +>sequence71 +GNFNATFGFQSGSWATDAANVKSLGLDASVVTLFNLHIHNPNRLRLTDRVRNAVPSSWDPQLLARFIERYGTHVITGVSVGGQDVVVVRQDKSSDLDNDLLRHHLYDLGDQLFTGSCLLSTRRLNKAYHHSHSQPKFPEAFNVFDDKQTVAFNNFSINSQNGITVICAKRGGDGRAKSHSEWLITVPDKPDAINFNFIPITSLLKDVPGSGLLSHAMSLY +>sequence72 +KPFSASMPYKSYFADLEIKKKKYALAQNMCVLNYATYDLKESGNNINKDFVLDIEKLPILTKNQMKLCTKVLYMNNNLHCSEGIKSWMKFFEKYGTHVVLSAHFGGMSFNTMEITKRKIEEIKIYKYKYSLWNNPYLNIFKSGSLFQDLSINVDGHKENKKNNSNNNINIDEKKKNDAYIKNDVLIEQYRDNINLEIRGGNNFDEKWRNLTYLVWKNSIYSNIVPIHLDLYSLNTFMPIEKKESYDMALLFY +>sequence73 +EFSAEFMFLNNISKYTNKEMGFVQLMSKIQTSQFKMRSKDLVLDEDMLWALSDLPDHYHFGAYSQFFNEYGTHYVTEGTMGGLMDYVAVVNINEMEENQMTGQMIGSCIGGSFGLVFMEKIKATVKGKSCGKFTSNEKTSDESHSAIKDVFGFVKGGNTASSAGSLGIKDAKSYKDWGKSLKYNPALIEFEILPIYELLRLSTAAEQLSSKLPHVKMAWEEY +>sequence74 +ELPDDMGYMPCEIGVVEYSLQEGITREFHRFIQPGKPPLGYRYLCQSTSDNTHQIPIEGFELAEGDYHRLWTDLCKFTSPNGRDFPPLYVQVTHTSMCEWCLDWLSEMAGEYNRFHVYELDSLVKDLYEHGEGHAPSLSMIASILNTSVFDYEDGSSCEYHASKEVKYCALGAVKRFCFSISDSMAQVYDLQLTARHLPERPEN +>sequence75 +FKFSASAKFKKLQDVSKSGKSKMFINKSYCFKYVAGISTSLKWDFTLGFQSSLGRLSDFKGLEKDSICKPFIYREDPKNENCQELGISDWMELFNTFGTHVATKIYLGGKIFTTLEIKKSQEKKLSDQGLDVRAILSAKIKDTDIDSNVEVSTIKSKNAGDFLLDTKKSTFVLGGDIYGHGKTIEFAEWARSVADHAMPIKAEFTPISHFIDKNLRDAYNKAYLYY +>sequence76 
+HPFNSSNYYRMLVERIEKGYSIIIDKKICSRYFVALKNVDSSKLDPFFINMLNDLEKNYKNININKYKCSVHSYKKNKYDQNCLRTITPWITFFNLYGTHLVSEVYYGGKIINILYSEYYNNIYNSEQVQIYKKRLNPFTSGSKLGSFYFGSIISKKQNSTNQKDNDNMLTYIKEKNTIYDGGEDIKEYKDGEGKVLMINGMEDEWEKTINGKYAKPIKLILKPFSDFIKTNDGKVAYYKALEYY +>sequence77 +GRFRASVDYQNMQNDMASGTYQYIVSNSRCSVFQLDLIDSPTYHPQFSNDILLNLQQLALNQNNANNTEANAYYDFFDNWGTHVVTSVDLGSLFGYKFKMLKTDVQSMQNQGIDVSASATLFNVRGRTNTQLEQNSLNSFSQSIQSWTSYSIGATPDLNNDPANWATQTLTKPMPIKSSITPYHEALKIFTQGGNNILSSTQILQLYSKLRMY +>sequence79 +FLGEIENRFDMSDDKSSKRTNEYISYDINNTLYRITLKGNVPLSEQFQEDLNSLDATTLFEKYGTHYLKSTWIGGRISFSTTIDTYGMTDDMRKKFAFVTKRKVGNWTGTSDVELTREEKDISEKMKSNSIVRVWGGDPKLGRDIERAIQGHTVSDIYQQWGQTVEERPYISDFDHGQGLVPIYELATGTRKEQLKEQWEAY +>sequence82 +GKFSASSDYQEVQDGLNSANIQYIESQARCSIFQLDVYNSPSQNAQLTPQLQQALFTLAFNQTSQNDYYDFIDTWGTHVVTSVNLGSRFGYKYQMDKYQSNQLTQQGVNLSVSASYFSSSGSASGAYNQTQIQNFTQAMTSWSSYSIGATPDANQDPLSWAQQTLDTPMPINISILSFDDFLNKFSFSVNGLTSSQLNTVISNLSQY +>sequence83 +SQSRMTHEVIESAQKIDSKYFKVVNTVELAQFKMRRNGLNPSDIFLRRMKDLPVYYNYLDYSFLIEDFGTHYFSSGSLGGQYEYVYRYSRADLSHSGLTEEEQKSCLSAEAKASFFSFSGSSSGSRCKENALSQRNSGSFTLSASESFSHVKGGSSESAGQLAFANGPNPQKYEAWIQDVKRNPAIISYEITPISELLVGIPYADIKRRNMEKALVEY +>sequence85 +YYPTEDKYFICEIAIAAVSLKNGVEDVFHRIVKPGKLPLGYYGGALTHSKETHQMLELVQDEPYENNTREVFNEMTSFLKLWRGKGSDSIVYADEKTHEMITKVIDNFCQEFNYPDEIKVYNFQYLFFALRNSVAARTVWPTETYSSTELEKDLYSYTPDISCEFHEMSDISVYCSKSIVTRYCYTLCDHCCTDLNIQLVAGFHVPKNSRI +>sequence86 +ISAEVKSKFSKESLDVKVGKEVYLTSSVSVPRLEFCINPLKVKLSDEFYSKLNNVETHGELIKVFKEYGEFYPKRYILGGMITNHETQKFTTIENLESKLLSLSAGVNAAIGPVKVGGSVGGESATDEKKSKQNEENSSKKDVIGGDPSKTGSEWVSSLSDINNWGIIGIDVYPIMDLIKKNDNTLYKKLEKIKNS +>sequence87 +KTSTEAFVPAEIALIKYNLELGVLDKLHELINPVRLPLGLAHEALTYSEQTHELPTPPNAMGETDFYTVLQKILSFTDYNSKPHKKLAIMTDAKEVPVIESLLSQLNDDVKLEYQFLVIPLGEFFFHLKRATEKYGLDICTFPTKTVADILLKKDAYEYTSGIACDFHEKLGNQRFCALSKVVRWSYIISDNCCLDLSIDLIAGRHLPSNADT +>sequence89 
+GFFNAMFEFTGCWQKDASITKSLAFDGWCITLYTVALSKAHIILKDHVKQAVPSTWEPAALARFIKKFGTHIVVGVKMGGKDVIYLKQQHSSSLQAVDVQKRLKEMSDQRFLDANGHSDISLADSYAKDNKVEAREQRLRFVESNPLNSYSSNEELVMMPKRRGGRDKDIISHSEWLNTVQAEPDVISMSFIPITSLLNGVPGCGFLNHAINLY +>sequence90 +SFSASTGYRDFAKEVSKKDTRTYMLKNYCMRYEAGVAQSNHLKWNVTLAFAAGVSQLPDVFDAHNPECACSAEQWRQDQNAEACTKTNVPIWISFIEQFGTHFLVRLFAGGKMTYQVTAKRSEVEKMRNMGIDVKTQLKMQLGGVSGGAGQGTSSKKNQSSSEYQMNVQKETLVIGGRPPGNVSDPAALAAWADTVEELPMPVKFEVQPLYHLLPVEKQEAFKQAVTFY diff --git a/motif1.fasta b/motif1.fasta index 5aaa491..d982f8f 100755 --- a/motif1.fasta +++ b/motif1.fasta @@ -1,45 +1,22 @@ -"V1" -"1" -">sequence2" -"2" -"MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF" -"3" -">sequence11" -"4" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF" -"5" -">sequence12" -"6" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF" -"7" -">sequence35" -"8" -"MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF" -"9" -">sequence38" -"10" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF" -"11" -">sequence46" -"12" 
-"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF" -"13" -">sequence59" -"14" -"MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF" -"15" -">sequence64" -"16" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF" -"17" -">sequence66" -"18" -"MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF" -"19" -">sequence78" -"20" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF" -"21" -">sequence81" -"22" -"MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF" +>sequence2 +MSLKPFTYPFPETRFLHSGSSVYKFKIRYGDSIRGEDIENKEVIVQELEDSIRVVLGNLDSLQPFATEHFVVFPYKSKWERVSHLKFKHGEIVLIPYPFVLTLYVAKKPRVZEDELKWFNENLSTGKPIDDSPLGLVPAERKAARAMKKKRKRMELSVSPSRPGLDRAKMRTSSQGPSKKKFLMETSRNMERNTQQKCQETPAFDGTDVQEQGSRWEDNLAGEITPPVQQSNPPPPAGPTDLGTSGFFGF +>sequence11 
+MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRAKIGISSQSPSKKKPLMETRRNREGKTHQEWQETPAFNITDVQEQDSKSEDSPAGQIIPPLQQNNPLPPKGPTELATGGFFGF +>sequence12 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIQDKEVIVQELEDSIRVVLGNLDNLQPFATEHFVVFPYKSRWERVAHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWIHEDLSPGKPVNDCPLGLVLPERTAAGAMLRKRKRGQVPSSPGRPGLDRTGKEKPSRNGRRLQRLISPMSRTRVGSGNREGCQGRLSHQCRRTIHLHLKDPQSWEPVASLGF +>sequence35 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSVRAEEITDKEVIIQELEDSIRAVLGNLDNLQPFITEHFIVFPYKSKWERVSHLKFKHEEVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLPPGKPINDSPLGSAVAEKKAAGDAGKKRKLVEEHGSPRGTALPRSVAEGKAESQSTEATLKKDQNRKKTQQETWKTVTSDTTDVQTQDSKRGHNLPGAMVPALQQSSSPPPQEPGTRSFFGF +>sequence38 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNNIRGEEIEDKEVIVQELEDSIRVVLGNLDSLQPFATEHFIIFPYKSKWESISHLKFKHGEIVLVPYPFVFTLYVAKKPRVZEDEMKWFHESLSAGKPIKDSPLGLVLTERKAAGAMMRKRKQVEVLSSPSRPGLDRTGKEKPIRNGKRPQHLISLMSRNRILSQRTAQQGRSFPHCSKTIHFHLKDPQSWQLVASLGF +>sequence46 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIENKEVIIQELEDSIRVVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEIILIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKPISDSPLGLVPAEKKAVGAVMRKRKHMDEPSSPSRPGLDRTGKEKPNKDCRRLWPLISLVSRNKILSGGTACQGQLSHPCSTTHLHLRSEQPAASLGF +>sequence59 +MSLKPFTYPFPETRFLHAGPSVYKFKIRYGHSIRTVATLLKSFQIYLFQDSIRAVLENLDNLQPFVTEHFIVFPYKSKWERVSHLKFKHEDVVLIPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGRPINDSPLGLVVVEKKAAGASKKQKRKLVEQHSSPGGARQPRDKMRSSSQRPSTKKPPMGTRRNRERKPQQERQKTVASDTTDVQEQHSKWGHNLPGAIVPPLQQNNSPPPKELGIRSFFGF +>sequence64 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEIEDKEVIVQELEDSIRAVLANMDSLQPFVTEHFIVFPYKSKWERVSHLKFKHGESILTPYPFVFTLYIAKKPRVZEDEMKWFAEDLPSGKPADDIPLELVLAETEAEEATMRKWKRKLMEEPSSPSRQGPHRAKMETSSEASSNKKPLKESKRSTDEEAQQEYQDTPASNAIAVKEQDAALGHGLQGLVVPPLQHSSPPPPKEPGARGFLGF +>sequence66 
+MSLKAFTYPLPETRFLHAGSSVYKFKIRYGSSVRGEEIEDKKIVSQELEDSIRAVLGNLDNLQPFTTDHFVIFPYKSKWERVSHLRFKHGAALLEPYPFVCTLYVAKKPRVZEDEMKWAPAGGNGGPTNSAPLHLHKTQKEQDRPGTETSRKKEPPAPPSRGGERRTSLEQSWKELADSPELLLQLTRNWTGESASEKGEAEDSDISFLKDHGSGSSLRHQQKSPPKPSSPPSEGPPKQKHAGFLGF +>sequence78 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVRELEDSIRVVLGNLDNLQPFTTEHFIIFPYKSKWERVSHLKFKHGEVVLVPYPFVFTLYVAKKPRVZEDEMKWFHENLSPGKLINDSPLGLVSAEKKSAEAMMRKRRHTEVPSSPRKSGRFFPHLRAKVETSSEAPSKKKPPMETRRTWNDNEQQETPAFDATDVQEQGPKWGDSLAGQMAPSLQWNNPPPPKGPKELGTTGFFGF +>sequence81 +MSLKPFTYPFPETRFLHAGPNVYKFKIRYGNSIRGEEVENKEVIVQELEDSIRAVLGNLDNLQPFATEHFIVFPYKSKWERVSHLKFKHGEAVLVPYPFVFTLYVAKKPRVZEDEMKCFHENLSPGKSMNSSPLGLVLAERKTAEAVLKKRKRGEVPSSPARPGLDRAEMGTSSQGLSKKKPPMETRRNRERKTQQECQKTPAFDVTDVQDQDSKWEDSLVGKTIPPSQQNNPPPAEGPTELGTSGFFGF diff --git a/motif2.fasta b/motif2.fasta index dc57988..94d29da 100755 --- a/motif2.fasta +++ b/motif2.fasta @@ -1,53 +1,26 @@ -"V1" -"1" -">sequence1" -"2" -"DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV" -"3" -">sequence22" -"4" 
-"LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA" -"5" -">sequence24" -"6" -"SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV" -"7" -">sequence28" -"8" -"EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA" -"9" -">sequence40" -"10" 
-"EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK" -"11" -">sequence42" -"12" -"NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL" -"13" -">sequence55" -"14" -"DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR" -"15" -">sequence61" -"16" 
-"EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ" -"17" -">sequence62" -"18" -"DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI" -"19" -">sequence69" -"20" -"EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL" -"21" -">sequence80" -"22" 
-"DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL" -"23" -">sequence84" -"24" -"NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL" -"25" -">sequence88" -"26" -"DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM" +>sequence1 
+DEFIALMHGSDPVRVELTRLENELRDKERELGEAQTEIRALRLSERAREKAVEELTDELEKMFEKLKLTESLLDSKNLEVKKINDEKKAAMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKQKEAALLDAERTVEIAMAKAAMVDDLQNKNQELMKQIEICHEENKILDKLQRQKVAEVKKLSLTVKELEEAVLRGGATANVVRDYQRQVQEVNDQKKTLECELAAQWWRNYGGARAKVTANRVAVVVANEWKDSNDKVMPVKQWLEERRFLQGEMQQLRDKLAVAERTARSEAQLKEKYQLRLKVLEDGLRGPPSGSSRLPTEGKSFSNGPSRRLSLGGADNMSKLSPNGLLARRSPSFHSRSSLSSSSSLVLKHAKGTSKSFDGGTRSLDRSKINGNGAHLLNRSTDAVRDCETNDSWKGNADEGTIENTNSNTDESNKETANNKSAEMVSGFLYDMLQKEVISLRKACHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQEVKARRLGSSKGTGSSQV +>sequence22 +LAHSDPIVLEFNRLQNQLKEKDRELGVASSEIKALRATIVLKDKALEQFRNEVNKLDERLGVIENLLKQKDLEIKKLTSEKKDALAAQFAAEAALRRVHANQKDDDTVPIEDVIAPLEADIKMYKIEIGRLQEDNKALERHIKSKESALLEAERILRSALERALIVEEVQNQNFELKRQIEICQEENKILDKTNRQKVLEVEKLSQTIQELEEAILAGGVAANAIRDYRRQISELNEEKRTLERELAAQWWRNYGGARVKVSANRVATVVANEWKDENDKVMPIKQWLEERRLLQAETQRLRDKLAISERTAKAEAQLKDKLKLRLKTLEEGLKQVSSFSENPYLSCRSPKPEKSNHILGFLSGNGGLKRRSTSQPRGSCIGKTSPLMPPNVENGAADAAGELKGVNSLKKKYASGENMLRKSLWASRSKVADIGGKENTEMKSNTDMHIDKFNNDTAVSADAKIKGGAKEETQNVGSAGFDSEDMVSAFLYDRLQREVINLRKSCEVKNNTLTAKDDEIKMLMRKVDALSKAIEVESKKIKREAAAREKEAISTKADENKKIRNTDSSKRRVA +>sequence24 +SSDPIVLELNRLENHLKDNDRELGIAHAEIKALKVTERLKEKAVEELNDDLKKLDEKLRFTENLLEDKNLEIKKLVSERRDALAAQFAAEATLRRVHANQKDEDYIPLDAVLAPMESEIRMCRNEISVLQEDKKALERLTKSKELALLETERMLKIAIERALLVEDLQNQNLELKRQIDICQEENRILDKANRQKVAEVEKLSQTIHELEESILAGGAAANAVRDYQRQILEMNEERRTLERELAAQWWRNYGGARVKILANRVATVVANEWKDDNDKVMPVKQWLEERKVLQGEIQRLRDKLNVSERTAKAESQLKDKFKLRLKTLEEGLKQVTTSSPNTEGSHLKQTVKPEPVLGYLSSNMGPRKRSQSQPRASFNAEQSTVQQRPNVTSENSNSNRTLEHVNSLKYKYISGKNLVKKNLWAPRNKLVDDVGKENSERKEDVGLEEFASVGPEVSKDFSAEAHSMQSTPEKDDLNVDCEDIVSGFLYDKLQKEVLNLRKSSQEKDGLLTAKDEEIKMLVKKIDTLTKAMETELKKMRRESASKERELTPRRVQKDPLHKSSTMIISKRAVKSV +>sequence28 
+EDIIHLLHGSDPIKVELNRLENEVRDKDRELCEAHAEIKALRQTERLKEKAVEELFDEREKLQEKLKAMEIALENKNLDLKRTNDERKSALAAQAAAEATLRRLHASQKDEDLLPLEAILAPVEAELKSTRNDFLKLQDDNKALDRLTKSKEAALLEAERAVQIAEAKASLVDDLQNRNQELLKQIEICQEENKILDKMHRQKVAEVEKLSTTVAELEEALLAGGAAANAARDFERQVHHLMEEKRTLERELAAQWWRNYGGARAKVTANRVAVVVANEWKDANDKVMPVKQWLEERRFMQGEMQQLRDKLATTERTARSEAQLKEKLQVRLKVLEEGLRTSTNGSTRKHDDFLRSGTNGASVRRQSTGGSDIGNGVARRRPSMSSASQMRGSVSGSTILKNGKFGSKAFDGSKSLDAGRFKAYANGCEEPRKVSSAASGAGGGGGGGGGGGDVKPEAGKVEGATVAAADDNVSVLFYDMLQKEVVTLRKLGHEKDQSLKDKDDAIEMLSKKVDTLTKAMEVEAKKMRREVAAMEKEVAAMRVDKEQENRARRLSIAKGSVNSSHA +>sequence40 +EDLLNLLHGSDPVKVELNRLENEVRDKDRKLAEATAECKVLKQRERLREKAVEELAEELDKVDEKLKAAEDLLESKNLELKKLNDEKKAALAAQFAAEATLRRVHAAQKDEQLPSIEEILSPLEAELKIARQEIAKLQDTNRALDRLTKSKEAALLEIERAIDAAEAKASQVDDLLNRNQELMKQIEICQEENKIMDKMHRQKTAEIEKLSSTVAELEEAVLAGGAAVNAARDYQRQAHELLEGKKTLERELAAQWWRNYGGARAKITANRVAVVVANEWKDANDKVMPVKQWLDERRFMQGEMQQLRDKLASAERTAKNESQLKDKFQMRLKVLEESLKPVTNGAPRRTEEVRSSSTTRRSTSGSEEASKLLANGSRRQRSAVTQVRASMASQTLMRATNGRMTSKSFDGGRSLDAGTTRLRAFSNGFEEVPVKPDSVEAKSEVEAVKSENGTTNQVSGSSSSVEDPVSGVLYDLLQKEVVNLRKASYEKDQSLKDKDDAIEMLSKKVDTLSKALEVEGKKMRREVQAMEKEVATLRAEKDQTRNPRRLSSGTGTVNSSSK +>sequence42 +NEFITLLHGSDPVKVELNRLENEVRDKDRELGEAQAEIKALRLSERLREKAVEELTDELSKVEEKLKLTESLLESKNLEIKKINDEKKASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNKALDRLTKSKEAALLEAERTVQVALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNEERKTLDRELAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERRFLQGEMQQLRDKLAITERAAKSEAQLKEKYHLRLKVLEESLRGSSSNTRSMPEGRSTSNGPSRRQSLGGADNFSKFTSNGFLSKRTPTSQLRSSLSSNSVLKHAKGTSKSFDGGTRSLDRGSRALLNGSSPNCSFNQPCDETKDTEAANMWKGNSDEKPVEFPVTETEDTVPGVLYDLLQKEVVALRKAGHEKDQSLKDKDDAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEHENRAKRFGSSKGPVGAAQL +>sequence55 
+DLMNHFNGSDPVRLELTRLENEVRDKSRVLAEAQAEIKSLRLSDRQKQKAVDELSDKLEKVDEKLKGTLILLDNKNLEMKKLNDERKAALAAQTAAEATLRRVHASQKDNDMPSLEVILAPLEAELKIARDSAVVVVTLQISKLQETNRALDRLTKSKEAALIESERVIKAAEAKASMVDDLQNRNQELLKQIEICQEENKILDKMHRSKVNEVEKLSATVRDLEEAVLAGGAAVNAARDYQRQVHELMEIKRTLERELAAQWWRNYGGARAKISANRVATVVANDWKDESEKVMPVKQWLEERRFLQGEMQQLREKLASAERTCKSEAQLKEKVQLRLKVLEEGLKSGNGTVRRGAGAGGTVEAKRSSSVTSNGSVRKGSGSEEGAKVLANGSRARRSAVSQLRAMGGPLVKNGRLTSKSFDGGGGGRSSSGGSYDAGGMAALKPFTNGFEELRAGIKTESRSCSGEAAGDAGEGAGDTVSGVLYDMLQKEVISLRRASQEKDQSLKDKDNAIEMLSKKVDTLGKAMEVEAKKMRREVTVMEKEVASMRVDKDQERRMRRLSMMKEPVNSSQR +>sequence61 +EDVINLLHGSDPVRVELNRLENEVRDKDRELGDAHAEIKALKYSERLKEKAVEELTDELQKVDGKLKATEALLESKNLEIKKINDERKAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARLEAAKLQDDNRALDRLTKSKETALLEAERTVEIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVKKLTQTVCELEEAVLAGGAAANAVRDYRRKVQEMNDERKILDRELAAQWWRNYGGSRAKVTANRVAVVVANEWKDANDKVMPVKQWLEERKFFQGEMQLLRDKLAVAERTAKAEAQLKEKYQLRFKVLEERLRASPSGNLRTTSEGRSISNGPSRRQSLGGAENLSRSASNGFALRRTANSQSGSIRSNSASVLLRNAKISSRSFDGGSRSLDRDKVIPNAARKHEVLTDTNDQIQNAKTIGTHEASTNGNRSEKTKSELDDSVSGVLYDMLQKEVITLRRACHEKDQSLKDKDDAIEMLAKKVDTLNKAMGVEAKKMRREVAAMEKEVAAMRVSKEHDPRARRPSAPRGSQ +>sequence62 +DDFISLFHGSDPVRVELTRLQNELREKDRELGDALAEIKSLRNSERLKEKGVEELTDELIKVDEKLKAAEALLESKNLEIKKINEEKRAALAAQFAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELKLARMEVAKLQDDNRALGRLTKSKEAALLEAERTVQIALAKASLVDDLQNKNQELMKQIEICQEENKILDKMLRQKVAEVEKLTQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDERKILEREVAAQWWRNYGGARAKVTANRVATVVANEWKDANDKVMPVKQWLEERKFFQGEMQQLRDKLAIAERTAKAEAQMKEKYQLRFKVLEERVKTSNGNSKFTVSDGRNIATGPSRRQSFGGAESLSASSSNGYQSRKTSISRPGSLRSNSANVLLKHAKLSSRSFDGGSRNLERERPTSDANGLDNMPRNSNIQTITSETITTHEESANGTPVKKSKSENEDYVSGMLYDMLQKEVISLRKACHEKDLTLKDKDDAIEMLAKKVDTLSKAMEVEARKMRREVASMEKEVAAMRISKEHDHRARRASAPRGAVNSQSI +>sequence69 
+EEFINMLHGSDPVRVELCRLENEVRDRDRELSEAQAEIKALRLSERAREKAVEELTEEVNKMDEKLKLTESLLENRNLEIKKINDEKKAALAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARQEIAKLQDDNRALDRLTKSKEAALLEAERTVQIALAKASMVDDLQNKNQELMKQIEICQEENKILDKMHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKVQEMNDEMKTLDRELAAQWWRNYGGSRAKVSANRVAVVVANEWKDSNDKVMPVKQWLEERRFMQGEMQQLRDKLVIAERTARSEAQLKEKFQLRLKVLEDGSRMSASGTYRTTIEGKSVSNGPSRRQSLGGADNVPKSVNGFLSKRPSFQMRSSVSSSTVLKHAKGASKSFDGGTRSLDRSKVLLTGAGLSLNRSSDATGDGVTHESWKKIPDEKTNDFPNVDSDDCVSGLLYDMLQKEVITLRKACHEKDQSLKDKDDAIEMLAKKVDMLTKAMEVEAKKMRREVAAMEKEVAAMRVEKEQDNKSKRLGGSKGLANSSQL +>sequence80 +DDFINLLHGSDPVKVELNRLENEVKDKDRELGEAQAEIKALKLSERLREKAVEELTDELQKVDEKLKAAGALIESKNLEIKKINDEKKASLAAQYAAEATLRRVHAAQKDDEMPPIEAIIAPLEAELRLARLEGAGSPYQVKGAALLEAERTVQVALAKAALVDDLQNKNQELMKQIEICQEENKILDKLHRQKVAEVEKLSQTVRELEEAVLAGGAAANAVRDYQRKFMEMNEEKKILDRELAAQWWRNYGGARTKVTANRVAVVVANEWKDANDKVMPVRQWLEERRFLQGEMQQLRDKLAIAERTAKSEAQLKERYHLRLKVLEDGLKASPSGHIRPSEVRSVSNGRSRRQSLGGAENFSRLSSNGLSRRTPASSPSNNISTVLKHAKGSSRSFDGGNRLSEKNKVCLNNGVVPNSSLNTAVEEHRRTENSNTCKENQDVKQSDTSKADADDYVSGLLYDMLQKEVIALRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVATMEKEVAAMRVGKGHDLRTKRLSNSKVTSQL +>sequence84 +NEFITLLHGSDPVKVELNRLENDVRDKDRELSESQAEIKALRLSERQREKAVEELTEELGKMSEKLKLTENLLDSKNLEIKKINEEKRASMAAQFAAEATLRRVHAAQKDDDMPPIEAILAPLEAELKLARHEIVKLQDDNRALDRLTKSKEAALLDAERTVQSALAKASMVDDLQNKNQELMKQIEICQEENRILDKLHRQKVAEVEKFTQTVRELEEAVLAGGTAANAVRDYQRKFQEMNEERRILDRELAAQWWRNYGGARAKVSASRVATVVANEWKDGSDKVMPVKQWLEERRFLQGEMQQLRDKLAIADRAAKSEAQLKEKFQLRLRVLEESLRGPSSSGNRSTPEGRSMSNGPSRRQSLGGADIIPKLTSNGFFSKRSPSSQFRSLNASTSTILKHAKGTSRSFDGGSRSLDRSKVLTNEPRSKFPLNQSSEGTSGGGSPNSTKQGDSEKAAGTNNDSVPGVLHDLLQKEVITLRKAANDKDQSLRDKDEAIEMLAKKVETLTKAMEVEAKKMRREVAAMEKEVSAMRVDNKGSDSRTRRHSTNSKGASTTAQL +>sequence88 
+DDFVNFLHGSDPVKIELNRLQNEVIDKNRELVDAQAEIKALKLTDRIKEKALEELTEELRKMVEKFQASEAALENKNLEIKRVVDEKKAALAAQFAAEATLRRVHAAQKDEELPPLEAILSPLEAEIKQLRQEVSKLQDDNRALERLTKSKEAALLEAERDVQSAYFKASLVDELQNRNQELMKQIEISLEENKILDKINRQKIAEVEKLGQTVRDLEEALLSGAAAANAVRDYQRQVSELKGEKRTLERTLAAQWWRNYGGSRAKVVENRVAVVVANEWKDSDGKVMPVKQWLEERRFLMGEMQQLRDKLSIAERTAKTEAQLKEKFQLRLKVVEDGLRSSFNGGVRSSELQNCSNGVSRRLSLGGFENSTKLSSNSFGTKKVPSLTRSSTMSSTSSSALLKHAKGASKSFDGSKSSSEGQSIDGNKSFSNGLDDPCFGNNTDESSMNTINNSGREICCNKQSEFAEPTSTDLVSGILYDMLQKEVIVLRKASHEKDQSLKDKDDAIEMLAKKVDTLTKAMEVEAKKMRREVAAMEKEVASMRADNEHGQRGRRLSGSSKGLLNNAHM From 544ba48fb1f1425b13bde80fae98946a8f7f8f52 Mon Sep 17 00:00:00 2001 From: Bhavana Palakurthi Date: Thu, 16 Nov 2017 05:49:09 +0000 Subject: [PATCH 4/5] Challenge Part 1 Completed --- challenge1.txt | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 challenge1.txt diff --git a/challenge1.txt b/challenge1.txt new file mode 100644 index 0000000..400495a --- /dev/null +++ b/challenge1.txt @@ -0,0 +1,70 @@ +# Problem 1: +# +# align each of the reference files with muscle into .align file +#NOTE:all the files must be in the same place for convenience or else need to specify +#the path to each file. 
Here all my files are in the current working directory + +$ ./muscle3.8.31_i86win32.exe -in sigma.ref -out sigma.align +$ ./muscle3.8.31_i86win32.exe -in sporecoat.ref -out sporecoat.align +$ ./muscle3.8.31_i86win32.exe -in transporter.ref -out transporter.align + +# make HMM profiles from each alignment with hmmbuild +$ ./hmmbuild transporter.hmm transporter.align +$ ./hmmbuild sigma.hmm sigma.align +$ ./hmmbuild sporecoat.hmm sporecoat.align + + +# +# run hmmsearch with the --tblout option and use the +# index variable to specify a unique output file +# do the hmmsearch step 3 times, one per HMM profile + +./hmmsearch --tblout Rosigma.hits sigma.hmm Roseobacter.fasta +./hmmsearch --tblout Rosporecoat.hits sporecoat.hmm Roseobacter.fasta +./hmmsearch --tblout Rotransporter.hits transporter.hmm Roseobacter.fasta + +./hmmsearch --tblout Rhsigma.hits sigma.hmm Rhizobium.fasta +./hmmsearch --tblout Rhsporecoat.hits sporecoat.hmm Rhizobium.fasta +./hmmsearch --tblout Rhtransporter.hits transporter.hmm Rhizobium.fasta + +./hmmsearch --tblout Lsigma.hits sigma.hmm Limnohabitans.fasta +./hmmsearch --tblout Lsporecoat.hits sporecoat.hmm Limnohabitans.fasta +./hmmsearch --tblout Ltransporter.hits transporter.hmm Limnohabitans.fasta + +./hmmsearch --tblout Csigma.hits sigma.hmm Clostridium.fasta +./hmmsearch --tblout Csporecoat.hits sporecoat.hmm Clostridium.fasta +./hmmsearch --tblout Ctransporter.hits transporter.hmm Clostridium.fasta + +./hmmsearch --tblout Bsigma.hits sigma.hmm Bacillus.fasta +./hmmsearch --tblout Bsporecoat.hits sporecoat.hmm Bacillus.fasta +./hmmsearch --tblout Btransporter.hits transporter.hmm Bacillus.fasta + +./hmmsearch --tblout Asigma.hits sigma.hmm Arthrobacter.fasta +./hmmsearch --tblout Asporecoat.hits sporecoat.hmm Arthrobacter.fasta +./hmmsearch --tblout Atransporter.hits transporter.hmm Arthrobacter.fasta + +./hmmsearch --tblout Vsigma.hits sigma.hmm Verrucomicrobia.fasta +./hmmsearch --tblout Vsporecoat.hits sporecoat.hmm 
Verrucomicrobia.fasta +./hmmsearch --tblout Vtransporter.hits transporter.hmm Verrucomicrobia.fasta + + +# +# cat all of the hmmsearch output files +# remove the comment lines (grep -v "#") +# use cut or awk to get the columns you want (1st, 3rd, 5th) + + +cat Vsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Rhsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Rosporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Lsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Fsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Csporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Bsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Asporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table + + +# trim the 1st column contents to just the last bit that is a "species" code +cat transporter.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut +cat sporecoat.table | sed 's/tr|A6BZD2|A6CT85_9BACI/BACI/g' >> hmmOut +cat sigma.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut \ No newline at end of file From aee992ad9089673f3c63d749797c43ad9a2c12ea Mon Sep 17 00:00:00 2001 From: Bhavana Palakurthi Date: Thu, 16 Nov 2017 05:56:56 +0000 Subject: [PATCH 5/5] Challenge 1 modified version --- challenge1.txt | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/challenge1.txt b/challenge1.txt index 400495a..dd14e90 100644 --- a/challenge1.txt +++ b/challenge1.txt @@ -53,7 +53,6 @@ $ ./hmmbuild sporecoat.hmm sporecoat.align # remove the comment lines (grep -v "#") # use cut or awk to get the columns you want (1st, 3rd, 5th) - cat Vsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table cat Rhsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table cat Rosporecoat.hits | grep -v "#" | awk '{print
$1,$3,$5}' >> sporecoat.table @@ -63,6 +62,26 @@ cat Csporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table cat Bsporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table cat Asporecoat.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sporecoat.table +cat Vtransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Rhtransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Rotransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Ltransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Ftransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Ctransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Btransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table +cat Atransporter.hits | grep -v "#" | awk '{print $1,$3,$5}' >> transporter.table + +cat Vsigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Rhsigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Rosigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Lsigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Fsigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Csigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Bsigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table +cat Asigma.hits | grep -v "#" | awk '{print $1,$3,$5}' >> sigma.table + + + # trim the 1st column contents to just the last bit that is a "species" code cat transporter.table | awk '{print substr($1,length($1)-3,4),$2,$3}' >> hmmOut