Previous changeset 4:ae4d5733272f (2015-10-16) |
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/splitfasta commit 31945d5d8c5ebee64ebf29c6ea022fb831f47274" |
modified:
splitFasta.xml |
added:
split_fasta.py test-data/ID1.fasta test-data/ID2.fasta test-data/ID3.fasta test-data/part1.fasta test-data/part2.fasta test-data/part3.fasta test-data/part4.fasta test-data/sample1.fasta test-data/sample2.fasta |
removed:
splitFasta.py test-data/ID1_result1.fasta test-data/ID2_result1.fasta test-data/ID3_result1.fasta test-data/test.fasta tool_dependencies.xml |
b |
diff -r ae4d5733272f -r 733ca84b21ee splitFasta.py --- a/splitFasta.py Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,13 +0,0 @@ -#!/usr/bin/env python -import os -import sys -from Bio import SeqIO - -if __name__ == "__main__": - inpath = sys.argv[1] - os.mkdir('splits') - with open(inpath, 'r') as handle: - for record in SeqIO.parse(handle, 'fasta'): - header = os.path.join('splits', record.id + '.fasta') - with open(header, 'w') as handle2: - SeqIO.write([record], handle2, 'fasta') |
b |
diff -r ae4d5733272f -r 733ca84b21ee splitFasta.xml --- a/splitFasta.xml Fri Oct 16 16:13:34 2015 -0400 +++ b/splitFasta.xml Mon Sep 21 15:40:14 2020 +0000 |
[ |
@@ -1,35 +1,59 @@ -<tool id="rbc_splitfasta" name="Split Fasta" version="0.2.0"> +<tool id="rbc_splitfasta" name="Split Fasta" version="0.4.0"> <description>files into a collection</description> <requirements> - <requirement type="package" version="1.65">biopython</requirement> + <requirement type="package" version="1.76">biopython</requirement> </requirements> - <stdio> - <exit_code range="1:" /> - </stdio> - <command interpreter="python"> + <command detect_errors="aggressive"> <![CDATA[ - splitFasta.py $inputFile + #if $splitmode.splitmode_select == "each": + python $__tool_directory__/split_fasta.py '$inputFile' + #else if $splitmode.splitmode_select == "chunks": + python $__tool_directory__/split_fasta.py '$inputFile' $splitmode.num_chunks + #end if ]]></command> <inputs> <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/> + <conditional name="splitmode"> + <param name="splitmode_select" type="select" label="Split mode"> + <option value="each">Each sequence in its own dataset</option> + <option value="chunks">Split into a number of chunks</option> + </param> + <when value="chunks"> + <param name="num_chunks" type="integer" value="10" label="Number of chunks to split into" /> + </when> + <when value="each"/> + </conditional> </inputs> <outputs> - <collection name="splitted_fasta" type="list" label="Sequence collection in FASTA format"> + <collection name="splitted_fasta" type="list" label="${tool.name} on ${on_string}"> <discover_datasets pattern="(?P<designation>.*)" directory="splits" ext="fasta" visible="false"/> </collection> </outputs> <tests> <test> - <param name="inputFile" value="test.fasta" /> - <output_collection name="splitted_fasta"> - <element name="ID1.fasta" file="ID1_result1.fasta" ftype="fasta" /> - <element name="ID2.fasta" file="ID2_result1.fasta" ftype="fasta" /> - <element name="ID3.fasta" file="ID3_result1.fasta" ftype="fasta" /> + <param name="inputFile" value="sample1.fasta" /> + <param name="splitmode|splitmode_select" value="each" /> + <output_collection name="splitted_fasta" count="3"> + <element name="ID1" file="ID1.fasta" ftype="fasta" /> + <element name="ID2" file="ID2.fasta" ftype="fasta" /> + <element name="ID3" file="ID3.fasta" ftype="fasta" /> + </output_collection> + </test> + <test> + <param name="inputFile" value="sample2.fasta" /> + <param name="splitmode|splitmode_select" value="chunks" /> + <param name="num_chunks" value="4" /> + <output_collection name="splitted_fasta" count="4"> + <element name="part1" file="part1.fasta" ftype="fasta" /> + <element name="part2" file="part2.fasta" ftype="fasta" /> + <element name="part3" file="part3.fasta" ftype="fasta" /> + <element name="part4" file="part4.fasta" ftype="fasta" /> </output_collection> </test> </tests> <help><![CDATA[ - Takes an input file and writes each consecutive two lines to a separate file, in a dataset collection. + Takes an input FASTA file and writes entries (i.e. sequences) to separate datasets, which are organized in a dataset collection. + There are two modes: 1) each sequence is written to its own data set which is named by the ID of the sequence or 2) The file is split into a given number of chunks which are numbered. ]]></help> <citations> <citation type="bibtex"> |
b |
diff -r ae4d5733272f -r 733ca84b21ee split_fasta.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/split_fasta.py Mon Sep 21 15:40:14 2020 +0000 |
[ |
@@ -0,0 +1,50 @@ +#!/usr/bin/env python + +import os +import sys +from Bio import SeqIO + +num_chunks = 0 +if len(sys.argv) == 3: + num_chunks = int(sys.argv[2]) + input_filename = sys.argv[1] +elif len(sys.argv) == 2: + input_filename = sys.argv[1] +else: + exit("Usage: split_fasta.py <input_filename> [<num_chunks>]") + +os.mkdir('splits') + +if num_chunks != 0: + # if splitting into chunks we need to count how many records are in the + # input file + record_count = 0 + with open(input_filename) as input_file: + for line in input_file: + if line.lstrip().startswith('>'): + record_count += 1 + + records_per_chunk = round(float(record_count) / num_chunks) + +count = 1 +with open(input_filename) as input_file: + + chunk_record_count = 0 # how many lines have we written to the output file + records = [] + for record in SeqIO.parse(input_file, 'fasta'): + records.append(record) + if num_chunks == 0 or (count < num_chunks and + len(records) >= records_per_chunk): + if num_chunks == 0: + output_filename = os.path.join('splits', record.id) + else: + output_filename = os.path.join('splits', 'part{}'.format(count)) + SeqIO.write(records, output_filename, 'fasta') + count += 1 + records = [] + + if records: + # this only applies for the mode where input file is + # split into chunks + output_filename = os.path.join('splits', 'part{}'.format(count)) + SeqIO.write(records, output_filename, 'fasta') |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID1.fasta Mon Sep 21 15:40:14 2020 +0000 |
b |
@@ -0,0 +1,3 @@ +>ID1 +MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS +PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID1_result1.fasta --- a/test-data/ID1_result1.fasta Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->ID1 desc -GATACA |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID2.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID2.fasta Mon Sep 21 15:40:14 2020 +0000 |
b |
@@ -0,0 +1,3 @@ +>ID2 +MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA +AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID2_result1.fasta --- a/test-data/ID2_result1.fasta Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ ->ID2 desc -GATACAGATACAGATACAGATACAGATACA |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID3.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ID3.fasta Mon Sep 21 15:40:14 2020 +0000 |
b |
@@ -0,0 +1,5 @@ +>ID3 +MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI +VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV +FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW +LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID3_result1.fasta --- a/test-data/ID3_result1.fasta Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ ->ID3 desc -GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACA -GATACA |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/part1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/part1.fasta Mon Sep 21 15:40:14 2020 +0000 |
[ |
b'@@ -0,0 +1,182 @@\n+>NP_001007355.1 gi|55925472|ref|NP_001007355.1| eukaryotic translation initiation factor 4E-binding protein 3 [Danio rerio]\n+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS\n+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI\n+>NP_956692.1 gi|41055339|ref|NP_956692.1| transmembrane protein 218 [Danio rerio]\n+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA\n+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH\n+>NP_001003767.1 gi|57524633|ref|NP_001003767.1| transmembrane protein 179 [Danio rerio]\n+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI\n+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV\n+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW\n+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI\n+>NP_001002700.1 gi|50540464|ref|NP_001002700.1| fatty-acid amide hydrolase 2-A [Danio rerio]\n+MALTRFERFLGRLLRAVVWILFAAFKLFAPQQRHGVSRLPPITNPLLLLSAMQLARKIRR\n+KEVTSVEVVQAYIDRIQEVNPLINAMVKDRFSAALQEAAQVDKLIEEETGGEDVLEDRLP\n+LLGVPITVKEAFALQGMPNSTGLLTRRDLVSGADAPSVALLKRAGAIPLGVTNCSELCMW\n+LESHNHLYGITNNPYDFERIVGGSSGGEGSILGAGSSVIGIGSDIGGSIRIPCFFNGIFG\n+HKPSVGIVNNEGQYPPASGQQMGFLCTGPMCRYAEDLIPMLSIMGGPNAEKLSLFTEVDL\n+KKLRFFSVPHNGGSHLVSPVEPQLLHAQKMVVKRLEADLGVKVQELLIPQLKYSFQIWGT\n+MMASPGKDGKPPTTFAELMSEGGKKVWPAWELFKWFLGFSSHTLAAIGLALVELFQSSHP\n+SPFIMQQKESLQQELEELLGTDGVLLYPSHPLIAQKHHHPIFTPFNFSYTGIFNILGLPV\n+TQCPLGLSAEGLPLGVQIVAGKLQDRLSLATALYLEKAFGGWREPGKTTIKP\n+>NP_001003555.1 gi|57525887|ref|NP_001003555.1| centromere protein P [Danio rerio]\n+MEQKYEEDIQKLQQEIEMLEAEQEETLRSIFVQHGDRLQQGVKSACEERGGGGAQQHTLS\n+KLITEVRELEKDLRRQTEINGITLNECFVKTLHKSERKLIQQLRLAGHCGLLLFQVEFAV\n+TEIQEDNVLHRRVTELNIVVDGVEFKDFSAFVSRVEDTKDLLLFFRTLRTFSERCEDRRQ\n+TFQHFQEKYPDVVNLPEGCRSEIMIIRSPQLPGISMTLFWKIHVSKEGVVKPLLDLLLKM\n+PDQALELDTKKVMEKASDYFQSLLQLLGVEASIEGLIRTVCS\n+>NP_997599.1 gi|47058959|ref|NP_997599.1| protein dispatched homolog 2 [Danio rerio]\n+MESGSISRQREDAEMPDSSTTEGPSLEAPQSEIPEVSLCPPDSDSTESQMCPVEIEENQT\n+KSSSPFNSHSSTQLERQVSQGSAYHSPPHKKCPCCGHQQPSQSDVCPGQMNALHQADCAA\n+SPVKTLYSCSPSRLPSCHTKMQCHWLHGSHDGSNHKPVQHHMVTVRNDGLHRIPRSYSQV\n+IVEYPMTVLISCTLVLFACSLAGILTGPLPDFSDPLLGFEPRGTDISVRLATWTRLKQNT\n+GPGKPLSPVPWQLTEKTTTGKDTIKSEPQFRERSRRMLHRDNAEHNFFCNAPGERYAQLV\n+FRSGNSASLWSLKAIYSMCQMEQTQIRSGPQFDKLCQVKSEFYGSMVKNECCPSWSLGNY\n+LAVLNNISSCFSLTSQQVSESLGLLRFCAPYYHDGSLIASCTERSKFGRCASVPHRCKLS\n+SIFQILHYLVDKDFLGPQTVEYKVPSLKYSIVFLPVEKSDSLMNIYLDHLEGHKLTYNNT\n+TITGMDLGIKQKLFKYYLARDSIYPVLAALALLITIGLYLKSLFIAAMSLVAVILSLSTS\n+YFFYKVAFRLTFFPLLNLAAVFVLLGSCLNQALTFVDFWKLQLSHNPPAVPEKRMNRVLQ\n+EMGYLIIVSGLTSSVTFYSGYISSITAVRCYAVYLGSASLINTLFALVWLPCTLILQERY\n+AVLSSNTVGKVAWKPCCSKNAGGFWETSSRKRCLFTFRQKLRTLGRGFSDTSNLLFLKIL\n+PCGVVKFRYIWICWFAVLAAGGTYISCVDPGMKLPTSDSRTTQLFRSSHPFERYDAEYRH\n+QFMFERMKDGEDEPMMLTLIWGIVPSDNGDHFDPKSNGSLSVDPGFNMSSLQAQIWLRDL\n+CGKIQNQTFYSPLSAEQDTAEDNVCFVEHLIHWVSIRRCSESEDAFSFCCNNIPFPYPPR\n+VFEQCLSMMVAEQHAEGRLPSAGGLRFDSEGRIAALVVIFKTVQLYSFNYNRMSQFYQEI\n+LSWFNREISKAPAGLQRGWFVSQLGLYDLQQCLSSETLEVAGFSVALTFALLLLTTWNIP\n+LSVYVSIAVAGSVFATVGLLVLLEWQLNGVEALFISAAAGLSVDFVANYCISYSLAPHSD\n+RLGRVAHSIKRMGCPVATGAGAYFCVGIIMLPATALLFRKLGIFLLLVKCVACGFATFFF\n+QSLCCFFGPQNNCGRITLPCVTQQSTENILSSCSATEPGTNNPAANGAFGCGKGSRVRRS\n+FNKENEGFLCPNQQHHRKRQPVGGREPEQNELQPLACQLSDSFENSTCTSKLSNRPSVLS\n+DDIQFCGLSPKQDYDRVSIEADSTEMCSRHLKGCNPPPALQTSSPYKENMLRLPQDACKE\n+KVLCKKCRGQSRGGLQLWNVSLSSSSSMDEIMITQTTDTVNERSLSMDDHIHKRLLSCQS\n+QSSIEGLEESNDTCLTEVEAAIPQAGKIEDEFQPGHLNGKRDTLRLSLKETVYDLASPGS\n+GRVRTAQSDVPVILPNSKPDMPDVWIKREGKGEGGS\n+>NP_001013313.1 gi|61651744|ref|NP_001013313.1| coiled-coil domain-containing protein 115 [Danio rerio]\n+MRVDENLRLDEQLLLFMEQLEALEEKRQRLNSLIEEGWFSIAKARYSMGNKQVSALQYAS\n+EMQPLAHVETSLLEGGTAEFKCERSENKAEEQKTKTIEDIGAKETGLRRRVHTKQKEVKE\n+GEQDTDEVKTKTDSPTPEHRNPLKWFGILVPQNLKQAQSAFKEVITLSVEIASLQSTILA\n+TRKEMQVQMKEKQERTEKAQLEVKEE\n+>NP_991238.1 gi|45387769|ref|NP_991238.1| pituitary homeobox 3 [Danio rerio]\n+MDFNLLTDSEARSPALSLSDSGTPQHDPGCKGQDNSDTEKSHQNHTDESNPEDGSLKKKQ\n+RRQRTHFTSQQLQELEATFQRNRYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWRKRER\n+NQQAELCKNGFGAQFNGLMQPYDDMYSGYSYNNWATKSLASSPLSAKSFPFFNSMNVSPL\n+S'..b'I\n+>XP_006779747.1 gi|583968576|ref|XP_006779747.1| PREDICTED: corticosteroid 11-beta-dehydrogenase isozyme 2-like [Neolamprologus brichardi]\n+MEDYTLPFWIYLVIVTVFIGGAMKKILASHLNTTSTVVAWLGATVLVERLWAFCLPAMLL\n+LVLFGITFCIYYATKTSQPRAMLPAHGKAVIITGCDSGFGNATAKHLDSLGFEVFATVLD\n+LNGDGAKELQRTCSHRLTLLQVDITQPQQVQQALLDTKAKLGLKGLWALVNNAGVCVNFG\n+EVELSLMSNYRGCMEVNFFGTLSITKAFLPLLRQTKGRIVTISSPAGDQPFPCLAAYGAS\n+KAALNLITETLRHELEPWGVQVSTILPSSYRTAQSTNSAYWEKQHKHLLQNLSPALLEDY\n+GEEYMTETKDLFQTFAKHTTTNLQPVVDTIVQALLAPQPQPRYFAGAGLSLMYFLYAYFP\n+YSMSNNFLKKKFLKKNVIPRALRKQSAFDLNLSLHNNNNEEKLQQM\n+>XP_006779748.1 gi|583968578|ref|XP_006779748.1| PREDICTED: transient receptor potential cation channel subfamily M member 1-like [Neolamprologus brichardi]\n+MYIRVSFDSKPDSLLHLMVKDWQLELPTLLISVHGGLQNFDLPPKLKQVFGKGLIKAAVT\n+TGAWIFTGGVSTGVIRHVGDALKDHSSKSRGKVCAIGIAPWGIVENKEDLIGRDVTRPYQ\n+TMSNPLSKLSVLNSSHSHYILADNGTCGKYGAEVRLRRQLEKHISLQKINTRLGQGVPVV\n+CLIVEGGPNVISITLESLKEEPPVPVVVCDGSGRASDILSFAHRYCEEDG\n+>XP_006779749.1 gi|583968580|ref|XP_006779749.1| PREDICTED: chymotrypsin B-like [Neolamprologus brichardi]\n+MAFLWIVSCLAFVGAAYGCGTPAIPPRVTGYARIVNGEEAVPHSWPWQVSLQQTNGFHFC\n+GGSLISEQWVVTAAHCNVRTYHNVIVGEHNKGYGSTENIQVLKPAKVFTHPSWNPQTINN\n+DITLIKLASPARLGTNVSPVCLADTTDSFAAGMKCVTTGWGLTRYNAPSTPNNLQQAALP\n+LLSNEECKKHWGSNISDVMICAGGAGATSCMGDSGGPLVCQKDNVWTLVGIVSWGSSRCS\n+TSTPAVYARVTKLRGWVDQILASN\n+>XP_006779750.1 gi|583968582|ref|XP_006779750.1| PREDICTED: agouti-related protein-like [Neolamprologus brichardi]\n+MFGTVLLCCWSFGLLPLASSLVHGNLPLDEGPVAGRRTETFLSEIERSQVPDRMHEPALL\n+PVDSVEDHFLMDTGSYDEDTSAALQLQGRAMRSPRRCIPHQQSCLGYPLPCCDPCDTCYC\n+RFFNAICYCRRVGHVCPPRRT\n+>XP_006779751.1 gi|583968584|ref|XP_006779751.1| PREDICTED: EMILIN-1-like [Neolamprologus brichardi]\n+MAALPLLLLLVLWTCGNAKGAFPLRQSYNLYTNGHAHGARAASRHRNWCAFVVTKTVSCV\n+VEDGVETYVKPDYHPCSWGSGQCSRVVVYRTYMRPRYKVAYKMVTEMDWKCCHGYSGADC\n+NIGPVGGGGTQISTTRPQPGQGGGTTSGQGGGGHSYGGGSSGSGQSGGNADNEKMRQLEE\n+KIRSLTKNLQDLQSTMSTMNERLQEEGGRNGFGERSSGGRNPADAAQPEIKETIHSIQTK\n+LDQLDNRTQAHDKTLVSINNHLVNGKGNELEGGASGGSLSEGRLNSLKEEILSKLERRVS\n+LSCSSCQAGVEDLRKQQQQDRERIRALEKQMNAMDVQYRQSLDGLRRDVVRSQGCCDIIS\n+DLQDRVTDAERKISTASENFDILQNRLDREISGQGGTSENTGSRGQGLPVGGETGGHGRD\n+AMITEEHLNNRLKDLERRVNSTMQKTEESCSYLENHVKDYFHRELDELRSVFLERFDDQA\n+DRITDVELDVEQVKDSISDHDKRLSKLENTTSQMSWRLEKCGCVASEQGGGGEGRGRGDG\n+GYGGGSWGAGGGGSTGEGKDGGNRGDGGGTWGAGGGGGGSTGGGGRWGGTGGGLPGTGGE\n+KDNSTKKSLEWRVVANEDQIRHFNTQLKDLSMSGDSLYDKVLDLTDDVGKIKALTGDHGE\n+HFNRIVTVVEMLGEDCELCGKVEKELQKMRNYSQNALSNIQNHINRIQNRMDSEGDSCFQ\n+MCSVLQSEVSVLRDDVRRCTNQCKSNPDMTTGVDHARPGGTDDNSGPLDPAKPLDGHSVI\n+EGINNNHLKTLQGELSNVILTFSSINDTLKGLEHTVQKHDSVITDLGNTKDKIISEIDKV\n+QQELTEHIEDNRNRLDKMDRDIRRFESTVLEMGDCKRSGDGLEKRLSKLEGVCGRLDGVS\n+DSILKIKEGLNKHVSSLWTCVSGLNDTVIRHGGLLDFIQDGQDDIHSRVKNLNSSLNQVS\n+RDLQSFSEHDLTGPPGPQGPQGHPGERGFNGPPGLPGPPGFPGPRGEIGPHGPKGETGLP\n+GADAQIPKLSFSAALTAPMDRAGTIVFDKVFVNEGNFYNPRTGIFTAPVDGNYYFSAVLT\n+GHRNEKIEAVLSKSNYGMARVDSGGYQPEGLENNPVAEAKVNPGSLAVFSIILPLQTQDT\n+VCIDLVMGKLAHSVEPLTVFNGMLLYENK\n+>XP_006779752.1 gi|583968586|ref|XP_006779752.1| PREDICTED: zinc finger protein 507-like isoform X1 [Neolamprologus brichardi]\n+MEEITNVITHSSAASSSSSTSGSHTRQTKEKQPSQGFQQKTADDSLIQVIKKLSKIVEKR\n+PQRRCASGGQKRALQVGERGAEQGGGSICKKIKRNLKDEVGVERSTDDSSLPSPWSGDDN\n+NNVTTAVAEVAANPNSSDLKRTVTCYQCSLCPHLSQTLPLLKEHLKQHNEQHSDLILMCS\n+ECHFTSRDHEQLEAHVRMHFDNGDNQKRKYPVSEAKEEVLKNQDVDLTGDNCSAGTEVKK\n+SSVSNAKELPQKKKWYSYEEYGLYRCLICSYVCSQQRMLKTHAWKHAGLVDCSYPIFEDE\n+DGGSAKREVQAAPNNASAREEIVVLQDKSLQKLPTGFKLQLCMPVAVEDKQEVVNLQGSH\n+LSESPKTEEEDEYPIKDMTSEEPAVEVQVTTEAETEVELGGHHESTSATDSLLSSAQKII\n+NRSPNSAGHINVIVERLPSAEDSVMASNPLLLSPDVDGDKSLLEKKAEEQEHVEGVKDEV\n+VLCYSPGNANKSQHLGADIKPSIAKSNDLPRDENVPPAGRKRTHSESLRLHSLAAEVLVA\n+MPMRTPELPNSGAKVALKTVAAQAQSPQAGQKPTEGAAAGQKASDVGTAAAMLNCNEGRE\n+ETLGSLGLGKGDDDGPAANGGISLSLLTVIERLRERSDQNTSDEDILKELQDNAQFQSGA\n+GVVAANGAGSYVCSSVPGMDGLVGSPDSGLVDYIPGSDRPYRCRLCRYSSGNKGYIKQHL\n+RVHRQREPYQCPICEHIASDSKDLENHMIHHCKSRMYQCKQCPDAFHYKSQLRNHEREHH\n+SFSGDVEMLTPVAETAAAMEETERVTYEEGSPQKMFKCDVCNYTSSTYVGVRNHRRIHNS\n+DKPYRCCSCDFATTNMNSLKSHMRRHPQEHQAVQLLEQYRCSLCGYVCSHPPSLKSHMWK\n+HAGDQNYNYEQVNKAINEAISQSSR\n' |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/part2.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/part2.fasta Mon Sep 21 15:40:14 2020 +0000 |
[ |
b'@@ -0,0 +1,235 @@\n+>XP_006779753.1 gi|583968588|ref|XP_006779753.1| PREDICTED: zinc finger protein 507-like isoform X2 [Neolamprologus brichardi]\n+MEEITNVITHSSAASSSSSTSGSHTRQTKEKQPSQGFQQKTADDSLIQVIKKLSKIVEKR\n+PQRRCASGGQKRALQVGERGAEQGGGSICKKIKRNLKDEVGVERSTDDSSLPSPWSGDDN\n+NNVTTAVAEVAANPNSSDLKRTVTCYQCSLCPHLSQTLPLLKEHLKQHNEQHSDLILMCS\n+ECHFTSRDHEQLEAHVRMHFDNGDNQKRKYPVSEAKEEVLKNQDVDLTGDNCSAGTEVKK\n+SSVSNAKELPQKKKWYSYEEYGLYRCLICSYVCSQQRMLKTHAWKHAGLVDCSYPIFEDE\n+DGGSAKREVQAAPNNASAREEIVVLQDKSLQKLPTGFKLQLCMPVAVEDKQEVVNLQGSH\n+LSESPKTEEEDEYPIKDMTSEEPAVEVQVTTEAETEVELGGHHESTSATDSLLSSAQKII\n+NRSPNSAGHINVIVERLPSAEDSVMASNPLLLSPDVDGDKSLLEKKAEEQEHVEGVKDEV\n+VLCYSPGNANKSQHLGADIKPSIAKSNDLPRDENVPPAGRKRTHSESLRLHSLAAEVLVA\n+MPMRTPELPNSGAKVALKTVAAQAQSPQAGQKPTEGAAAGQKASDVGTAAAMLNCNEGRE\n+ETLGSLGLGKGDDDGPAANGGISLSLLTVIERLRERSDQNTSDEDILKELQDNAQFQSGA\n+GVVAANGAGSYVCSSVPGMDGLVGSPDSGLVDYIPGSDRPYRCRLCRYSSGNKGYIKQHL\n+RVHRQREPYQCPICEHIASDSKDLENHMIHHCKSRMYQCKQCPDAFHYKSQLRNHEREHH\n+SFSGDVEMLTPVAETAAAMEETERVTYEEGSPQKMFKCDVCNYTSSTYVGVRNHRRIHNS\n+DKPYR\n+>XP_006779754.1 gi|583968590|ref|XP_006779754.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X1 [Neolamprologus brichardi]\n+MTTLRQRKGSKGKEPSPAAELQSQQHNCCSEHHPEKILHGDWSWGAIIWTSVGWSVSVGL\n+GLLCCIYVATLHENDLWFSNIKEVEREISFRTECGLYYSYYKQMLHAPSIQEGLKEMIHD\n+NLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSLQAVYVIALYLT\n+AWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTAITCYLRPQLTT\n+LQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQVTTLYLVQVSSL\n+LSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLLHSALVLLLTVT\n+INYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPLDTLERLAGTLL\n+LYPYVLTLLLLCGMLVAAALQNLSRPNRGSTEEKKGAREGQVAAFRPDVAYNVLHTLFYG\n+LLAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLVRYVAPVVMIGF\n+LYYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLAGIKLCTGRVLT\n+NHPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICYERRHRRGCRLR\n+DLLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQNKTFHVYRLKKK\n+RKKNTKGSSEPSVTQ\n+>XP_006779755.1 gi|583968592|ref|XP_006779755.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X2 [Neolamprologus brichardi]\n+MTTLRQRKGSKGKEPSPAAELQSQQHNCCSEHHPEKILHGDWSWGAIIWTSVGWSVSVGL\n+GLLCCIYVATLHENDLWFSNIKEVEREISFRTECGLYYSYYKQMLHAPSIQEGLKEMIHD\n+NLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSLQAVYVIALYLT\n+AWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTAITCYLRPQLTT\n+LQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQVTTLYLVQVSSL\n+LSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLLHSALVLLLTVT\n+INYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPLDTLERLAGTLL\n+LYPYVLTLLLLCGMLVAAALQNLRPNRGSTEEKKGAREGQVAAFRPDVAYNVLHTLFYGL\n+LAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLVRYVAPVVMIGFL\n+YYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLAGIKLCTGRVLTN\n+HPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICYERRHRRGCRLRD\n+LLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQNKTFHVYRLKKKR\n+KKNTKGSSEPSVTQ\n+>XP_006779756.1 gi|583968594|ref|XP_006779756.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X3 [Neolamprologus brichardi]\n+MCRGLKEMIHDNLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSL\n+QAVYVIALYLTAWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTA\n+ITCYLRPQLTTLQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQV\n+TTLYLVQVSSLLSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLL\n+HSALVLLLTVTINYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPL\n+DTLERLAGTLLLYPYVLTLLLLCGMLVAAALQNLSRPNRGSTEEKKGAREGQVAAFRPDV\n+AYNVLHTLFYGLLAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLV\n+RYVAPVVMIGFLYYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLA\n+GIKLCTGRVLTNHPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICY\n+ERRHRRGCRLRDLLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQN\n+KTFHVYRLKKKRKKNTKGSSEPSVTQ\n+>XP_006779757.1 gi|583968598|ref|XP_006779757.1| PREDICTED: MTSS1-like protein-like isoform X1 [Neolamprologus brichardi]\n+MLGEITHLQAIIDDLTVLTTDPHKLPPASEQVIKDLKGSDYSWSYQTPPSSPSSSGSRKS\n+SMCSSVNSTHSSASRSSGGGGSGGVGGGGSLPHSPTSSSSSSCRYRSSLPHQPPPPGGIA\n+AHRLSSVSSHDSGFVSQDANIYSKPPSPMPSDITSQKSSSSASSEASETCQSVSECSSPT\n+TFGSSFATFRPALFHSGSTRPLSVILPVPASPPYIRPPGSSSSSPTSKVPMWKDWSKAGQ\n+YEQPVAAA'..b'DRYGEQGLREGGGGGPGMDDIFSHIFGGGLFGFMGGQSSRSRNGGRRRGEDMVH\n+PLKVSLEDLYNGKTTKLQLSKNVLCSTCNGQGGKTGAVQKCTACRGRGMRIMIRQLAPGM\n+VQQMQSVCTDCNGEGEVISEKDRCKKCEGKKVVKEVKILEVHVDKGMKHGQKITFGGEAD\n+QAPGVEPGDIVLVLQEKEHETYRRDGNDLFMNHKIGLVEALCGFQFMLKHLDGRQIVVKY\n+PAGKVIEPGSVRMVRGEGMPQYRNPFEKGDLYIKFDVQFPDNNWISPEKLGELEDMLPSR\n+SEPPIISGDTEEVDLQDYDVSQSSSSGNRREAYNDSSDEEGSHHGSGVQCAHQ\n+>XP_006779770.1 gi|583968632|ref|XP_006779770.1| PREDICTED: low-density lipoprotein receptor-related protein 3-like [Neolamprologus brichardi]\n+MGLTELPLLLPLLGLLWLRCALLCAGCSEQVEIHTERRGVIYSPSWPLNYPAGVNCSWHI\n+QGGQGEVITISFRNFDLAESGKCTGDWLLLTPTWKRESRLCGSVLPQPFISTRGRVWLFF\n+HSQANSSGQAQGFRLSYIRGHLGQSSCQSDEFLCGNGKCLPRSWKCNGQDECGDASDERS\n+CLPTPTEAQPGLCPFGSLPCTEGQSTRCLPTALRCNGARDCHDGSDELGCPDTTCGKRLG\n+NFYGSFASPDFFRANRSGDTELRCSWLLDTQDPKPIVLQLDLQLGPGDLLHVYDGLLQRA\n+EHLLQVFSYHNNRRPALLESSRGQMSVLYMAQPHSPGHGFNATYQVKGYCFPGERPCGSD\n+QGCYSERQRCDGYWHCPSGRDEEGCPMCPDGEFPCEGGTGMCYPASERCNNQKRCPDGSD\n+EKNCYDCQPGNFHCGTNLCIFETWRCDGQEDCLDGSDERDCLAAVPRKVITAALIGSLVC\n+SLLLVIALGCALKLHSLRNREYRAFETQMTRMEADFVQREAPPSYGQLIAQGLIPPVEDF\n+PVYNPTQASVLQNLRLAMRRQIRRHSTRRSTSSSSRRRLGHLWNRLFRSGGRGRGHAPLL\n+DPPGPTQITLGLHSYRTVGEQGPQSRAVPAGGSDVVGVDLPESPASPLSFHSVDSPEEEE\n+DLSPVSRDGSRAAESSPPTPCQSDSSVQSGLPLSPQEASVPLCPPRASRKLVLELAVNLK\n+GVSLRRYSPLGPLSPISPPVFPSSSQTPSTQPQPQGSEVTSPTEPLFSSVKPEDSDSQFT\n+VNVPSRDETKPEARSSLCRFGRSISEEGGDLGRETLC\n+>XP_006779771.1 gi|583968634|ref|XP_006779771.1| PREDICTED: LOW QUALITY PROTEIN: rhophilin-2-like [Neolamprologus brichardi]\n+MTDALLSNGINDGGGDKNYFKKGCNPFAQTGRSKLQNTRASLNQQIIKQMRMRAGAENLL\n+KATSNSKVKEMVLLELSYVNSNLQLLMSELEGLNSSVEVYQNNQSSTQRILVPVFLNETT\n+VEFSILKIXSDFILEHYSEDGKTFEDEIADFMDLRQACRTPSRSEAGVELLGKYYSHLPL\n+IESRFFSPTRQTGIFFTWYTAFLGLKYQQNHICLIXFCFLFFFLLLFSVKSLMIXITSTN\n+CSFLALIRFQLVPTALSCPGVLNNLKETFTHTPSYDMSPAMLSMLIRLMLAQAQECLFEK\n+IALPGIRNQFYSLMKVAQEAAKVSEIYDQVHQCMIQTPVKDNVPFFWSTMSQIKTNHYRS\n+MAHYFVASALLDHQLGPGDDEDKQEKTLSQVYDSLPEGCTALDILKKKDERQRIGKAHIR\n+RAIFGHEEALRIYGLCKNTNNLEVLQEILKASHQRSVNKHSENENEEEFADYMEAPKIIS\n+KTEHKAEMEFPAAAKVKVIDFFQRLGPQSVFSAKQRWTAPRTIRVRSDDRDLGFTLKGDS\n+PVQVVSLDPLCAAAADGLKEGDYIITVGDTECKWMSVSDVMRLLKDVDEEGIDIQVVSMM\n+DNSTAMPTKSATFCGNLPKTYSMICLAYNEDDKNSKVRKVAKKSSFLSWGLKNKMKSAST\n+LSLPTADKAGALPWNKPCPTFPSSSSYNNDSGLY\n+>XP_006779772.1 gi|583968636|ref|XP_006779772.1| PREDICTED: E3 ubiquitin-protein ligase RNF182-like [Neolamprologus brichardi]\n+MKDSAAETSGVEEGESHTLGQEHDLKMSCPQTEFEEKESPPPEELECKICYQRYNVHHRK\n+PKILDCLHRVCARCLIKILDIADSAGCISCPFCRHQTEITEQEISALPDDVNIVSHLVMR\n+DKSWNSDQNREVVLTPKSFSSSSPSHDSSNCLVITIMEVQRDSQHSPSQNGSSDVYAEQS\n+LDSVSIGSNGPADQDALSKFCNHVPRILVWLLGFLYFGSLPLGIYLLVIQRVTLGIVCVS\n+LVPSSLTVCLVYGFCQCLCQGMCDCSSRG\n+>XP_006779773.1 gi|583968638|ref|XP_006779773.1| PREDICTED: centrosomal protein of 89 kDa-like [Neolamprologus brichardi]\n+MLRFSFRREKDKEFKHIAHGLIPAASIAPKPAVPRTPPPRSPNPSPERPRSALAAAILSS\n+SLTGQTWAIPPARLMSLSESGQSESFTSEPNISTALYTRDRWSEDLVSRPRLSSPDQSEG\n+ELEDKEQEVVDEEDGEEHVYHTLDRRQNSSLTESVYALPLKAKSVFKSTTPLPTQTSGRR\n+ESSPDFTEETSGQSPEPKEKKMSVRKTLENWKDDVPTTPTISTAGHPRQASQAKSPKDLR\n+ELPPEPSNTYSELRKKVVRDRREKNTRMVDKEKLQEERLQRLEREISDSKAFSNQRSSAG\n+SQAELQNLRQHAQELVDENDALKLTVHRLNVELSHYQARFRPLSKEEHSKVSGLPNTGSP\n+PPWLVDMKYSSPLLLAYEDRMNEKDAILQTTEENMEKLHVQLEEVIKENEKLHDEITKTG\n+AVNQKDCYQIQQQAVLVLQENQVLINQLEAQHAEAKDTHSRHNTEVAKVSKKMMLLEVEN\n+QRLEGDLEESRRELQKNKRDLQVLQARLKDAVTWDEHCSIAGKLRRQLEQHESRSKDGID\n+KLLLRVSNLQEENRILALDKAQLTAKTRAMEAELELSRQASRKAERRMSMLKQQKAECVL\n+KEEKTRHYLGAVISVAEHISQERDRLLHMASSLQQEKQRFISRILSGTVRFGKLQEEVKV\n+YRSQASTRLAALEEAVEGRTVSYQTEILHLQTLLRERQEAEEKLLQSKREIEEELEVVWE\n+AATRENQQMRETLLDSKLTGDLHSWPAHAPDEITTSSQQQQHKHGLDFYC\n+>XP_006779774.1 gi|583968640|ref|XP_006779774.1| PREDICTED: myocyte-specific enhancer factor 2A-like [Neolamprologus brichardi]\n+MGRKKIQITRIVDERNRQVTFMKRKFGLMKKAYELSVLCDCEIALIIFNGSNKLFQYAST\n+DMDKVLLKYTEYNEPHESRTNSDIVEALNKKEHRGCDSPDADASYVLTPNTEEKYKKINE\n+EFDNMMKTHKISTGQQQQQHQQHFMHVAPGSMAYSHSGGGGATSQALAAATAALADGGIL\n+PSPHSHLHRNINSSQRPPSAGGGLQGSSELALQNGSGPTVNGFGKIIPSKSPPPPPPHGN\n+SMVPTSRKTDLRVVIPHSKGMMQTLNNQRMSSSQSSQPLSTPVVSITTPSLPHQSLVYAG\n+IGSAYNDYSLNSGELSGFNSAAGPSLSSMAAWEQQQLSSMG\n' |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/part3.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/part3.fasta Mon Sep 21 15:40:14 2020 +0000 |
[ |
b'@@ -0,0 +1,343 @@\n+>XP_006779775.1 gi|583968642|ref|XP_006779775.1| PREDICTED: COUP transcription factor 2-like [Neolamprologus brichardi]\n+MAMVAWRNTEAVGDSQGTLSSPVSQVAPLSLPGELTGHMNPAPSLEIPQTAAAPQGAPPP\n+NPSGNTVATTTNNNNSTSSSSSSSSLSMDKQQSQQIECIVCGDKSSGKHYGQFTCEGCKS\n+FFKRSVRRNLTYTCRANRNCPIDQHHRNQCQYCRLKKCLKVGMRREAVQRGRIPTQSYHG\n+QFALTNGDPLQCHSYLSGYISLLLRAEPYPTSRFGSQCLQNNNILGIENICELAARMLFS\n+AVEWARNIPFFPDLQVPDQVALLRLTWSELFVLNAAQCSMPVHAAPLLAAAGLHASPMSA\n+DRVVAFMDHIRVFQEQVEKLKVLHVDSAEYSCIKAIVLFTTDACGLSDVAHVEGLQEKSQ\n+CALEEYVRSQYPNQPNRFGKLLLRLPSLRSVSSSVIEQLFFVRLVGKTPIETLIRDMLLS\n+GSSFNWPYMAIQ\n+>XP_006779776.1 gi|583968644|ref|XP_006779776.1| PREDICTED: solute carrier organic anion transporter family member 3A1-like [Neolamprologus brichardi]\n+MQVKNQICTERSSNDDPEQDDNQKKTSCFSNIKIFLVSECALMLAQGTVGAYLVSVLTTL\n+ERRFNLQSADVGVIASSFEIGNLALILFVSYFGAKAHRPRLIGCGGIVMALGALLSALPE\n+FLTHQYEYEAGDSWHAEDGRDVCSNISRSENRDSGFKCGNRANTNMMYLLLIGAQVLLGI\n+GATPVQPLGVSYIDDHVHRKDSSLYIGILFSTLVFGPACGFILGSVCTKVYVDAVFIDTS\n+TLDITPDDPRWIGAWWGGFLLCGALLFLSALFMFGFPQALDEQDMDSGAESEQAMLPSSL\n+SLEFQGSKPNGAIHGFDINSGLSVCQHLRVIPRVTRHLLSNPVFSCITLAACMEIAVVAG\n+FAAFLGKYLEQQFNLTTSSANQLLGMTAIPCACLGIFLGGLLVKKLNLSALGAVRMAMLV\n+NLVSTACYVSFLFLGCDTGPVAGVTVAYGNETLQSWQQPESACISNCNCYTASVSPVCGS\n+NGVTYLSACFAGCTKPNLTNCACISSNSEEAVALPGKCPSPGCQQAFLTFLCVICVCSMI\n+GAMAQTPSVIILIRTVSPELKSYALGVLFLLLRLIGFIPPPLIFGMGIDSTCLFWSSVCG\n+EKGACMLYDNVAYRHLYVSIAIVLKSSAFLLYTTTWQCLRKNYRKYIKNNEGYLTPTELF\n+ASNVTLDNLGKDITQNPTNRTKFIYNLEDRETCDNMESVL\n+>XP_006779777.1 gi|583968646|ref|XP_006779777.1| PREDICTED: gonadotropin-releasing hormone II receptor-like isoform X1 [Neolamprologus brichardi]\n+MNGSSCCDPAAVMYQQRSGLDLNASCEWPDPHCNWTSVDGALQLPTFSTAAKIRVIVTFI\n+LCGISTFCNLAVLWAANGHKRKSHVRVLIINLTAADLLVTFIVMPVDAVWNITVQWLAGD\n+LACRFLMFLKLQAMYSCAFVTVVISLDRQSAILNPLGIAMVRKRNRVMLMVAWIMSALLS\n+IPQMFIFHNVTITYPANFTQCTTRGSFVTHWQETAYNMFTFCCLFLLPLVIMIICYTRIF\n+VQISKQMTKKNMPSNEPHLRCSKNNIPKARMRTLKMSIVIVICFIVCWTPYYLLGLWYWF\n+FPDDLEGKVSHSLTHILFIFGLFNACLDPIIYGLFTIRFQKGLRNCYRKAAVMSSLETNA\n+VIMESLKCTGSVLPSKRGMTSGEKDISSEQAEAKSTDNSV\n+>XP_006779778.1 gi|583968648|ref|XP_006779778.1| PREDICTED: gonadotropin-releasing hormone II receptor-like isoform X2 [Neolamprologus brichardi]\n+MNGSSCCDPAAVMYQQRSGLDLNASCEWPDPHCNWTSVDGALQLPTFSTAAKIRVIVTFI\n+LCGISTFCNLAVLWAANGHKHAVWNITVQWLAGDLACRFLMFLKLQAMYSCAFVTVVISL\n+DRQSAILNPLGIAMVRKRNRVMLMVAWIMSALLSIPQMFIFHNVTITYPANFTQCTTRGS\n+FVTHWQETAYNMFTFCCLFLLPLVIMIICYTRIFVQISKQMTKKNMPSNEPHLRCSKNNI\n+PKARMRTLKMSIVIVICFIVCWTPYYLLGLWYWFFPDDLEGKVSHSLTHILFIFGLFNAC\n+LDPIIYGLFTIRFQKGLRNCYRKAAVMSSLETNAVIMESLKCTGSVLPSKRGMTSGEKDI\n+SSEQAEAKSTDNSV\n+>XP_006779779.1 gi|583968650|ref|XP_006779779.1| PREDICTED: lactoylglutathione lyase-like [Neolamprologus brichardi]\n+MSDKGLSDEAVAAVCKDGDPITKDFMMQQTMLRVKDPNKSLDFYTRILGMTLLQKFDFPS\n+MRFSLFFLGYEDKKEIPADVKEKTAWTFSRRATLELTHNWGSESDDSQSYHNGNSDPRGF\n+GHIGIAVPDVYAACKLFEEQGVTFVKKPDDGKMKGLAFIQDPDGYWIEILSPNNMVSITS\n+K\n+>XP_006779780.1 gi|583968652|ref|XP_006779780.1| PREDICTED: bifunctional glutamate/proline--tRNA ligase-like isoform X1 [Neolamprologus brichardi]\n+MALNLTINTSNPPLGALLTAEHVKSSVQVSVEEGKDTRLHISDSVQFSDDNSICRYLARV\n+APALGLYGSNMMEQTEVDHWLEFSARSLCNQPDLTVALAELDKALSLRTFLVGHALTLAD\n+LSVWAALKDHGEWPKQGKSFSHVSRWFFFLSSQVPFTAVGNKYASKKASMNKTKSEGKKA\n+DVGKFVELPGAEMGKVVVRFPPEASGYLHIGHAKAALLNQHYQVTFKGKLIMRFDDTNPE\n+KEKEDFEKVILEDVAMLQIHPDQFTYTSDHFPIIMKFAEKLLAEGKAYIDNTPPEQMKQE\n+REQRVESTCRNNSVEQNMKMWSEMKAGTEYGQTCCMRAKIDMNSNNGCMRDPTLYRCKNA\n+AHPRTGNTYNIYPTYDFACPIVDSLEGVTHALRTTEYHDRDEQFYWIINALGLRKPYVWE\n+YARLNLNNTVLSKRKLTWFVDQGYVDGWDDPRFPTVRGVLRRGMTVEGLKQFIAAQGGSR\n+SVVNMEWDKIWSFNKKLPVSCLKVIDPVAPRYTALSSSYVVPVSVPEATEEMKEIAKHPK\n+NAEVGMKEVWFGPRVLIEGADAETFTEGETVTFINWGNLIITKINKGADGKVLSMKASLN\n+LDNKDYKKTTKITWLAETNNSLPVPAICVNYQPLISKAVITKDDNFKDYINKHSKLEEKM\n+LGDPCLKNLKKGDIIQLQRRGFYICDQPYEPLSPNSCKESPCVLIYIPDGHTKEMPTAGS\n+KEKSKTQASDNTPASPAKAPKTSVPAPASAPAADLFSSIVAQGEAVRLLKAAKAPKDEVD\n+KAVKQLLSLKEQFKQQTGVEYKPGMAPPASTPAPPTSSSDSTSCPYTRVVQQGELVRKLK\n+AEQAPKDQIDAAVKQLLALKAEFKKLTGQDYKPGMATPAPSASSPVTATSSSSPPSSSSG\n+LYEHVAQQGEVVRKLKSEKAPKDQVDAAVKQLLALKEEYKRITGQEYKPGATPP'..b'DEDTFGVSIAVG\n+LAGFACVLLLVLFVLINKYGRRSKFGMKGPVAVISGEEDSASPLHHVNHGIITPCTLDAS\n+PDAVVIGMTRIPVVENPQYFRHGHNCNKPATLVQHIKRRDIILKRELGEGAFGKVFLAEC\n+YNLSPTKDKMLVAVKTLKDPNLSARKDFQREAELLTNLQHDHIVKFYGVCVDGDPLIMVF\n+EYMKHGDLNKFLRAHGPDAMILVDGQPLQSNGELGLSQMLHIATQIASGMVYLASQHFVH\n+RDLATRNCLVGNGLLVKIGDFGMSRDIYSSDYYRVGGHTMLPIRWMPPESIMYRKFSTES\n+DVWSFGVILWEIFTYGKQPWFQLGNNEVIECITQGRVLERPRICPKEVYDIMLGCWQREP\n+QQRLNIKDIQKVLFAMGKATPVYLDILG\n+>XP_006779794.1 gi|583968681|ref|XP_006779794.1| PREDICTED: synaptic vesicle glycoprotein 2B-like [Neolamprologus brichardi]\n+MDDPYRNNVNQQMTEGGDYTYTQDGGGQDGYPYQTDYPPQDEDAASDATEGADEDDQMYE\n+GEYQGIPHPDEIKEARRAARVEARRKARMAAQQEEEEENLPEQYETIMEDCGHGRFQWML\n+FFVLGLALMADGVDGFVVGFVLPSAEKDMCISNADKGLLGLLVYVAMMVGALVWGGLCDK\n+MGRRKCLIYVLTIDLVFSFLSCFAQGYGFFLFLRFCSGFGIGGSIPIVYTYFTEFLQMDK\n+RGEHLSWLCMFWMFGGLYASFTAWGIIPHYGWGFAIGTHIQMHSWRLFILVCLFPALAAL\n+IGLVFMPESPRFLLENARHDEAWMILRQVHDTNWKAKGEPERVFTVTNIKTPQTQDDEFI\n+EIQSETGTAFQRWTVRKMTMLQQVMANIMSLSAPELRLQGLLLVIVWFCLAFSYHGLGVW\n+FPDMIKYMQYEEYESKVRIFHRERVERFHFNFSLVNQIHREGEYIHDKFANIEIKSVKFE\n+SSLFENCYFEDVKSTNTFFENCTIKNTVFYNTDLWQDKFKNCRMENATFLHPKKGCHLNF\n+QEENDIVIYMVSFLGSLAVLPGNIISALFMDKIGRIRIIGGSMLASSACTFLLLLSFSQG\n+AVICWQCLFYGVSVAAWNGLEVISVELYPSSKRGTAFGILNGICKFAAIIASSIFAAFIG\n+ITKIIPIFLAFAALVCGGMVALKLPETREKILS\n+>XP_006779795.1 gi|583968683|ref|XP_006779795.1| PREDICTED: AP-3 complex subunit sigma-2-like [Neolamprologus brichardi]\n+MIKAILIFNNHGKPRLIRFYQYFAEDMQQQIIRETFHLVSKRDDNVCNFLEGGSLIGGSD\n+YKLIYRHYATLYFVFCVDSSESELGILDLIQVFVETLDKCFENVCELDLIFHMDKVHYIL\n+QEVVMGGMVLETNMNEIVAQVEVQNRMEKSEGGLSAAPARAVSAVKNMNLPEIPRNINIG\n+DINIKVPSLSPF\n+>XP_006779796.1 gi|583968685|ref|XP_006779796.1| PREDICTED: synaptosomal-associated protein 25-B-like isoform X1 [Neolamprologus brichardi]\n+MADESDMRNELADLQTRADQIADESLESTRRMLALVEESKDAGIRTLVMLDEQGEQLERI\n+EEGMDQINKDMKDAEKNLNNLGQFCGLCSCPCNKIKGGGQAWGGNQDGVVNSQPGARVVD\n+EREQMAISGGFIRRVTNDARENEMDENLEQVGGIIGNLRHMALDMGQEIDTQNRQIDRIM\n+DKADSNKTRIDEANQRATKMLGSG\n+>XP_006779797.1 gi|583968687|ref|XP_006779797.1| PREDICTED: synaptosomal-associated protein 25-B-like isoform X2 [Neolamprologus brichardi]\n+MADESDMRNELADLQTRADQIADESLESTRRMLALVEESKDAGIRTLVMLDEQGEQLDRV\n+EEGMNKVNADLKEAEKDLKDIGQCCGLICPCIKKIKGGGQAWGGNQDGVVNSQPGARVVD\n+EREQMAISGGFIRRVTNDARENEMDENLEQVGGIIGNLRHMALDMGQEIDTQNRQIDRIM\n+DKADSNKTRIDEANQRATKMLGSG\n+>XP_006779798.1 gi|583968689|ref|XP_006779798.1| PREDICTED: protein FAM219B-like [Neolamprologus brichardi]\n+MMNDILEEPEKDSLLEAQQDSQGLSGPSSGTRPKSIDGGIRPVEKRGPYIMSRAPAIHLK\n+LQKHREMARKALKKKALSPGPPVTHQPRQGAKRMVKYNKGYAALSQHAEDTLVAIDSDSD\n+EEIDFEQYSSGYSSAEIHPDLSKQLLQDGYRLDEIPDDEDLDLIPPKAMGSSVCCCSEGP\n+SCPIQ\n+>XP_006779799.1 gi|583968691|ref|XP_006779799.1| PREDICTED: semaphorin-7A-like [Neolamprologus brichardi]\n+MFLEIWKMRFSLVACLFFLHICCLAVGNDRSPRMIFTEKEAAMNRLDLLHGPPVRILLEE\n+KPDTVLAVGKTYLNTYNIKNQNKNQTRMQLENCNRNCSYDITLAHLMEDAKKLFVCGTIH\n+DETVCCNSNLTEQPPICKDIKDISSFNIKEGDLSALAESKQSTDLYITRSGSDESVGIHK\n+FGKARVGPKNHHKEQHYVGLVLSKREEDPSQNRVYGFYREKTKDDGLFSEMWLPFVTQVC\n+MTDVGGPKNNLQYTWTSQMNARLFCGDQERKQHFSELVDVSTVDADRWQYTKIYALFRNE\n+WGMSAVCVYTIEDISKIFENSPFNGYTKKQMDRPRMCAPDSSKLSVDTLKNIDKTSEMEQ\n+LVHPVGNPGLLFFNHRNYTHIQVDSKPNSRGGLEWLQFLTVNNGGIHKVLQNESHTFVIA\n+EYQPFKQKAHVLSIILQSTFKKLYVNNGSQLVQLDVADCSQYGDTCQDCMLSRDPYCGWN\n+GTQCIRETEGSWHDAATGNLSICNEHNASNYKGDPVPVPRYSKYFLQCPVSSRHAQYSWQ\n+HDENSTACSSGKEQCLYLIDNMDSECKGTYKCISQEMGYSKVLVQYELQVENDAKTQPYK\n+RLWPNKAEGRKTSPVIWVCLMMALIKSLSF\n+>XP_006779800.1 gi|583968694|ref|XP_006779800.1| PREDICTED: cytochrome P450 1A1-like [Neolamprologus brichardi]\n+MALMILPFIGALSVSHVLVAVTTACLVYMIIKNAQNKIPEGLQQLPGPKPFPIIGNVLEL\n+GSRPYLSLTSMSKRYGDVFQIQIGMRPVVVLSGNETVRQALIKQGDEFAGRPDLYSFRYI\n+NDGKSLSFSTDQAGIWRARRKLAYSALRSFSNLDSTTPEYSCALEEHISKETEYLIKELN\n+TVMKTKGSFDPFRYVVVSVANVICGMCFGRRYDHHDDELVSLVNLSDDFVKVVGSGNPAD\n+FIPLLQYLPSTKMKKFMSLNARFSKFVQKLVTEHYATFDKDNIRDITDSLIDHCEDRKLD\n+ENANIQMSDEKIVGIVNDLFGAGFDTISTALSWSLMYFVAYPEIQNRLFEEIKEKVGLDR\n+MPVFSDRNNLPLLEAYILELFRHSSYLPFTIPHCTTKDTSLNGYFIPKDTCVFINQWQIN\n+HDPEMWEDPFSFKPERFLNADGTEVNKVEGEKVMTFGLGKRRCIGEVIARNEVFLFLAIL\n+IQKLNFQALPGDQLDLTPEYGLTMKHKRYHLRATMRVRNEQ\n' |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/part4.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/part4.fasta Mon Sep 21 15:40:14 2020 +0000 |
[ |
b'@@ -0,0 +1,245 @@\n+>XP_006779801.1 gi|583968696|ref|XP_006779801.1| PREDICTED: enhancer of mRNA-decapping protein 3-like [Neolamprologus brichardi]\n+MATDWVGSVVSIDCGATLGVYQGEVSSVDRVSQTISLKHPYHNGVKCPVPEVTFSAMDIK\n+DLKFLDIQNKVNKTSAGKDTATEPSYISTGRHGQTNKTNHSLAISNSSGLSSNPRKGSSN\n+SRGTTQSTPRRSNVRNGGAGGQRSKNDECFGDGTDENLDTDFDFEGNLALFDKAAVFSQI\n+DGASSNSNKLQHHNTQAEQKTQSYRHDENILEVKPVTYRQITVPQHGGKEYCTDTGLVVP\n+TVPYELHKQLLAAAERWGLSLERRLEAVGVCSSQMALTLLGGPNRLTPKNVHQRPTVVLL\n+CGPHVQGAQGISCGRHLANHEVEVILFLPNFVKMQESVTSEVNLFSKTSGKQVSSVKDLP\n+MSPVDLVINCLDCHENPLLKEQSWYQSVADWANKNRAPVLSIDPPVSEQPQSVDAKWTLS\n+LGLPLPLADKDSRVYLCDIGLPKMVYQEVGINYHSPFGCKFVIPLHSV\n+>XP_006779802.1 gi|583968698|ref|XP_006779802.1| PREDICTED: tyrosine-protein kinase CSK-like isoform X1 [Neolamprologus brichardi]\n+MSGIHVPWSTGTECVAKYNFQTANEQDLPFCKGDVLTIIGVTRDPNWYRARNQVGREGTI\n+PANYVQKREGVKSGGKLSLMPWFHGKITREQAERLLYPPETGLFLVRESTNYPGDYTLCV\n+SCDGKVEHYRIIYHNGKLTIDEEEYFENLMQLVEHYTKDADGLCTRLIKPKLMEGTVAAQ\n+DEFSRSGWALNRKELKLLQTIGKGEFGDVMVGDYRGTKVAVKCIKNDATAQAFIAEASVM\n+TQLRHNNLVQLLGVIVEERGSLYIVTEYMAKGSLVDYLRSRGRTVLGGDCLLKFSLDVCE\n+AMEYLEANNFVHRDLAARNVLVSDDNIAKVSDFGLTKEASSIQDTAKLPVKWTSPEALRE\n+KRFSTKSDVWSYGILLWEIYSFGRVPYPRIPLKEVVPRVEKGYKMDAPDGCPAVVYDLMK\n+QCWTLDPVMRPSFRMLREKLQHIRAKELYL\n+>XP_006779803.1 gi|583968700|ref|XP_006779803.1| PREDICTED: tyrosine-protein kinase CSK-like isoform X2 [Neolamprologus brichardi]\n+MSGIHVPWSTGTECVAKYNFQTANEQDLPFCKGDVLTIIGVTRDPNWYRARNQVGREGTI\n+PANYVQKREGVKSGGKLSLMPWFHGKITREQAERLLYPPETGLFLVRESTNYPGDYTLCV\n+SCDGKVEHYRIIYHNGKLTIDEEEYFENLMQLVEHYTKDADGLCTRLIKPKLMEGTVAAQ\n+DEFSRSGWALNRKELKLLQTIGKGEFGDVMVGDYRGTKVAVKCIKNDATAQAFIAEASVM\n+TQLRHNNLVQLLGVIVEERGSLYIVTEYMAKGSLVDYLRSRGRTVLGGDCLLKFSLDVCE\n+AMEYLEANNFVHRDLAARNVLVSDDNIAKVSDFGLTKEASSIQDTAKLPVKWTSPEALRE\n+KRFSTKSDVWSYGILLWEIYSFGRVPYPRIPLKEVVPRVEKGYKMDAPDGCPAVVYDLMK\n+QCWTLDPVMRPSFRMLREKLQHIRAKELYL\n+>XP_006779804.1 gi|583968702|ref|XP_006779804.1| PREDICTED: complexin-3-like [Neolamprologus brichardi]\n+MAFMVKHVVGGQLKNLTGGLTEEKSEGEKSDAAAQGMTQEEFEQYQQQLEEEKKEREAHY\n+AQKKAERATVRTHFREKYRLPKNEMDETQIQQAGDDVVLPTELAKMIAEDNEEETHKQSV\n+LGQLSNIQNVDIDQLKDKAQATLEDLKKQTENCSLM\n+>XP_006779805.1 gi|583968704|ref|XP_006779805.1| PREDICTED: growth arrest-specific protein 1-like [Neolamprologus brichardi]\n+MKCWCSALALLPWVLVALDAQLICWQALLRCHDEPECELAYNQYMTACEGNIKGTRKQCP\n+SHCISALIRLNHTRSGPDLETCDCAQDLDCLDAKRAIEPCLPRRHPKDAGGIGCMEARQR\n+CEEDSNCHTSLTAYLSYCGQLFNGRKCSSKCKATIQQMLFIPNGMLLNRCICDGVERPFC\n+EVVKENMSKLCSIGDHSVVSDPTKDYEDPYEDDYSKNDKEVDFSENSSASQSLSRGVLPL\n+CLLTARILY\n+>XP_006779806.1 gi|583968706|ref|XP_006779806.1| PREDICTED: serine/threonine-protein kinase ULK3-like [Neolamprologus brichardi]\n+MASTSSFAPPKLSDFILTERLGSGTYATVYKAYRKGNSREVVAVKVVGKKTLNKASTENL\n+LTEIEILKTVRHPHIVQLKDFQWDAENIYLILEWCSGGDLSRFIRSRRILPESVTRRFLQ\n+QIACALQFLHERNISHLDLKPQNILLSGSILKLADFGFAQYMSPWDEQSVLRGSPLYMAP\n+EMVCRRQYDSRVDLWSVGVILYEALFGRAPFASKSYAELEEKIRSNQPIELPPGARVSKD\n+CRDLLLRLLERNPDARITFAEFFTHPFVDMEHMPSAESIVKAKKLVLQAIQKDQEGERSE\n+ALSLYCSALEHFVPAIYYETNCQRKEALRQKVRQYVSRAEELKALVASDNRLSFEQARTS\n+RDILREMSKDQPRLLAALEMASTAIAKEESGSDDLEALDMYQQCLGELLLGLAAEPQGRR\n+RELLHSEIKSLMSRAEYLKKHIKMQETQRDVSLDRESLAESVRSSCCLQ\n+>XP_006779807.1 gi|583968708|ref|XP_006779807.1| PREDICTED: TM2 domain-containing protein 3-like [Neolamprologus brichardi]\n+MATVCQIWRPDRGRCLKSYGIIAVLFMDLMLQCVNGSLSTTNVETHYTRDGPFITSPVVP\n+DASSVFPADEDTSKCPSGGLCHRLPAHCIQCDYHLKCTYGKPTLFTCRPKKGVHCIGESG\n+HQQTNFSLNITCQFCWQLDPSQYRCTNSTNCMTVSCPRKRYNATCDVLDHVHCLGKRRFP\n+KRLFCNWTGGYKWSTALALSITLGGFGADRFYLGQWREGLGKLFSFGGLGIWTLIDVLLI\n+GVGYVGPVDGSLYI\n+>XP_006779808.1 gi|583968710|ref|XP_006779808.1| PREDICTED: la-related protein 6-like [Neolamprologus brichardi]\n+MYALVNAFMRCLSFLLPPSWLYVSFCLWVGNECEETLQRPNPRARFKSREPLTYEEVKAA\n+AKAAAEAEAQGGSRPSVSPGPDCVSLAATSPAAPKGPSSGLIWIGGLWRAVERVFGAPWV\n+LLRHHLCPKRRRAALGAPYPVCAFELGKIKSFQRGAAAAAAAAKIVDVKGPGETTFTYSK\n+NMSGSVGVPSVNSTECASDASAEQGIDEVITVDQLSQEMGTVTITVAIQAAEDEEPEEVT\n+SNNADFLGGSCSEDEIGRHDKSSGAGTSGGELEEESWQPPDPELIQKLVTQIEYYLSDEN\n+LEHDAFLLKHVRRNKLGFVSVKLLTSFKKVKHLTRDWRTTAYALRHSKILELNDEGRKVR\n+RKSAVPVFASESLPSRMLLLSDLQRWPE'..b'NDQLIRCITEYMQKGRAVECVQYQQILHRNIVY\n+LATIADASPDSAASTSNCTSNDTSASAAAVNGHTEGS\n+>XP_006779820.1 gi|583968735|ref|XP_006779820.1| PREDICTED: uncharacterized protein KIAA0355-like [Neolamprologus brichardi]\n+MYCCSAQESKMDYKRRFLLGGSKQKVQQHQQYQMPELSRTLSASLASSCSASSPMGTGVG\n+MSGSCHPPPSGTSTAVADIQQGISKYLDALNVFCRASAFLTDLFSSVFRNSHYSKAAMQL\n+KDVQEHVMEAASRLTAAIKPEIAKMLMELSAGAANFKDQNDFSLQDVEVLGRCFLTVMQV\n+HFQFLSQALQKVQPVAQSCLAEALAQAQERCANARSQSSDLGPLTELEEASRSWKGAAEA\n+TARLRERGRDGCLAGIQVQQLFCSNNTTIPEHQLKELNMKIDSALQAYKAALESLGHSEY\n+ALKAGFHLNPKAVEAALQGCCSEAEAQQAGRMQTTSQPIQCELPTIPVQIGSHFLKGVSF\n+NESAAENLKLKTHTMLQLIKEALGQNGVTPRDDSPVTEVLNQVCPSSWRGACKTAVQLLF\n+AQAGLVVVDTAQIENKEAYAPQITLEGSKVVVQVPSTWCLKEDPATMSLLQRSLDPEKTL\n+GLVDVLYTAVFDINRWKERKEQALPTIQIQLQRESPDYGIPTDLPPGTSSKTSSGLPKTI\n+SKLTSKFTKKVSSSSNSGGSFSIPSTPSRSMLTTSNSEDKAKGLGHSDGRLQSILQMGSL\n+PCTSDSTQQNQLANGSVSEDQGMNLPTDQEMQDVIDFLSGFNMGKSQQASPLVKRRNSVA\n+SANPAELKPPSGPSQATSSISHSALQPPAQTLPQPQPQPQPSQPVQKQQPQPNPQPPPPQ\n+QQQPQQQQQPPPPPPQQPSPQAQHLYYQHLLQPITQQQAPPPQLPPQQTPPQVLPQQRVA\n+SKWLGTSGQQPPPQGPPAGLSPLGPIGQWASSGLPDLSSDLYSLGLVSTYMDSVVSEMLG\n+QKPQGPRNNTWPNRDQSEGVFGVLGDTLPFDPAVGSDPEFARYVAGVSQAMQQKRQVQHI\n+RRPSNTRSNWPMPDEQHRTWSHPEYFNEGDAVNSGWSANQGDSASSSDETSSANGDSLFS\n+MFSGPDLVAAVKQRRKHSCGEPEVCTLPSPPLHHIGDDSQDSKTKTWPPKAPWQHSTHTN\n+TMPNPSSSLYQMNIPPSSQWGDSMPMLQSPVWSTASDCPPSTGISSGFPFTQQQQQQQQQ\n+QHKPMTKGFKSFPVKHEHRPSYLHQY\n+>XP_006779821.1 gi|583968737|ref|XP_006779821.1| PREDICTED: glucose-6-phosphate isomerase-like [Neolamprologus brichardi]\n+MGLTQDPNFQKLQEWYTAHALGLNMRHMFEADKERFNKLSLTLKTEDGDILLDYSKNLIT\n+EDVMKMLVDLAKSRGIEAAREKMFTGEKINFTEGRAVLHVALRNRSNTPIMVDGKDVMPD\n+VNKVLEKMKGFCHKVRSGEWKGYTGKAITDVVNVGIGGSDLGPLMVTEALKPYSKDGPRV\n+WFVSNIDGTHIAKTLAQLNPETTLFIIASKTFTTQETITNAESAKAWFLEHAKDKAAVAK\n+HFVALSTNGPKVKDFGIDTENMFEFWDWVGGRFSLWSAIGMAIALHIGFDNFEKLLSGAH\n+WMDKHFRTAPLDKNAPILLALLGIWYINFFHAETQAMLPYDQYMHRFTAYFQQGDMESNG\n+KYITNHGTRVNYHTGPIVWGEPGTNGQHAFYQLIHQGTRMVPCDFLIPAQSQHPIRDNLH\n+HKVSLMLERYLSKXXALMKGKTTEEAKKELEASGLSGEALEKILPHKVRRIKRNDLIKDN\n+EPAALLMARNSNKLKPKLKRRAPCRVAFTKRDSPSKNSVNRC\n+>XP_006779822.1 gi|583968739|ref|XP_006779822.1| PREDICTED: Wilms tumor protein 1-interacting protein homolog [Neolamprologus brichardi]\n+MEHYQEDLGLRATKLMEDLSLYDAYQDGMYDARRDLVINPDLDFSAPALVEHKAKPMNGT\n+SVLHQQHHTVENFSSGNKVYNAAPVRPVNCNRTVPVDFCAPQRDAVYNEDGCCTKSEVAL\n+PCYTGTSERHRRYSLEVQGHRYSTGSTFDGVPLNKPVAVPGNRCNSVCIASSHDGRYNAT\n+SPRSSLASSLSSQEQSKHASPRSSISSPRTSLVVPGQERYTSPRSSLVHCEGNSVLSPRS\n+SYASTASDTSKHSSPRASLNSCDCCSKPNSNRTSGISMGYDQRHTSPRSSTASQYSFTTS\n+PRSSYSDSRYGPVVNQDLEGVLHSAPLASPRSSICSQDGSARPGASANCVVSPRSSISSH\n+SSRSSRSSRGSMSTYPDLQLPSPRSSMLGTSLHEDTLLQEFGDSNGVQNRIHLQGLSAVP\n+EPQQQSGQTGGTADIPSGSPSSYSYVMPSKTASSGQRFKLPYQVTPSRESGPSQAEKRLE\n+ALTLELEKELEMHMKKEYFGICVKCGKGVYGASQACQAMGNLYHTNCFTCCSCGRRLRGK\n+AFYNVNGKVYCEEDFLYSGFQQTAEKCFVCGHLIMEMILQALGKSYHPGCFRCVVCKEGL\n+DGVPFTVDVENNIYCVKDYHTVFAPKCASCNQPILPAQGSEETIRVVSMDKDYHVECYHC\n+EDCGLQLNDEERHRCYPLEGHLLCHDCHILRLQSQVPAHAPPSYPLHVTEL\n+>XP_006779823.1 gi|583968741|ref|XP_006779823.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X1 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n+>XP_006779824.1 gi|583968743|ref|XP_006779824.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X2 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n' |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/sample1.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample1.fasta Mon Sep 21 15:40:14 2020 +0000 |
b |
@@ -0,0 +1,11 @@ +>ID1 +MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS +PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI +>ID2 +MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA +AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH +>ID3 +MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI +VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV +FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW +LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/sample2.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample2.fasta Mon Sep 21 15:40:14 2020 +0000 |
[ |
b'@@ -0,0 +1,1005 @@\n+>NP_001007355.1 gi|55925472|ref|NP_001007355.1| eukaryotic translation initiation factor 4E-binding protein 3 [Danio rerio]\n+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS\n+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI\n+>NP_956692.1 gi|41055339|ref|NP_956692.1| transmembrane protein 218 [Danio rerio]\n+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA\n+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH\n+>NP_001003767.1 gi|57524633|ref|NP_001003767.1| transmembrane protein 179 [Danio rerio]\n+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI\n+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV\n+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW\n+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI\n+>NP_001002700.1 gi|50540464|ref|NP_001002700.1| fatty-acid amide hydrolase 2-A [Danio rerio]\n+MALTRFERFLGRLLRAVVWILFAAFKLFAPQQRHGVSRLPPITNPLLLLSAMQLARKIRR\n+KEVTSVEVVQAYIDRIQEVNPLINAMVKDRFSAALQEAAQVDKLIEEETGGEDVLEDRLP\n+LLGVPITVKEAFALQGMPNSTGLLTRRDLVSGADAPSVALLKRAGAIPLGVTNCSELCMW\n+LESHNHLYGITNNPYDFERIVGGSSGGEGSILGAGSSVIGIGSDIGGSIRIPCFFNGIFG\n+HKPSVGIVNNEGQYPPASGQQMGFLCTGPMCRYAEDLIPMLSIMGGPNAEKLSLFTEVDL\n+KKLRFFSVPHNGGSHLVSPVEPQLLHAQKMVVKRLEADLGVKVQELLIPQLKYSFQIWGT\n+MMASPGKDGKPPTTFAELMSEGGKKVWPAWELFKWFLGFSSHTLAAIGLALVELFQSSHP\n+SPFIMQQKESLQQELEELLGTDGVLLYPSHPLIAQKHHHPIFTPFNFSYTGIFNILGLPV\n+TQCPLGLSAEGLPLGVQIVAGKLQDRLSLATALYLEKAFGGWREPGKTTIKP\n+>NP_001003555.1 gi|57525887|ref|NP_001003555.1| centromere protein P [Danio rerio]\n+MEQKYEEDIQKLQQEIEMLEAEQEETLRSIFVQHGDRLQQGVKSACEERGGGGAQQHTLS\n+KLITEVRELEKDLRRQTEINGITLNECFVKTLHKSERKLIQQLRLAGHCGLLLFQVEFAV\n+TEIQEDNVLHRRVTELNIVVDGVEFKDFSAFVSRVEDTKDLLLFFRTLRTFSERCEDRRQ\n+TFQHFQEKYPDVVNLPEGCRSEIMIIRSPQLPGISMTLFWKIHVSKEGVVKPLLDLLLKM\n+PDQALELDTKKVMEKASDYFQSLLQLLGVEASIEGLIRTVCS\n+>NP_997599.1 gi|47058959|ref|NP_997599.1| protein dispatched homolog 2 [Danio rerio]\n+MESGSISRQREDAEMPDSSTTEGPSLEAPQSEIPEVSLCPPDSDSTESQMCPVEIEENQT\n+KSSSPFNSHSSTQLERQVSQGSAYHSPPHKKCPCCGHQQPSQSDVCPGQMNALHQADCAA\n+SPVKTLYSCSPSRLPSCHTKMQCHWLHGSHDGSNHKPVQHHMVTVRNDGLHRIPRSYSQV\n+IVEYPMTVLISCTLVLFACSLAGILTGPLPDFSDPLLGFEPRGTDISVRLATWTRLKQNT\n+GPGKPLSPVPWQLTEKTTTGKDTIKSEPQFRERSRRMLHRDNAEHNFFCNAPGERYAQLV\n+FRSGNSASLWSLKAIYSMCQMEQTQIRSGPQFDKLCQVKSEFYGSMVKNECCPSWSLGNY\n+LAVLNNISSCFSLTSQQVSESLGLLRFCAPYYHDGSLIASCTERSKFGRCASVPHRCKLS\n+SIFQILHYLVDKDFLGPQTVEYKVPSLKYSIVFLPVEKSDSLMNIYLDHLEGHKLTYNNT\n+TITGMDLGIKQKLFKYYLARDSIYPVLAALALLITIGLYLKSLFIAAMSLVAVILSLSTS\n+YFFYKVAFRLTFFPLLNLAAVFVLLGSCLNQALTFVDFWKLQLSHNPPAVPEKRMNRVLQ\n+EMGYLIIVSGLTSSVTFYSGYISSITAVRCYAVYLGSASLINTLFALVWLPCTLILQERY\n+AVLSSNTVGKVAWKPCCSKNAGGFWETSSRKRCLFTFRQKLRTLGRGFSDTSNLLFLKIL\n+PCGVVKFRYIWICWFAVLAAGGTYISCVDPGMKLPTSDSRTTQLFRSSHPFERYDAEYRH\n+QFMFERMKDGEDEPMMLTLIWGIVPSDNGDHFDPKSNGSLSVDPGFNMSSLQAQIWLRDL\n+CGKIQNQTFYSPLSAEQDTAEDNVCFVEHLIHWVSIRRCSESEDAFSFCCNNIPFPYPPR\n+VFEQCLSMMVAEQHAEGRLPSAGGLRFDSEGRIAALVVIFKTVQLYSFNYNRMSQFYQEI\n+LSWFNREISKAPAGLQRGWFVSQLGLYDLQQCLSSETLEVAGFSVALTFALLLLTTWNIP\n+LSVYVSIAVAGSVFATVGLLVLLEWQLNGVEALFISAAAGLSVDFVANYCISYSLAPHSD\n+RLGRVAHSIKRMGCPVATGAGAYFCVGIIMLPATALLFRKLGIFLLLVKCVACGFATFFF\n+QSLCCFFGPQNNCGRITLPCVTQQSTENILSSCSATEPGTNNPAANGAFGCGKGSRVRRS\n+FNKENEGFLCPNQQHHRKRQPVGGREPEQNELQPLACQLSDSFENSTCTSKLSNRPSVLS\n+DDIQFCGLSPKQDYDRVSIEADSTEMCSRHLKGCNPPPALQTSSPYKENMLRLPQDACKE\n+KVLCKKCRGQSRGGLQLWNVSLSSSSSMDEIMITQTTDTVNERSLSMDDHIHKRLLSCQS\n+QSSIEGLEESNDTCLTEVEAAIPQAGKIEDEFQPGHLNGKRDTLRLSLKETVYDLASPGS\n+GRVRTAQSDVPVILPNSKPDMPDVWIKREGKGEGGS\n+>NP_001013313.1 gi|61651744|ref|NP_001013313.1| coiled-coil domain-containing protein 115 [Danio rerio]\n+MRVDENLRLDEQLLLFMEQLEALEEKRQRLNSLIEEGWFSIAKARYSMGNKQVSALQYAS\n+EMQPLAHVETSLLEGGTAEFKCERSENKAEEQKTKTIEDIGAKETGLRRRVHTKQKEVKE\n+GEQDTDEVKTKTDSPTPEHRNPLKWFGILVPQNLKQAQSAFKEVITLSVEIASLQSTILA\n+TRKEMQVQMKEKQERTEKAQLEVKEE\n+>NP_991238.1 gi|45387769|ref|NP_991238.1| pituitary homeobox 3 [Danio rerio]\n+MDFNLLTDSEARSPALSLSDSGTPQHDPGCKGQDNSDTEKSHQNHTDESNPEDGSLKKKQ\n+RRQRTHFTSQQLQELEATFQRNRYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWRKRER\n+NQQAELCKNGFGAQFNGLMQPYDDMYSGYSYNNWATKSLASSPLSAKSFPFFNSMNVSPL\n+'..b'NDQLIRCITEYMQKGRAVECVQYQQILHRNIVY\n+LATIADASPDSAASTSNCTSNDTSASAAAVNGHTEGS\n+>XP_006779820.1 gi|583968735|ref|XP_006779820.1| PREDICTED: uncharacterized protein KIAA0355-like [Neolamprologus brichardi]\n+MYCCSAQESKMDYKRRFLLGGSKQKVQQHQQYQMPELSRTLSASLASSCSASSPMGTGVG\n+MSGSCHPPPSGTSTAVADIQQGISKYLDALNVFCRASAFLTDLFSSVFRNSHYSKAAMQL\n+KDVQEHVMEAASRLTAAIKPEIAKMLMELSAGAANFKDQNDFSLQDVEVLGRCFLTVMQV\n+HFQFLSQALQKVQPVAQSCLAEALAQAQERCANARSQSSDLGPLTELEEASRSWKGAAEA\n+TARLRERGRDGCLAGIQVQQLFCSNNTTIPEHQLKELNMKIDSALQAYKAALESLGHSEY\n+ALKAGFHLNPKAVEAALQGCCSEAEAQQAGRMQTTSQPIQCELPTIPVQIGSHFLKGVSF\n+NESAAENLKLKTHTMLQLIKEALGQNGVTPRDDSPVTEVLNQVCPSSWRGACKTAVQLLF\n+AQAGLVVVDTAQIENKEAYAPQITLEGSKVVVQVPSTWCLKEDPATMSLLQRSLDPEKTL\n+GLVDVLYTAVFDINRWKERKEQALPTIQIQLQRESPDYGIPTDLPPGTSSKTSSGLPKTI\n+SKLTSKFTKKVSSSSNSGGSFSIPSTPSRSMLTTSNSEDKAKGLGHSDGRLQSILQMGSL\n+PCTSDSTQQNQLANGSVSEDQGMNLPTDQEMQDVIDFLSGFNMGKSQQASPLVKRRNSVA\n+SANPAELKPPSGPSQATSSISHSALQPPAQTLPQPQPQPQPSQPVQKQQPQPNPQPPPPQ\n+QQQPQQQQQPPPPPPQQPSPQAQHLYYQHLLQPITQQQAPPPQLPPQQTPPQVLPQQRVA\n+SKWLGTSGQQPPPQGPPAGLSPLGPIGQWASSGLPDLSSDLYSLGLVSTYMDSVVSEMLG\n+QKPQGPRNNTWPNRDQSEGVFGVLGDTLPFDPAVGSDPEFARYVAGVSQAMQQKRQVQHI\n+RRPSNTRSNWPMPDEQHRTWSHPEYFNEGDAVNSGWSANQGDSASSSDETSSANGDSLFS\n+MFSGPDLVAAVKQRRKHSCGEPEVCTLPSPPLHHIGDDSQDSKTKTWPPKAPWQHSTHTN\n+TMPNPSSSLYQMNIPPSSQWGDSMPMLQSPVWSTASDCPPSTGISSGFPFTQQQQQQQQQ\n+QHKPMTKGFKSFPVKHEHRPSYLHQY\n+>XP_006779821.1 gi|583968737|ref|XP_006779821.1| PREDICTED: glucose-6-phosphate isomerase-like [Neolamprologus brichardi]\n+MGLTQDPNFQKLQEWYTAHALGLNMRHMFEADKERFNKLSLTLKTEDGDILLDYSKNLIT\n+EDVMKMLVDLAKSRGIEAAREKMFTGEKINFTEGRAVLHVALRNRSNTPIMVDGKDVMPD\n+VNKVLEKMKGFCHKVRSGEWKGYTGKAITDVVNVGIGGSDLGPLMVTEALKPYSKDGPRV\n+WFVSNIDGTHIAKTLAQLNPETTLFIIASKTFTTQETITNAESAKAWFLEHAKDKAAVAK\n+HFVALSTNGPKVKDFGIDTENMFEFWDWVGGRFSLWSAIGMAIALHIGFDNFEKLLSGAH\n+WMDKHFRTAPLDKNAPILLALLGIWYINFFHAETQAMLPYDQYMHRFTAYFQQGDMESNG\n+KYITNHGTRVNYHTGPIVWGEPGTNGQHAFYQLIHQGTRMVPCDFLIPAQSQHPIRDNLH\n+HKVSLMLERYLSKXXALMKGKTTEEAKKELEASGLSGEALEKILPHKVRRIKRNDLIKDN\n+EPAALLMARNSNKLKPKLKRRAPCRVAFTKRDSPSKNSVNRC\n+>XP_006779822.1 gi|583968739|ref|XP_006779822.1| PREDICTED: Wilms tumor protein 1-interacting protein homolog [Neolamprologus brichardi]\n+MEHYQEDLGLRATKLMEDLSLYDAYQDGMYDARRDLVINPDLDFSAPALVEHKAKPMNGT\n+SVLHQQHHTVENFSSGNKVYNAAPVRPVNCNRTVPVDFCAPQRDAVYNEDGCCTKSEVAL\n+PCYTGTSERHRRYSLEVQGHRYSTGSTFDGVPLNKPVAVPGNRCNSVCIASSHDGRYNAT\n+SPRSSLASSLSSQEQSKHASPRSSISSPRTSLVVPGQERYTSPRSSLVHCEGNSVLSPRS\n+SYASTASDTSKHSSPRASLNSCDCCSKPNSNRTSGISMGYDQRHTSPRSSTASQYSFTTS\n+PRSSYSDSRYGPVVNQDLEGVLHSAPLASPRSSICSQDGSARPGASANCVVSPRSSISSH\n+SSRSSRSSRGSMSTYPDLQLPSPRSSMLGTSLHEDTLLQEFGDSNGVQNRIHLQGLSAVP\n+EPQQQSGQTGGTADIPSGSPSSYSYVMPSKTASSGQRFKLPYQVTPSRESGPSQAEKRLE\n+ALTLELEKELEMHMKKEYFGICVKCGKGVYGASQACQAMGNLYHTNCFTCCSCGRRLRGK\n+AFYNVNGKVYCEEDFLYSGFQQTAEKCFVCGHLIMEMILQALGKSYHPGCFRCVVCKEGL\n+DGVPFTVDVENNIYCVKDYHTVFAPKCASCNQPILPAQGSEETIRVVSMDKDYHVECYHC\n+EDCGLQLNDEERHRCYPLEGHLLCHDCHILRLQSQVPAHAPPSYPLHVTEL\n+>XP_006779823.1 gi|583968741|ref|XP_006779823.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X1 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n+>XP_006779824.1 gi|583968743|ref|XP_006779824.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X2 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n' |
b |
diff -r ae4d5733272f -r 733ca84b21ee test-data/test.fasta --- a/test-data/test.fasta Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,11 +0,0 @@ ->ID1 desc -GATACA - - ->ID2 desc -GATACAGATACA -GATACAGA -TACAGATACA ->ID3 desc -GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGA -TACAGATACA |
b |
diff -r ae4d5733272f -r 733ca84b21ee tool_dependencies.xml --- a/tool_dependencies.xml Fri Oct 16 16:13:34 2015 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="biopython" version="1.65"> - <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency> |