Repository 'splitfasta'
hg clone https://toolshed.g2.bx.psu.edu/repos/rnateam/splitfasta

Changeset 5:733ca84b21ee (2020-09-21)
Previous changeset 4:ae4d5733272f (2015-10-16)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/splitfasta commit 31945d5d8c5ebee64ebf29c6ea022fb831f47274"
modified:
splitFasta.xml
added:
split_fasta.py
test-data/ID1.fasta
test-data/ID2.fasta
test-data/ID3.fasta
test-data/part1.fasta
test-data/part2.fasta
test-data/part3.fasta
test-data/part4.fasta
test-data/sample1.fasta
test-data/sample2.fasta
removed:
splitFasta.py
test-data/ID1_result1.fasta
test-data/ID2_result1.fasta
test-data/ID3_result1.fasta
test-data/test.fasta
tool_dependencies.xml
b
diff -r ae4d5733272f -r 733ca84b21ee splitFasta.py
--- a/splitFasta.py Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,13 +0,0 @@
-#!/usr/bin/env python
-import os
-import sys
-from Bio import SeqIO
-
-if __name__ == "__main__":
-    inpath = sys.argv[1]
-    os.mkdir('splits')
-    with open(inpath, 'r') as handle:
-        for record in SeqIO.parse(handle, 'fasta'):
-            header = os.path.join('splits', record.id + '.fasta')
-            with open(header, 'w') as handle2:
-                SeqIO.write([record], handle2, 'fasta')
b
diff -r ae4d5733272f -r 733ca84b21ee splitFasta.xml
--- a/splitFasta.xml Fri Oct 16 16:13:34 2015 -0400
+++ b/splitFasta.xml Mon Sep 21 15:40:14 2020 +0000
[
@@ -1,35 +1,59 @@
-<tool id="rbc_splitfasta" name="Split Fasta" version="0.2.0">
+<tool id="rbc_splitfasta" name="Split Fasta" version="0.4.0">
     <description>files into a collection</description>
     <requirements>
-        <requirement type="package" version="1.65">biopython</requirement>
+        <requirement type="package" version="1.76">biopython</requirement>
     </requirements>
-    <stdio>
-        <exit_code range="1:" />
-    </stdio>
-    <command interpreter="python">
+    <command detect_errors="aggressive">
     <![CDATA[
-        splitFasta.py $inputFile
+        #if $splitmode.splitmode_select == "each":
+            python $__tool_directory__/split_fasta.py '$inputFile'
+        #else if $splitmode.splitmode_select == "chunks":
+            python $__tool_directory__/split_fasta.py '$inputFile' $splitmode.num_chunks
+        #end if
     ]]></command>
     <inputs>
         <param name="inputFile" type="data" format="fasta" label="Fasta file to split"/>
+        <conditional name="splitmode">
+            <param name="splitmode_select" type="select" label="Split mode">
+                <option value="each">Each sequence in its own dataset</option>
+                <option value="chunks">Split into a number of chunks</option>
+            </param>
+            <when value="chunks">
+                <param name="num_chunks" type="integer" value="10" label="Number of chunks to split into" />
+            </when>
+            <when value="each"/>
+        </conditional>
     </inputs>
     <outputs>
-        <collection name="splitted_fasta" type="list" label="Sequence collection in FASTA format">
+        <collection name="splitted_fasta" type="list" label="${tool.name} on ${on_string}">
             <discover_datasets pattern="(?P&lt;designation&gt;.*)" directory="splits" ext="fasta" visible="false"/>
         </collection>
     </outputs>
     <tests>
         <test>
-            <param name="inputFile" value="test.fasta" />
-            <output_collection name="splitted_fasta">
-                <element name="ID1.fasta" file="ID1_result1.fasta" ftype="fasta" />
-                <element name="ID2.fasta" file="ID2_result1.fasta" ftype="fasta" />
-                <element name="ID3.fasta" file="ID3_result1.fasta" ftype="fasta" />
+            <param name="inputFile" value="sample1.fasta" />
+            <param name="splitmode|splitmode_select" value="each" />
+            <output_collection name="splitted_fasta" count="3">
+                <element name="ID1" file="ID1.fasta" ftype="fasta" />
+                <element name="ID2" file="ID2.fasta" ftype="fasta" />
+                <element name="ID3" file="ID3.fasta" ftype="fasta" />
+            </output_collection>
+        </test>
+        <test>
+            <param name="inputFile" value="sample2.fasta" />
+            <param name="splitmode|splitmode_select" value="chunks" />
+            <param name="num_chunks" value="4" />
+            <output_collection name="splitted_fasta" count="4">
+                <element name="part1" file="part1.fasta" ftype="fasta" />
+                <element name="part2" file="part2.fasta" ftype="fasta" />
+                <element name="part3" file="part3.fasta" ftype="fasta" />
+                <element name="part4" file="part4.fasta" ftype="fasta" />
             </output_collection>
         </test>
     </tests>
     <help><![CDATA[
-        Takes an input file and writes each consecutive two lines to a separate file, in a dataset collection.
+        Takes an input FASTA file and writes entries (i.e. sequences) to separate datasets, which are organized in a dataset collection.
+        There are two modes: 1) each sequence is written to its own dataset, which is named after the ID of the sequence, or 2) the file is split into a given number of chunks, which are named part1, part2, and so on.
     ]]></help>
     <citations>
         <citation type="bibtex">
b
diff -r ae4d5733272f -r 733ca84b21ee split_fasta.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/split_fasta.py Mon Sep 21 15:40:14 2020 +0000
[
@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+"""Split a FASTA file into separate files inside a new 'splits' directory.
+
+Usage: split_fasta.py <input_filename> [<num_chunks>]
+
+Without <num_chunks> every record is written to its own file, named after the
+record ID. With <num_chunks> the records are distributed, in input order, over
+files named part1, part2, ...
+"""
+
+import os
+import sys
+
+from Bio import SeqIO
+
+USAGE = "Usage: split_fasta.py <input_filename> [<num_chunks>]"
+
+
+def count_records(filename):
+    """Return the number of lines in *filename* that start a FASTA record."""
+    with open(filename) as input_file:
+        return sum(1 for line in input_file if line.lstrip().startswith('>'))
+
+
+def main(argv):
+    """Split the file named in argv[1], optionally into argv[2] chunks.
+
+    Exits with a message when the arguments are missing or invalid.
+    """
+    if len(argv) not in (2, 3):
+        # sys.exit() instead of exit(): the latter is a helper that the site
+        # module installs for interactive use and is not always available.
+        sys.exit(USAGE)
+    input_filename = argv[1]
+
+    num_chunks = 0  # 0 selects the one-file-per-record mode
+    if len(argv) == 3:
+        try:
+            num_chunks = int(argv[2])
+        except ValueError:
+            sys.exit(USAGE)
+        if num_chunks < 1:
+            # 0 would silently select the one-file-per-record mode and a
+            # negative value would put every record into part1.
+            sys.exit("<num_chunks> must be a positive integer")
+
+    os.mkdir('splits')
+
+    records_per_chunk = 0
+    if num_chunks:
+        # Chunk mode needs the total number of records up front.
+        # NOTE: because of the rounding fewer than num_chunks files can be
+        # written, e.g. 6 records in 4 chunks give three files of 2 records.
+        record_count = count_records(input_filename)
+        records_per_chunk = round(float(record_count) / num_chunks)
+
+    count = 1  # number of the next output file
+    records = []  # records collected for the next output file
+    with open(input_filename) as input_file:
+        for record in SeqIO.parse(input_file, 'fasta'):
+            records.append(record)
+            if num_chunks == 0:
+                # A path separator in the ID would point into a sub-directory
+                # of 'splits' that does not exist.
+                name = record.id.replace(os.sep, '_')
+            elif count < num_chunks and len(records) >= records_per_chunk:
+                name = 'part{}'.format(count)
+            else:
+                continue
+            SeqIO.write(records, os.path.join('splits', name), 'fasta')
+            count += 1
+            records = []
+
+    if records:
+        # Only reached in chunk mode: the last file takes all remaining records.
+        name = 'part{}'.format(count)
+        SeqIO.write(records, os.path.join('splits', name), 'fasta')
+
+
+if __name__ == '__main__':
+    main(sys.argv)
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID1.fasta Mon Sep 21 15:40:14 2020 +0000
b
@@ -0,0 +1,3 @@
+>ID1
+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS
+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID1_result1.fasta
--- a/test-data/ID1_result1.fasta Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
->ID1 desc
-GATACA
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID2.fasta Mon Sep 21 15:40:14 2020 +0000
b
@@ -0,0 +1,3 @@
+>ID2
+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA
+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID2_result1.fasta
--- a/test-data/ID2_result1.fasta Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,2 +0,0 @@
->ID2 desc
-GATACAGATACAGATACAGATACAGATACA
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID3.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ID3.fasta Mon Sep 21 15:40:14 2020 +0000
b
@@ -0,0 +1,5 @@
+>ID3
+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI
+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV
+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW
+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/ID3_result1.fasta
--- a/test-data/ID3_result1.fasta Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,3 +0,0 @@
->ID3 desc
-GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACA
-GATACA
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/part1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/part1.fasta Mon Sep 21 15:40:14 2020 +0000
[
b'@@ -0,0 +1,182 @@\n+>NP_001007355.1 gi|55925472|ref|NP_001007355.1| eukaryotic translation initiation factor 4E-binding protein 3 [Danio rerio]\n+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS\n+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI\n+>NP_956692.1 gi|41055339|ref|NP_956692.1| transmembrane protein 218 [Danio rerio]\n+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA\n+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH\n+>NP_001003767.1 gi|57524633|ref|NP_001003767.1| transmembrane protein 179 [Danio rerio]\n+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI\n+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV\n+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW\n+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI\n+>NP_001002700.1 gi|50540464|ref|NP_001002700.1| fatty-acid amide hydrolase 2-A [Danio rerio]\n+MALTRFERFLGRLLRAVVWILFAAFKLFAPQQRHGVSRLPPITNPLLLLSAMQLARKIRR\n+KEVTSVEVVQAYIDRIQEVNPLINAMVKDRFSAALQEAAQVDKLIEEETGGEDVLEDRLP\n+LLGVPITVKEAFALQGMPNSTGLLTRRDLVSGADAPSVALLKRAGAIPLGVTNCSELCMW\n+LESHNHLYGITNNPYDFERIVGGSSGGEGSILGAGSSVIGIGSDIGGSIRIPCFFNGIFG\n+HKPSVGIVNNEGQYPPASGQQMGFLCTGPMCRYAEDLIPMLSIMGGPNAEKLSLFTEVDL\n+KKLRFFSVPHNGGSHLVSPVEPQLLHAQKMVVKRLEADLGVKVQELLIPQLKYSFQIWGT\n+MMASPGKDGKPPTTFAELMSEGGKKVWPAWELFKWFLGFSSHTLAAIGLALVELFQSSHP\n+SPFIMQQKESLQQELEELLGTDGVLLYPSHPLIAQKHHHPIFTPFNFSYTGIFNILGLPV\n+TQCPLGLSAEGLPLGVQIVAGKLQDRLSLATALYLEKAFGGWREPGKTTIKP\n+>NP_001003555.1 gi|57525887|ref|NP_001003555.1| centromere protein P [Danio rerio]\n+MEQKYEEDIQKLQQEIEMLEAEQEETLRSIFVQHGDRLQQGVKSACEERGGGGAQQHTLS\n+KLITEVRELEKDLRRQTEINGITLNECFVKTLHKSERKLIQQLRLAGHCGLLLFQVEFAV\n+TEIQEDNVLHRRVTELNIVVDGVEFKDFSAFVSRVEDTKDLLLFFRTLRTFSERCEDRRQ\n+TFQHFQEKYPDVVNLPEGCRSEIMIIRSPQLPGISMTLFWKIHVSKEGVVKPLLDLLLKM\n+PDQALELDTKKVMEKASDYFQSLLQLLGVEASIEGLIRTVCS\n+>NP_997599.1 gi|47058959|ref|NP_997599.1| protein dispatched homolog 2 [Danio 
rerio]\n+MESGSISRQREDAEMPDSSTTEGPSLEAPQSEIPEVSLCPPDSDSTESQMCPVEIEENQT\n+KSSSPFNSHSSTQLERQVSQGSAYHSPPHKKCPCCGHQQPSQSDVCPGQMNALHQADCAA\n+SPVKTLYSCSPSRLPSCHTKMQCHWLHGSHDGSNHKPVQHHMVTVRNDGLHRIPRSYSQV\n+IVEYPMTVLISCTLVLFACSLAGILTGPLPDFSDPLLGFEPRGTDISVRLATWTRLKQNT\n+GPGKPLSPVPWQLTEKTTTGKDTIKSEPQFRERSRRMLHRDNAEHNFFCNAPGERYAQLV\n+FRSGNSASLWSLKAIYSMCQMEQTQIRSGPQFDKLCQVKSEFYGSMVKNECCPSWSLGNY\n+LAVLNNISSCFSLTSQQVSESLGLLRFCAPYYHDGSLIASCTERSKFGRCASVPHRCKLS\n+SIFQILHYLVDKDFLGPQTVEYKVPSLKYSIVFLPVEKSDSLMNIYLDHLEGHKLTYNNT\n+TITGMDLGIKQKLFKYYLARDSIYPVLAALALLITIGLYLKSLFIAAMSLVAVILSLSTS\n+YFFYKVAFRLTFFPLLNLAAVFVLLGSCLNQALTFVDFWKLQLSHNPPAVPEKRMNRVLQ\n+EMGYLIIVSGLTSSVTFYSGYISSITAVRCYAVYLGSASLINTLFALVWLPCTLILQERY\n+AVLSSNTVGKVAWKPCCSKNAGGFWETSSRKRCLFTFRQKLRTLGRGFSDTSNLLFLKIL\n+PCGVVKFRYIWICWFAVLAAGGTYISCVDPGMKLPTSDSRTTQLFRSSHPFERYDAEYRH\n+QFMFERMKDGEDEPMMLTLIWGIVPSDNGDHFDPKSNGSLSVDPGFNMSSLQAQIWLRDL\n+CGKIQNQTFYSPLSAEQDTAEDNVCFVEHLIHWVSIRRCSESEDAFSFCCNNIPFPYPPR\n+VFEQCLSMMVAEQHAEGRLPSAGGLRFDSEGRIAALVVIFKTVQLYSFNYNRMSQFYQEI\n+LSWFNREISKAPAGLQRGWFVSQLGLYDLQQCLSSETLEVAGFSVALTFALLLLTTWNIP\n+LSVYVSIAVAGSVFATVGLLVLLEWQLNGVEALFISAAAGLSVDFVANYCISYSLAPHSD\n+RLGRVAHSIKRMGCPVATGAGAYFCVGIIMLPATALLFRKLGIFLLLVKCVACGFATFFF\n+QSLCCFFGPQNNCGRITLPCVTQQSTENILSSCSATEPGTNNPAANGAFGCGKGSRVRRS\n+FNKENEGFLCPNQQHHRKRQPVGGREPEQNELQPLACQLSDSFENSTCTSKLSNRPSVLS\n+DDIQFCGLSPKQDYDRVSIEADSTEMCSRHLKGCNPPPALQTSSPYKENMLRLPQDACKE\n+KVLCKKCRGQSRGGLQLWNVSLSSSSSMDEIMITQTTDTVNERSLSMDDHIHKRLLSCQS\n+QSSIEGLEESNDTCLTEVEAAIPQAGKIEDEFQPGHLNGKRDTLRLSLKETVYDLASPGS\n+GRVRTAQSDVPVILPNSKPDMPDVWIKREGKGEGGS\n+>NP_001013313.1 gi|61651744|ref|NP_001013313.1| coiled-coil domain-containing protein 115 [Danio rerio]\n+MRVDENLRLDEQLLLFMEQLEALEEKRQRLNSLIEEGWFSIAKARYSMGNKQVSALQYAS\n+EMQPLAHVETSLLEGGTAEFKCERSENKAEEQKTKTIEDIGAKETGLRRRVHTKQKEVKE\n+GEQDTDEVKTKTDSPTPEHRNPLKWFGILVPQNLKQAQSAFKEVITLSVEIASLQSTILA\n+TRKEMQVQMKEKQERTEKAQLEVKEE\n+>NP_991238.1 gi|45387769|ref|NP_991238.1| pituitary homeobox 3 [Danio 
rerio]\n+MDFNLLTDSEARSPALSLSDSGTPQHDPGCKGQDNSDTEKSHQNHTDESNPEDGSLKKKQ\n+RRQRTHFTSQQLQELEATFQRNRYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWRKRER\n+NQQAELCKNGFGAQFNGLMQPYDDMYSGYSYNNWATKSLASSPLSAKSFPFFNSMNVSPL\n+S'..b'I\n+>XP_006779747.1 gi|583968576|ref|XP_006779747.1| PREDICTED: corticosteroid 11-beta-dehydrogenase isozyme 2-like [Neolamprologus brichardi]\n+MEDYTLPFWIYLVIVTVFIGGAMKKILASHLNTTSTVVAWLGATVLVERLWAFCLPAMLL\n+LVLFGITFCIYYATKTSQPRAMLPAHGKAVIITGCDSGFGNATAKHLDSLGFEVFATVLD\n+LNGDGAKELQRTCSHRLTLLQVDITQPQQVQQALLDTKAKLGLKGLWALVNNAGVCVNFG\n+EVELSLMSNYRGCMEVNFFGTLSITKAFLPLLRQTKGRIVTISSPAGDQPFPCLAAYGAS\n+KAALNLITETLRHELEPWGVQVSTILPSSYRTAQSTNSAYWEKQHKHLLQNLSPALLEDY\n+GEEYMTETKDLFQTFAKHTTTNLQPVVDTIVQALLAPQPQPRYFAGAGLSLMYFLYAYFP\n+YSMSNNFLKKKFLKKNVIPRALRKQSAFDLNLSLHNNNNEEKLQQM\n+>XP_006779748.1 gi|583968578|ref|XP_006779748.1| PREDICTED: transient receptor potential cation channel subfamily M member 1-like [Neolamprologus brichardi]\n+MYIRVSFDSKPDSLLHLMVKDWQLELPTLLISVHGGLQNFDLPPKLKQVFGKGLIKAAVT\n+TGAWIFTGGVSTGVIRHVGDALKDHSSKSRGKVCAIGIAPWGIVENKEDLIGRDVTRPYQ\n+TMSNPLSKLSVLNSSHSHYILADNGTCGKYGAEVRLRRQLEKHISLQKINTRLGQGVPVV\n+CLIVEGGPNVISITLESLKEEPPVPVVVCDGSGRASDILSFAHRYCEEDG\n+>XP_006779749.1 gi|583968580|ref|XP_006779749.1| PREDICTED: chymotrypsin B-like [Neolamprologus brichardi]\n+MAFLWIVSCLAFVGAAYGCGTPAIPPRVTGYARIVNGEEAVPHSWPWQVSLQQTNGFHFC\n+GGSLISEQWVVTAAHCNVRTYHNVIVGEHNKGYGSTENIQVLKPAKVFTHPSWNPQTINN\n+DITLIKLASPARLGTNVSPVCLADTTDSFAAGMKCVTTGWGLTRYNAPSTPNNLQQAALP\n+LLSNEECKKHWGSNISDVMICAGGAGATSCMGDSGGPLVCQKDNVWTLVGIVSWGSSRCS\n+TSTPAVYARVTKLRGWVDQILASN\n+>XP_006779750.1 gi|583968582|ref|XP_006779750.1| PREDICTED: agouti-related protein-like [Neolamprologus brichardi]\n+MFGTVLLCCWSFGLLPLASSLVHGNLPLDEGPVAGRRTETFLSEIERSQVPDRMHEPALL\n+PVDSVEDHFLMDTGSYDEDTSAALQLQGRAMRSPRRCIPHQQSCLGYPLPCCDPCDTCYC\n+RFFNAICYCRRVGHVCPPRRT\n+>XP_006779751.1 gi|583968584|ref|XP_006779751.1| PREDICTED: EMILIN-1-like [Neolamprologus 
brichardi]\n+MAALPLLLLLVLWTCGNAKGAFPLRQSYNLYTNGHAHGARAASRHRNWCAFVVTKTVSCV\n+VEDGVETYVKPDYHPCSWGSGQCSRVVVYRTYMRPRYKVAYKMVTEMDWKCCHGYSGADC\n+NIGPVGGGGTQISTTRPQPGQGGGTTSGQGGGGHSYGGGSSGSGQSGGNADNEKMRQLEE\n+KIRSLTKNLQDLQSTMSTMNERLQEEGGRNGFGERSSGGRNPADAAQPEIKETIHSIQTK\n+LDQLDNRTQAHDKTLVSINNHLVNGKGNELEGGASGGSLSEGRLNSLKEEILSKLERRVS\n+LSCSSCQAGVEDLRKQQQQDRERIRALEKQMNAMDVQYRQSLDGLRRDVVRSQGCCDIIS\n+DLQDRVTDAERKISTASENFDILQNRLDREISGQGGTSENTGSRGQGLPVGGETGGHGRD\n+AMITEEHLNNRLKDLERRVNSTMQKTEESCSYLENHVKDYFHRELDELRSVFLERFDDQA\n+DRITDVELDVEQVKDSISDHDKRLSKLENTTSQMSWRLEKCGCVASEQGGGGEGRGRGDG\n+GYGGGSWGAGGGGSTGEGKDGGNRGDGGGTWGAGGGGGGSTGGGGRWGGTGGGLPGTGGE\n+KDNSTKKSLEWRVVANEDQIRHFNTQLKDLSMSGDSLYDKVLDLTDDVGKIKALTGDHGE\n+HFNRIVTVVEMLGEDCELCGKVEKELQKMRNYSQNALSNIQNHINRIQNRMDSEGDSCFQ\n+MCSVLQSEVSVLRDDVRRCTNQCKSNPDMTTGVDHARPGGTDDNSGPLDPAKPLDGHSVI\n+EGINNNHLKTLQGELSNVILTFSSINDTLKGLEHTVQKHDSVITDLGNTKDKIISEIDKV\n+QQELTEHIEDNRNRLDKMDRDIRRFESTVLEMGDCKRSGDGLEKRLSKLEGVCGRLDGVS\n+DSILKIKEGLNKHVSSLWTCVSGLNDTVIRHGGLLDFIQDGQDDIHSRVKNLNSSLNQVS\n+RDLQSFSEHDLTGPPGPQGPQGHPGERGFNGPPGLPGPPGFPGPRGEIGPHGPKGETGLP\n+GADAQIPKLSFSAALTAPMDRAGTIVFDKVFVNEGNFYNPRTGIFTAPVDGNYYFSAVLT\n+GHRNEKIEAVLSKSNYGMARVDSGGYQPEGLENNPVAEAKVNPGSLAVFSIILPLQTQDT\n+VCIDLVMGKLAHSVEPLTVFNGMLLYENK\n+>XP_006779752.1 gi|583968586|ref|XP_006779752.1| PREDICTED: zinc finger protein 507-like isoform X1 [Neolamprologus 
brichardi]\n+MEEITNVITHSSAASSSSSTSGSHTRQTKEKQPSQGFQQKTADDSLIQVIKKLSKIVEKR\n+PQRRCASGGQKRALQVGERGAEQGGGSICKKIKRNLKDEVGVERSTDDSSLPSPWSGDDN\n+NNVTTAVAEVAANPNSSDLKRTVTCYQCSLCPHLSQTLPLLKEHLKQHNEQHSDLILMCS\n+ECHFTSRDHEQLEAHVRMHFDNGDNQKRKYPVSEAKEEVLKNQDVDLTGDNCSAGTEVKK\n+SSVSNAKELPQKKKWYSYEEYGLYRCLICSYVCSQQRMLKTHAWKHAGLVDCSYPIFEDE\n+DGGSAKREVQAAPNNASAREEIVVLQDKSLQKLPTGFKLQLCMPVAVEDKQEVVNLQGSH\n+LSESPKTEEEDEYPIKDMTSEEPAVEVQVTTEAETEVELGGHHESTSATDSLLSSAQKII\n+NRSPNSAGHINVIVERLPSAEDSVMASNPLLLSPDVDGDKSLLEKKAEEQEHVEGVKDEV\n+VLCYSPGNANKSQHLGADIKPSIAKSNDLPRDENVPPAGRKRTHSESLRLHSLAAEVLVA\n+MPMRTPELPNSGAKVALKTVAAQAQSPQAGQKPTEGAAAGQKASDVGTAAAMLNCNEGRE\n+ETLGSLGLGKGDDDGPAANGGISLSLLTVIERLRERSDQNTSDEDILKELQDNAQFQSGA\n+GVVAANGAGSYVCSSVPGMDGLVGSPDSGLVDYIPGSDRPYRCRLCRYSSGNKGYIKQHL\n+RVHRQREPYQCPICEHIASDSKDLENHMIHHCKSRMYQCKQCPDAFHYKSQLRNHEREHH\n+SFSGDVEMLTPVAETAAAMEETERVTYEEGSPQKMFKCDVCNYTSSTYVGVRNHRRIHNS\n+DKPYRCCSCDFATTNMNSLKSHMRRHPQEHQAVQLLEQYRCSLCGYVCSHPPSLKSHMWK\n+HAGDQNYNYEQVNKAINEAISQSSR\n'
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/part2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/part2.fasta Mon Sep 21 15:40:14 2020 +0000
[
b'@@ -0,0 +1,235 @@\n+>XP_006779753.1 gi|583968588|ref|XP_006779753.1| PREDICTED: zinc finger protein 507-like isoform X2 [Neolamprologus brichardi]\n+MEEITNVITHSSAASSSSSTSGSHTRQTKEKQPSQGFQQKTADDSLIQVIKKLSKIVEKR\n+PQRRCASGGQKRALQVGERGAEQGGGSICKKIKRNLKDEVGVERSTDDSSLPSPWSGDDN\n+NNVTTAVAEVAANPNSSDLKRTVTCYQCSLCPHLSQTLPLLKEHLKQHNEQHSDLILMCS\n+ECHFTSRDHEQLEAHVRMHFDNGDNQKRKYPVSEAKEEVLKNQDVDLTGDNCSAGTEVKK\n+SSVSNAKELPQKKKWYSYEEYGLYRCLICSYVCSQQRMLKTHAWKHAGLVDCSYPIFEDE\n+DGGSAKREVQAAPNNASAREEIVVLQDKSLQKLPTGFKLQLCMPVAVEDKQEVVNLQGSH\n+LSESPKTEEEDEYPIKDMTSEEPAVEVQVTTEAETEVELGGHHESTSATDSLLSSAQKII\n+NRSPNSAGHINVIVERLPSAEDSVMASNPLLLSPDVDGDKSLLEKKAEEQEHVEGVKDEV\n+VLCYSPGNANKSQHLGADIKPSIAKSNDLPRDENVPPAGRKRTHSESLRLHSLAAEVLVA\n+MPMRTPELPNSGAKVALKTVAAQAQSPQAGQKPTEGAAAGQKASDVGTAAAMLNCNEGRE\n+ETLGSLGLGKGDDDGPAANGGISLSLLTVIERLRERSDQNTSDEDILKELQDNAQFQSGA\n+GVVAANGAGSYVCSSVPGMDGLVGSPDSGLVDYIPGSDRPYRCRLCRYSSGNKGYIKQHL\n+RVHRQREPYQCPICEHIASDSKDLENHMIHHCKSRMYQCKQCPDAFHYKSQLRNHEREHH\n+SFSGDVEMLTPVAETAAAMEETERVTYEEGSPQKMFKCDVCNYTSSTYVGVRNHRRIHNS\n+DKPYR\n+>XP_006779754.1 gi|583968590|ref|XP_006779754.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X1 [Neolamprologus brichardi]\n+MTTLRQRKGSKGKEPSPAAELQSQQHNCCSEHHPEKILHGDWSWGAIIWTSVGWSVSVGL\n+GLLCCIYVATLHENDLWFSNIKEVEREISFRTECGLYYSYYKQMLHAPSIQEGLKEMIHD\n+NLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSLQAVYVIALYLT\n+AWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTAITCYLRPQLTT\n+LQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQVTTLYLVQVSSL\n+LSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLLHSALVLLLTVT\n+INYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPLDTLERLAGTLL\n+LYPYVLTLLLLCGMLVAAALQNLSRPNRGSTEEKKGAREGQVAAFRPDVAYNVLHTLFYG\n+LLAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLVRYVAPVVMIGF\n+LYYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLAGIKLCTGRVLT\n+NHPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICYERRHRRGCRLR\n+DLLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQNKTFHVYRLKKK\n+RKKNTKGSSEPSVTQ\n+>XP_006779755.1 
gi|583968592|ref|XP_006779755.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X2 [Neolamprologus brichardi]\n+MTTLRQRKGSKGKEPSPAAELQSQQHNCCSEHHPEKILHGDWSWGAIIWTSVGWSVSVGL\n+GLLCCIYVATLHENDLWFSNIKEVEREISFRTECGLYYSYYKQMLHAPSIQEGLKEMIHD\n+NLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSLQAVYVIALYLT\n+AWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTAITCYLRPQLTT\n+LQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQVTTLYLVQVSSL\n+LSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLLHSALVLLLTVT\n+INYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPLDTLERLAGTLL\n+LYPYVLTLLLLCGMLVAAALQNLRPNRGSTEEKKGAREGQVAAFRPDVAYNVLHTLFYGL\n+LAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLVRYVAPVVMIGFL\n+YYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLAGIKLCTGRVLTN\n+HPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICYERRHRRGCRLRD\n+LLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQNKTFHVYRLKKKR\n+KKNTKGSSEPSVTQ\n+>XP_006779756.1 gi|583968594|ref|XP_006779756.1| PREDICTED: probable C-mannosyltransferase DPY19L3-like isoform X3 [Neolamprologus brichardi]\n+MCRGLKEMIHDNLTESKRTINLLQRMNIYQEVFLSVLYRLLPIQSYLEPVYFYIYTVFSL\n+QAVYVIALYLTAWLLSGSWLAGALTGVWYILNRVDTTRVEFTISLRENWSLPFFALQVTA\n+ITCYLRPQLTTLQQKVMVWLMYVTTFCFCLTWQFNQFILLVQALVIYTLDCGDFLTTTQV\n+TTLYLVQVSSLLSVWFLQFCNSMILGSLVLSFIVAALFIRHCQPGVKTGSLVVRLGKVLL\n+HSALVLLLTVTINYLAKKALQLQSDEHIFKFIKSKFALGSTRDFDASLYLCEEAFGLLPL\n+DTLERLAGTLLLYPYVLTLLLLCGMLVAAALQNLSRPNRGSTEEKKGAREGQVAAFRPDV\n+AYNVLHTLFYGLLAFSTMRMKYIWTGHMCAVAAYGVCGTELWTVLLSALRCNTKLLLRLV\n+RYVAPVVMIGFLYYKFWPKLMEELSELREFYDPDTVELMTWISTKTPKQAVFAGSMQLLA\n+GIKLCTGRVLTNHPHYEDKDLRERTRQVYQVYARRSPEEVYDILKAIGADYVVLENSICY\n+ERRHRRGCRLRDLLDLANGHIMDGPGENDPDLVPATHPRFCDAIKTDAAYNALFTRTFQN\n+KTFHVYRLKKKRKKNTKGSSEPSVTQ\n+>XP_006779757.1 gi|583968598|ref|XP_006779757.1| PREDICTED: MTSS1-like protein-like isoform X1 [Neolamprologus 
brichardi]\n+MLGEITHLQAIIDDLTVLTTDPHKLPPASEQVIKDLKGSDYSWSYQTPPSSPSSSGSRKS\n+SMCSSVNSTHSSASRSSGGGGSGGVGGGGSLPHSPTSSSSSSCRYRSSLPHQPPPPGGIA\n+AHRLSSVSSHDSGFVSQDANIYSKPPSPMPSDITSQKSSSSASSEASETCQSVSECSSPT\n+TFGSSFATFRPALFHSGSTRPLSVILPVPASPPYIRPPGSSSSSPTSKVPMWKDWSKAGQ\n+YEQPVAAA'..b'DRYGEQGLREGGGGGPGMDDIFSHIFGGGLFGFMGGQSSRSRNGGRRRGEDMVH\n+PLKVSLEDLYNGKTTKLQLSKNVLCSTCNGQGGKTGAVQKCTACRGRGMRIMIRQLAPGM\n+VQQMQSVCTDCNGEGEVISEKDRCKKCEGKKVVKEVKILEVHVDKGMKHGQKITFGGEAD\n+QAPGVEPGDIVLVLQEKEHETYRRDGNDLFMNHKIGLVEALCGFQFMLKHLDGRQIVVKY\n+PAGKVIEPGSVRMVRGEGMPQYRNPFEKGDLYIKFDVQFPDNNWISPEKLGELEDMLPSR\n+SEPPIISGDTEEVDLQDYDVSQSSSSGNRREAYNDSSDEEGSHHGSGVQCAHQ\n+>XP_006779770.1 gi|583968632|ref|XP_006779770.1| PREDICTED: low-density lipoprotein receptor-related protein 3-like [Neolamprologus brichardi]\n+MGLTELPLLLPLLGLLWLRCALLCAGCSEQVEIHTERRGVIYSPSWPLNYPAGVNCSWHI\n+QGGQGEVITISFRNFDLAESGKCTGDWLLLTPTWKRESRLCGSVLPQPFISTRGRVWLFF\n+HSQANSSGQAQGFRLSYIRGHLGQSSCQSDEFLCGNGKCLPRSWKCNGQDECGDASDERS\n+CLPTPTEAQPGLCPFGSLPCTEGQSTRCLPTALRCNGARDCHDGSDELGCPDTTCGKRLG\n+NFYGSFASPDFFRANRSGDTELRCSWLLDTQDPKPIVLQLDLQLGPGDLLHVYDGLLQRA\n+EHLLQVFSYHNNRRPALLESSRGQMSVLYMAQPHSPGHGFNATYQVKGYCFPGERPCGSD\n+QGCYSERQRCDGYWHCPSGRDEEGCPMCPDGEFPCEGGTGMCYPASERCNNQKRCPDGSD\n+EKNCYDCQPGNFHCGTNLCIFETWRCDGQEDCLDGSDERDCLAAVPRKVITAALIGSLVC\n+SLLLVIALGCALKLHSLRNREYRAFETQMTRMEADFVQREAPPSYGQLIAQGLIPPVEDF\n+PVYNPTQASVLQNLRLAMRRQIRRHSTRRSTSSSSRRRLGHLWNRLFRSGGRGRGHAPLL\n+DPPGPTQITLGLHSYRTVGEQGPQSRAVPAGGSDVVGVDLPESPASPLSFHSVDSPEEEE\n+DLSPVSRDGSRAAESSPPTPCQSDSSVQSGLPLSPQEASVPLCPPRASRKLVLELAVNLK\n+GVSLRRYSPLGPLSPISPPVFPSSSQTPSTQPQPQGSEVTSPTEPLFSSVKPEDSDSQFT\n+VNVPSRDETKPEARSSLCRFGRSISEEGGDLGRETLC\n+>XP_006779771.1 gi|583968634|ref|XP_006779771.1| PREDICTED: LOW QUALITY PROTEIN: rhophilin-2-like [Neolamprologus 
brichardi]\n+MTDALLSNGINDGGGDKNYFKKGCNPFAQTGRSKLQNTRASLNQQIIKQMRMRAGAENLL\n+KATSNSKVKEMVLLELSYVNSNLQLLMSELEGLNSSVEVYQNNQSSTQRILVPVFLNETT\n+VEFSILKIXSDFILEHYSEDGKTFEDEIADFMDLRQACRTPSRSEAGVELLGKYYSHLPL\n+IESRFFSPTRQTGIFFTWYTAFLGLKYQQNHICLIXFCFLFFFLLLFSVKSLMIXITSTN\n+CSFLALIRFQLVPTALSCPGVLNNLKETFTHTPSYDMSPAMLSMLIRLMLAQAQECLFEK\n+IALPGIRNQFYSLMKVAQEAAKVSEIYDQVHQCMIQTPVKDNVPFFWSTMSQIKTNHYRS\n+MAHYFVASALLDHQLGPGDDEDKQEKTLSQVYDSLPEGCTALDILKKKDERQRIGKAHIR\n+RAIFGHEEALRIYGLCKNTNNLEVLQEILKASHQRSVNKHSENENEEEFADYMEAPKIIS\n+KTEHKAEMEFPAAAKVKVIDFFQRLGPQSVFSAKQRWTAPRTIRVRSDDRDLGFTLKGDS\n+PVQVVSLDPLCAAAADGLKEGDYIITVGDTECKWMSVSDVMRLLKDVDEEGIDIQVVSMM\n+DNSTAMPTKSATFCGNLPKTYSMICLAYNEDDKNSKVRKVAKKSSFLSWGLKNKMKSAST\n+LSLPTADKAGALPWNKPCPTFPSSSSYNNDSGLY\n+>XP_006779772.1 gi|583968636|ref|XP_006779772.1| PREDICTED: E3 ubiquitin-protein ligase RNF182-like [Neolamprologus brichardi]\n+MKDSAAETSGVEEGESHTLGQEHDLKMSCPQTEFEEKESPPPEELECKICYQRYNVHHRK\n+PKILDCLHRVCARCLIKILDIADSAGCISCPFCRHQTEITEQEISALPDDVNIVSHLVMR\n+DKSWNSDQNREVVLTPKSFSSSSPSHDSSNCLVITIMEVQRDSQHSPSQNGSSDVYAEQS\n+LDSVSIGSNGPADQDALSKFCNHVPRILVWLLGFLYFGSLPLGIYLLVIQRVTLGIVCVS\n+LVPSSLTVCLVYGFCQCLCQGMCDCSSRG\n+>XP_006779773.1 gi|583968638|ref|XP_006779773.1| PREDICTED: centrosomal protein of 89 kDa-like [Neolamprologus 
brichardi]\n+MLRFSFRREKDKEFKHIAHGLIPAASIAPKPAVPRTPPPRSPNPSPERPRSALAAAILSS\n+SLTGQTWAIPPARLMSLSESGQSESFTSEPNISTALYTRDRWSEDLVSRPRLSSPDQSEG\n+ELEDKEQEVVDEEDGEEHVYHTLDRRQNSSLTESVYALPLKAKSVFKSTTPLPTQTSGRR\n+ESSPDFTEETSGQSPEPKEKKMSVRKTLENWKDDVPTTPTISTAGHPRQASQAKSPKDLR\n+ELPPEPSNTYSELRKKVVRDRREKNTRMVDKEKLQEERLQRLEREISDSKAFSNQRSSAG\n+SQAELQNLRQHAQELVDENDALKLTVHRLNVELSHYQARFRPLSKEEHSKVSGLPNTGSP\n+PPWLVDMKYSSPLLLAYEDRMNEKDAILQTTEENMEKLHVQLEEVIKENEKLHDEITKTG\n+AVNQKDCYQIQQQAVLVLQENQVLINQLEAQHAEAKDTHSRHNTEVAKVSKKMMLLEVEN\n+QRLEGDLEESRRELQKNKRDLQVLQARLKDAVTWDEHCSIAGKLRRQLEQHESRSKDGID\n+KLLLRVSNLQEENRILALDKAQLTAKTRAMEAELELSRQASRKAERRMSMLKQQKAECVL\n+KEEKTRHYLGAVISVAEHISQERDRLLHMASSLQQEKQRFISRILSGTVRFGKLQEEVKV\n+YRSQASTRLAALEEAVEGRTVSYQTEILHLQTLLRERQEAEEKLLQSKREIEEELEVVWE\n+AATRENQQMRETLLDSKLTGDLHSWPAHAPDEITTSSQQQQHKHGLDFYC\n+>XP_006779774.1 gi|583968640|ref|XP_006779774.1| PREDICTED: myocyte-specific enhancer factor 2A-like [Neolamprologus brichardi]\n+MGRKKIQITRIVDERNRQVTFMKRKFGLMKKAYELSVLCDCEIALIIFNGSNKLFQYAST\n+DMDKVLLKYTEYNEPHESRTNSDIVEALNKKEHRGCDSPDADASYVLTPNTEEKYKKINE\n+EFDNMMKTHKISTGQQQQQHQQHFMHVAPGSMAYSHSGGGGATSQALAAATAALADGGIL\n+PSPHSHLHRNINSSQRPPSAGGGLQGSSELALQNGSGPTVNGFGKIIPSKSPPPPPPHGN\n+SMVPTSRKTDLRVVIPHSKGMMQTLNNQRMSSSQSSQPLSTPVVSITTPSLPHQSLVYAG\n+IGSAYNDYSLNSGELSGFNSAAGPSLSSMAAWEQQQLSSMG\n'
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/part3.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/part3.fasta Mon Sep 21 15:40:14 2020 +0000
[
b'@@ -0,0 +1,343 @@\n+>XP_006779775.1 gi|583968642|ref|XP_006779775.1| PREDICTED: COUP transcription factor 2-like [Neolamprologus brichardi]\n+MAMVAWRNTEAVGDSQGTLSSPVSQVAPLSLPGELTGHMNPAPSLEIPQTAAAPQGAPPP\n+NPSGNTVATTTNNNNSTSSSSSSSSLSMDKQQSQQIECIVCGDKSSGKHYGQFTCEGCKS\n+FFKRSVRRNLTYTCRANRNCPIDQHHRNQCQYCRLKKCLKVGMRREAVQRGRIPTQSYHG\n+QFALTNGDPLQCHSYLSGYISLLLRAEPYPTSRFGSQCLQNNNILGIENICELAARMLFS\n+AVEWARNIPFFPDLQVPDQVALLRLTWSELFVLNAAQCSMPVHAAPLLAAAGLHASPMSA\n+DRVVAFMDHIRVFQEQVEKLKVLHVDSAEYSCIKAIVLFTTDACGLSDVAHVEGLQEKSQ\n+CALEEYVRSQYPNQPNRFGKLLLRLPSLRSVSSSVIEQLFFVRLVGKTPIETLIRDMLLS\n+GSSFNWPYMAIQ\n+>XP_006779776.1 gi|583968644|ref|XP_006779776.1| PREDICTED: solute carrier organic anion transporter family member 3A1-like [Neolamprologus brichardi]\n+MQVKNQICTERSSNDDPEQDDNQKKTSCFSNIKIFLVSECALMLAQGTVGAYLVSVLTTL\n+ERRFNLQSADVGVIASSFEIGNLALILFVSYFGAKAHRPRLIGCGGIVMALGALLSALPE\n+FLTHQYEYEAGDSWHAEDGRDVCSNISRSENRDSGFKCGNRANTNMMYLLLIGAQVLLGI\n+GATPVQPLGVSYIDDHVHRKDSSLYIGILFSTLVFGPACGFILGSVCTKVYVDAVFIDTS\n+TLDITPDDPRWIGAWWGGFLLCGALLFLSALFMFGFPQALDEQDMDSGAESEQAMLPSSL\n+SLEFQGSKPNGAIHGFDINSGLSVCQHLRVIPRVTRHLLSNPVFSCITLAACMEIAVVAG\n+FAAFLGKYLEQQFNLTTSSANQLLGMTAIPCACLGIFLGGLLVKKLNLSALGAVRMAMLV\n+NLVSTACYVSFLFLGCDTGPVAGVTVAYGNETLQSWQQPESACISNCNCYTASVSPVCGS\n+NGVTYLSACFAGCTKPNLTNCACISSNSEEAVALPGKCPSPGCQQAFLTFLCVICVCSMI\n+GAMAQTPSVIILIRTVSPELKSYALGVLFLLLRLIGFIPPPLIFGMGIDSTCLFWSSVCG\n+EKGACMLYDNVAYRHLYVSIAIVLKSSAFLLYTTTWQCLRKNYRKYIKNNEGYLTPTELF\n+ASNVTLDNLGKDITQNPTNRTKFIYNLEDRETCDNMESVL\n+>XP_006779777.1 gi|583968646|ref|XP_006779777.1| PREDICTED: gonadotropin-releasing hormone II receptor-like isoform X1 [Neolamprologus 
brichardi]\n+MNGSSCCDPAAVMYQQRSGLDLNASCEWPDPHCNWTSVDGALQLPTFSTAAKIRVIVTFI\n+LCGISTFCNLAVLWAANGHKRKSHVRVLIINLTAADLLVTFIVMPVDAVWNITVQWLAGD\n+LACRFLMFLKLQAMYSCAFVTVVISLDRQSAILNPLGIAMVRKRNRVMLMVAWIMSALLS\n+IPQMFIFHNVTITYPANFTQCTTRGSFVTHWQETAYNMFTFCCLFLLPLVIMIICYTRIF\n+VQISKQMTKKNMPSNEPHLRCSKNNIPKARMRTLKMSIVIVICFIVCWTPYYLLGLWYWF\n+FPDDLEGKVSHSLTHILFIFGLFNACLDPIIYGLFTIRFQKGLRNCYRKAAVMSSLETNA\n+VIMESLKCTGSVLPSKRGMTSGEKDISSEQAEAKSTDNSV\n+>XP_006779778.1 gi|583968648|ref|XP_006779778.1| PREDICTED: gonadotropin-releasing hormone II receptor-like isoform X2 [Neolamprologus brichardi]\n+MNGSSCCDPAAVMYQQRSGLDLNASCEWPDPHCNWTSVDGALQLPTFSTAAKIRVIVTFI\n+LCGISTFCNLAVLWAANGHKHAVWNITVQWLAGDLACRFLMFLKLQAMYSCAFVTVVISL\n+DRQSAILNPLGIAMVRKRNRVMLMVAWIMSALLSIPQMFIFHNVTITYPANFTQCTTRGS\n+FVTHWQETAYNMFTFCCLFLLPLVIMIICYTRIFVQISKQMTKKNMPSNEPHLRCSKNNI\n+PKARMRTLKMSIVIVICFIVCWTPYYLLGLWYWFFPDDLEGKVSHSLTHILFIFGLFNAC\n+LDPIIYGLFTIRFQKGLRNCYRKAAVMSSLETNAVIMESLKCTGSVLPSKRGMTSGEKDI\n+SSEQAEAKSTDNSV\n+>XP_006779779.1 gi|583968650|ref|XP_006779779.1| PREDICTED: lactoylglutathione lyase-like [Neolamprologus brichardi]\n+MSDKGLSDEAVAAVCKDGDPITKDFMMQQTMLRVKDPNKSLDFYTRILGMTLLQKFDFPS\n+MRFSLFFLGYEDKKEIPADVKEKTAWTFSRRATLELTHNWGSESDDSQSYHNGNSDPRGF\n+GHIGIAVPDVYAACKLFEEQGVTFVKKPDDGKMKGLAFIQDPDGYWIEILSPNNMVSITS\n+K\n+>XP_006779780.1 gi|583968652|ref|XP_006779780.1| PREDICTED: bifunctional glutamate/proline--tRNA ligase-like isoform X1 [Neolamprologus 
brichardi]\n+MALNLTINTSNPPLGALLTAEHVKSSVQVSVEEGKDTRLHISDSVQFSDDNSICRYLARV\n+APALGLYGSNMMEQTEVDHWLEFSARSLCNQPDLTVALAELDKALSLRTFLVGHALTLAD\n+LSVWAALKDHGEWPKQGKSFSHVSRWFFFLSSQVPFTAVGNKYASKKASMNKTKSEGKKA\n+DVGKFVELPGAEMGKVVVRFPPEASGYLHIGHAKAALLNQHYQVTFKGKLIMRFDDTNPE\n+KEKEDFEKVILEDVAMLQIHPDQFTYTSDHFPIIMKFAEKLLAEGKAYIDNTPPEQMKQE\n+REQRVESTCRNNSVEQNMKMWSEMKAGTEYGQTCCMRAKIDMNSNNGCMRDPTLYRCKNA\n+AHPRTGNTYNIYPTYDFACPIVDSLEGVTHALRTTEYHDRDEQFYWIINALGLRKPYVWE\n+YARLNLNNTVLSKRKLTWFVDQGYVDGWDDPRFPTVRGVLRRGMTVEGLKQFIAAQGGSR\n+SVVNMEWDKIWSFNKKLPVSCLKVIDPVAPRYTALSSSYVVPVSVPEATEEMKEIAKHPK\n+NAEVGMKEVWFGPRVLIEGADAETFTEGETVTFINWGNLIITKINKGADGKVLSMKASLN\n+LDNKDYKKTTKITWLAETNNSLPVPAICVNYQPLISKAVITKDDNFKDYINKHSKLEEKM\n+LGDPCLKNLKKGDIIQLQRRGFYICDQPYEPLSPNSCKESPCVLIYIPDGHTKEMPTAGS\n+KEKSKTQASDNTPASPAKAPKTSVPAPASAPAADLFSSIVAQGEAVRLLKAAKAPKDEVD\n+KAVKQLLSLKEQFKQQTGVEYKPGMAPPASTPAPPTSSSDSTSCPYTRVVQQGELVRKLK\n+AEQAPKDQIDAAVKQLLALKAEFKKLTGQDYKPGMATPAPSASSPVTATSSSSPPSSSSG\n+LYEHVAQQGEVVRKLKSEKAPKDQVDAAVKQLLALKEEYKRITGQEYKPGATPP'..b'DEDTFGVSIAVG\n+LAGFACVLLLVLFVLINKYGRRSKFGMKGPVAVISGEEDSASPLHHVNHGIITPCTLDAS\n+PDAVVIGMTRIPVVENPQYFRHGHNCNKPATLVQHIKRRDIILKRELGEGAFGKVFLAEC\n+YNLSPTKDKMLVAVKTLKDPNLSARKDFQREAELLTNLQHDHIVKFYGVCVDGDPLIMVF\n+EYMKHGDLNKFLRAHGPDAMILVDGQPLQSNGELGLSQMLHIATQIASGMVYLASQHFVH\n+RDLATRNCLVGNGLLVKIGDFGMSRDIYSSDYYRVGGHTMLPIRWMPPESIMYRKFSTES\n+DVWSFGVILWEIFTYGKQPWFQLGNNEVIECITQGRVLERPRICPKEVYDIMLGCWQREP\n+QQRLNIKDIQKVLFAMGKATPVYLDILG\n+>XP_006779794.1 gi|583968681|ref|XP_006779794.1| PREDICTED: synaptic vesicle glycoprotein 2B-like [Neolamprologus 
brichardi]\n+MDDPYRNNVNQQMTEGGDYTYTQDGGGQDGYPYQTDYPPQDEDAASDATEGADEDDQMYE\n+GEYQGIPHPDEIKEARRAARVEARRKARMAAQQEEEEENLPEQYETIMEDCGHGRFQWML\n+FFVLGLALMADGVDGFVVGFVLPSAEKDMCISNADKGLLGLLVYVAMMVGALVWGGLCDK\n+MGRRKCLIYVLTIDLVFSFLSCFAQGYGFFLFLRFCSGFGIGGSIPIVYTYFTEFLQMDK\n+RGEHLSWLCMFWMFGGLYASFTAWGIIPHYGWGFAIGTHIQMHSWRLFILVCLFPALAAL\n+IGLVFMPESPRFLLENARHDEAWMILRQVHDTNWKAKGEPERVFTVTNIKTPQTQDDEFI\n+EIQSETGTAFQRWTVRKMTMLQQVMANIMSLSAPELRLQGLLLVIVWFCLAFSYHGLGVW\n+FPDMIKYMQYEEYESKVRIFHRERVERFHFNFSLVNQIHREGEYIHDKFANIEIKSVKFE\n+SSLFENCYFEDVKSTNTFFENCTIKNTVFYNTDLWQDKFKNCRMENATFLHPKKGCHLNF\n+QEENDIVIYMVSFLGSLAVLPGNIISALFMDKIGRIRIIGGSMLASSACTFLLLLSFSQG\n+AVICWQCLFYGVSVAAWNGLEVISVELYPSSKRGTAFGILNGICKFAAIIASSIFAAFIG\n+ITKIIPIFLAFAALVCGGMVALKLPETREKILS\n+>XP_006779795.1 gi|583968683|ref|XP_006779795.1| PREDICTED: AP-3 complex subunit sigma-2-like [Neolamprologus brichardi]\n+MIKAILIFNNHGKPRLIRFYQYFAEDMQQQIIRETFHLVSKRDDNVCNFLEGGSLIGGSD\n+YKLIYRHYATLYFVFCVDSSESELGILDLIQVFVETLDKCFENVCELDLIFHMDKVHYIL\n+QEVVMGGMVLETNMNEIVAQVEVQNRMEKSEGGLSAAPARAVSAVKNMNLPEIPRNINIG\n+DINIKVPSLSPF\n+>XP_006779796.1 gi|583968685|ref|XP_006779796.1| PREDICTED: synaptosomal-associated protein 25-B-like isoform X1 [Neolamprologus brichardi]\n+MADESDMRNELADLQTRADQIADESLESTRRMLALVEESKDAGIRTLVMLDEQGEQLERI\n+EEGMDQINKDMKDAEKNLNNLGQFCGLCSCPCNKIKGGGQAWGGNQDGVVNSQPGARVVD\n+EREQMAISGGFIRRVTNDARENEMDENLEQVGGIIGNLRHMALDMGQEIDTQNRQIDRIM\n+DKADSNKTRIDEANQRATKMLGSG\n+>XP_006779797.1 gi|583968687|ref|XP_006779797.1| PREDICTED: synaptosomal-associated protein 25-B-like isoform X2 [Neolamprologus brichardi]\n+MADESDMRNELADLQTRADQIADESLESTRRMLALVEESKDAGIRTLVMLDEQGEQLDRV\n+EEGMNKVNADLKEAEKDLKDIGQCCGLICPCIKKIKGGGQAWGGNQDGVVNSQPGARVVD\n+EREQMAISGGFIRRVTNDARENEMDENLEQVGGIIGNLRHMALDMGQEIDTQNRQIDRIM\n+DKADSNKTRIDEANQRATKMLGSG\n+>XP_006779798.1 gi|583968689|ref|XP_006779798.1| PREDICTED: protein FAM219B-like [Neolamprologus 
brichardi]\n+MMNDILEEPEKDSLLEAQQDSQGLSGPSSGTRPKSIDGGIRPVEKRGPYIMSRAPAIHLK\n+LQKHREMARKALKKKALSPGPPVTHQPRQGAKRMVKYNKGYAALSQHAEDTLVAIDSDSD\n+EEIDFEQYSSGYSSAEIHPDLSKQLLQDGYRLDEIPDDEDLDLIPPKAMGSSVCCCSEGP\n+SCPIQ\n+>XP_006779799.1 gi|583968691|ref|XP_006779799.1| PREDICTED: semaphorin-7A-like [Neolamprologus brichardi]\n+MFLEIWKMRFSLVACLFFLHICCLAVGNDRSPRMIFTEKEAAMNRLDLLHGPPVRILLEE\n+KPDTVLAVGKTYLNTYNIKNQNKNQTRMQLENCNRNCSYDITLAHLMEDAKKLFVCGTIH\n+DETVCCNSNLTEQPPICKDIKDISSFNIKEGDLSALAESKQSTDLYITRSGSDESVGIHK\n+FGKARVGPKNHHKEQHYVGLVLSKREEDPSQNRVYGFYREKTKDDGLFSEMWLPFVTQVC\n+MTDVGGPKNNLQYTWTSQMNARLFCGDQERKQHFSELVDVSTVDADRWQYTKIYALFRNE\n+WGMSAVCVYTIEDISKIFENSPFNGYTKKQMDRPRMCAPDSSKLSVDTLKNIDKTSEMEQ\n+LVHPVGNPGLLFFNHRNYTHIQVDSKPNSRGGLEWLQFLTVNNGGIHKVLQNESHTFVIA\n+EYQPFKQKAHVLSIILQSTFKKLYVNNGSQLVQLDVADCSQYGDTCQDCMLSRDPYCGWN\n+GTQCIRETEGSWHDAATGNLSICNEHNASNYKGDPVPVPRYSKYFLQCPVSSRHAQYSWQ\n+HDENSTACSSGKEQCLYLIDNMDSECKGTYKCISQEMGYSKVLVQYELQVENDAKTQPYK\n+RLWPNKAEGRKTSPVIWVCLMMALIKSLSF\n+>XP_006779800.1 gi|583968694|ref|XP_006779800.1| PREDICTED: cytochrome P450 1A1-like [Neolamprologus brichardi]\n+MALMILPFIGALSVSHVLVAVTTACLVYMIIKNAQNKIPEGLQQLPGPKPFPIIGNVLEL\n+GSRPYLSLTSMSKRYGDVFQIQIGMRPVVVLSGNETVRQALIKQGDEFAGRPDLYSFRYI\n+NDGKSLSFSTDQAGIWRARRKLAYSALRSFSNLDSTTPEYSCALEEHISKETEYLIKELN\n+TVMKTKGSFDPFRYVVVSVANVICGMCFGRRYDHHDDELVSLVNLSDDFVKVVGSGNPAD\n+FIPLLQYLPSTKMKKFMSLNARFSKFVQKLVTEHYATFDKDNIRDITDSLIDHCEDRKLD\n+ENANIQMSDEKIVGIVNDLFGAGFDTISTALSWSLMYFVAYPEIQNRLFEEIKEKVGLDR\n+MPVFSDRNNLPLLEAYILELFRHSSYLPFTIPHCTTKDTSLNGYFIPKDTCVFINQWQIN\n+HDPEMWEDPFSFKPERFLNADGTEVNKVEGEKVMTFGLGKRRCIGEVIARNEVFLFLAIL\n+IQKLNFQALPGDQLDLTPEYGLTMKHKRYHLRATMRVRNEQ\n'
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/part4.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/part4.fasta Mon Sep 21 15:40:14 2020 +0000
[
b'@@ -0,0 +1,245 @@\n+>XP_006779801.1 gi|583968696|ref|XP_006779801.1| PREDICTED: enhancer of mRNA-decapping protein 3-like [Neolamprologus brichardi]\n+MATDWVGSVVSIDCGATLGVYQGEVSSVDRVSQTISLKHPYHNGVKCPVPEVTFSAMDIK\n+DLKFLDIQNKVNKTSAGKDTATEPSYISTGRHGQTNKTNHSLAISNSSGLSSNPRKGSSN\n+SRGTTQSTPRRSNVRNGGAGGQRSKNDECFGDGTDENLDTDFDFEGNLALFDKAAVFSQI\n+DGASSNSNKLQHHNTQAEQKTQSYRHDENILEVKPVTYRQITVPQHGGKEYCTDTGLVVP\n+TVPYELHKQLLAAAERWGLSLERRLEAVGVCSSQMALTLLGGPNRLTPKNVHQRPTVVLL\n+CGPHVQGAQGISCGRHLANHEVEVILFLPNFVKMQESVTSEVNLFSKTSGKQVSSVKDLP\n+MSPVDLVINCLDCHENPLLKEQSWYQSVADWANKNRAPVLSIDPPVSEQPQSVDAKWTLS\n+LGLPLPLADKDSRVYLCDIGLPKMVYQEVGINYHSPFGCKFVIPLHSV\n+>XP_006779802.1 gi|583968698|ref|XP_006779802.1| PREDICTED: tyrosine-protein kinase CSK-like isoform X1 [Neolamprologus brichardi]\n+MSGIHVPWSTGTECVAKYNFQTANEQDLPFCKGDVLTIIGVTRDPNWYRARNQVGREGTI\n+PANYVQKREGVKSGGKLSLMPWFHGKITREQAERLLYPPETGLFLVRESTNYPGDYTLCV\n+SCDGKVEHYRIIYHNGKLTIDEEEYFENLMQLVEHYTKDADGLCTRLIKPKLMEGTVAAQ\n+DEFSRSGWALNRKELKLLQTIGKGEFGDVMVGDYRGTKVAVKCIKNDATAQAFIAEASVM\n+TQLRHNNLVQLLGVIVEERGSLYIVTEYMAKGSLVDYLRSRGRTVLGGDCLLKFSLDVCE\n+AMEYLEANNFVHRDLAARNVLVSDDNIAKVSDFGLTKEASSIQDTAKLPVKWTSPEALRE\n+KRFSTKSDVWSYGILLWEIYSFGRVPYPRIPLKEVVPRVEKGYKMDAPDGCPAVVYDLMK\n+QCWTLDPVMRPSFRMLREKLQHIRAKELYL\n+>XP_006779803.1 gi|583968700|ref|XP_006779803.1| PREDICTED: tyrosine-protein kinase CSK-like isoform X2 [Neolamprologus brichardi]\n+MSGIHVPWSTGTECVAKYNFQTANEQDLPFCKGDVLTIIGVTRDPNWYRARNQVGREGTI\n+PANYVQKREGVKSGGKLSLMPWFHGKITREQAERLLYPPETGLFLVRESTNYPGDYTLCV\n+SCDGKVEHYRIIYHNGKLTIDEEEYFENLMQLVEHYTKDADGLCTRLIKPKLMEGTVAAQ\n+DEFSRSGWALNRKELKLLQTIGKGEFGDVMVGDYRGTKVAVKCIKNDATAQAFIAEASVM\n+TQLRHNNLVQLLGVIVEERGSLYIVTEYMAKGSLVDYLRSRGRTVLGGDCLLKFSLDVCE\n+AMEYLEANNFVHRDLAARNVLVSDDNIAKVSDFGLTKEASSIQDTAKLPVKWTSPEALRE\n+KRFSTKSDVWSYGILLWEIYSFGRVPYPRIPLKEVVPRVEKGYKMDAPDGCPAVVYDLMK\n+QCWTLDPVMRPSFRMLREKLQHIRAKELYL\n+>XP_006779804.1 gi|583968702|ref|XP_006779804.1| PREDICTED: complexin-3-like [Neolamprologus 
brichardi]\n+MAFMVKHVVGGQLKNLTGGLTEEKSEGEKSDAAAQGMTQEEFEQYQQQLEEEKKEREAHY\n+AQKKAERATVRTHFREKYRLPKNEMDETQIQQAGDDVVLPTELAKMIAEDNEEETHKQSV\n+LGQLSNIQNVDIDQLKDKAQATLEDLKKQTENCSLM\n+>XP_006779805.1 gi|583968704|ref|XP_006779805.1| PREDICTED: growth arrest-specific protein 1-like [Neolamprologus brichardi]\n+MKCWCSALALLPWVLVALDAQLICWQALLRCHDEPECELAYNQYMTACEGNIKGTRKQCP\n+SHCISALIRLNHTRSGPDLETCDCAQDLDCLDAKRAIEPCLPRRHPKDAGGIGCMEARQR\n+CEEDSNCHTSLTAYLSYCGQLFNGRKCSSKCKATIQQMLFIPNGMLLNRCICDGVERPFC\n+EVVKENMSKLCSIGDHSVVSDPTKDYEDPYEDDYSKNDKEVDFSENSSASQSLSRGVLPL\n+CLLTARILY\n+>XP_006779806.1 gi|583968706|ref|XP_006779806.1| PREDICTED: serine/threonine-protein kinase ULK3-like [Neolamprologus brichardi]\n+MASTSSFAPPKLSDFILTERLGSGTYATVYKAYRKGNSREVVAVKVVGKKTLNKASTENL\n+LTEIEILKTVRHPHIVQLKDFQWDAENIYLILEWCSGGDLSRFIRSRRILPESVTRRFLQ\n+QIACALQFLHERNISHLDLKPQNILLSGSILKLADFGFAQYMSPWDEQSVLRGSPLYMAP\n+EMVCRRQYDSRVDLWSVGVILYEALFGRAPFASKSYAELEEKIRSNQPIELPPGARVSKD\n+CRDLLLRLLERNPDARITFAEFFTHPFVDMEHMPSAESIVKAKKLVLQAIQKDQEGERSE\n+ALSLYCSALEHFVPAIYYETNCQRKEALRQKVRQYVSRAEELKALVASDNRLSFEQARTS\n+RDILREMSKDQPRLLAALEMASTAIAKEESGSDDLEALDMYQQCLGELLLGLAAEPQGRR\n+RELLHSEIKSLMSRAEYLKKHIKMQETQRDVSLDRESLAESVRSSCCLQ\n+>XP_006779807.1 gi|583968708|ref|XP_006779807.1| PREDICTED: TM2 domain-containing protein 3-like [Neolamprologus brichardi]\n+MATVCQIWRPDRGRCLKSYGIIAVLFMDLMLQCVNGSLSTTNVETHYTRDGPFITSPVVP\n+DASSVFPADEDTSKCPSGGLCHRLPAHCIQCDYHLKCTYGKPTLFTCRPKKGVHCIGESG\n+HQQTNFSLNITCQFCWQLDPSQYRCTNSTNCMTVSCPRKRYNATCDVLDHVHCLGKRRFP\n+KRLFCNWTGGYKWSTALALSITLGGFGADRFYLGQWREGLGKLFSFGGLGIWTLIDVLLI\n+GVGYVGPVDGSLYI\n+>XP_006779808.1 gi|583968710|ref|XP_006779808.1| PREDICTED: la-related protein 6-like [Neolamprologus 
brichardi]\n+MYALVNAFMRCLSFLLPPSWLYVSFCLWVGNECEETLQRPNPRARFKSREPLTYEEVKAA\n+AKAAAEAEAQGGSRPSVSPGPDCVSLAATSPAAPKGPSSGLIWIGGLWRAVERVFGAPWV\n+LLRHHLCPKRRRAALGAPYPVCAFELGKIKSFQRGAAAAAAAAKIVDVKGPGETTFTYSK\n+NMSGSVGVPSVNSTECASDASAEQGIDEVITVDQLSQEMGTVTITVAIQAAEDEEPEEVT\n+SNNADFLGGSCSEDEIGRHDKSSGAGTSGGELEEESWQPPDPELIQKLVTQIEYYLSDEN\n+LEHDAFLLKHVRRNKLGFVSVKLLTSFKKVKHLTRDWRTTAYALRHSKILELNDEGRKVR\n+RKSAVPVFASESLPSRMLLLSDLQRWPE'..b'NDQLIRCITEYMQKGRAVECVQYQQILHRNIVY\n+LATIADASPDSAASTSNCTSNDTSASAAAVNGHTEGS\n+>XP_006779820.1 gi|583968735|ref|XP_006779820.1| PREDICTED: uncharacterized protein KIAA0355-like [Neolamprologus brichardi]\n+MYCCSAQESKMDYKRRFLLGGSKQKVQQHQQYQMPELSRTLSASLASSCSASSPMGTGVG\n+MSGSCHPPPSGTSTAVADIQQGISKYLDALNVFCRASAFLTDLFSSVFRNSHYSKAAMQL\n+KDVQEHVMEAASRLTAAIKPEIAKMLMELSAGAANFKDQNDFSLQDVEVLGRCFLTVMQV\n+HFQFLSQALQKVQPVAQSCLAEALAQAQERCANARSQSSDLGPLTELEEASRSWKGAAEA\n+TARLRERGRDGCLAGIQVQQLFCSNNTTIPEHQLKELNMKIDSALQAYKAALESLGHSEY\n+ALKAGFHLNPKAVEAALQGCCSEAEAQQAGRMQTTSQPIQCELPTIPVQIGSHFLKGVSF\n+NESAAENLKLKTHTMLQLIKEALGQNGVTPRDDSPVTEVLNQVCPSSWRGACKTAVQLLF\n+AQAGLVVVDTAQIENKEAYAPQITLEGSKVVVQVPSTWCLKEDPATMSLLQRSLDPEKTL\n+GLVDVLYTAVFDINRWKERKEQALPTIQIQLQRESPDYGIPTDLPPGTSSKTSSGLPKTI\n+SKLTSKFTKKVSSSSNSGGSFSIPSTPSRSMLTTSNSEDKAKGLGHSDGRLQSILQMGSL\n+PCTSDSTQQNQLANGSVSEDQGMNLPTDQEMQDVIDFLSGFNMGKSQQASPLVKRRNSVA\n+SANPAELKPPSGPSQATSSISHSALQPPAQTLPQPQPQPQPSQPVQKQQPQPNPQPPPPQ\n+QQQPQQQQQPPPPPPQQPSPQAQHLYYQHLLQPITQQQAPPPQLPPQQTPPQVLPQQRVA\n+SKWLGTSGQQPPPQGPPAGLSPLGPIGQWASSGLPDLSSDLYSLGLVSTYMDSVVSEMLG\n+QKPQGPRNNTWPNRDQSEGVFGVLGDTLPFDPAVGSDPEFARYVAGVSQAMQQKRQVQHI\n+RRPSNTRSNWPMPDEQHRTWSHPEYFNEGDAVNSGWSANQGDSASSSDETSSANGDSLFS\n+MFSGPDLVAAVKQRRKHSCGEPEVCTLPSPPLHHIGDDSQDSKTKTWPPKAPWQHSTHTN\n+TMPNPSSSLYQMNIPPSSQWGDSMPMLQSPVWSTASDCPPSTGISSGFPFTQQQQQQQQQ\n+QHKPMTKGFKSFPVKHEHRPSYLHQY\n+>XP_006779821.1 gi|583968737|ref|XP_006779821.1| PREDICTED: glucose-6-phosphate isomerase-like [Neolamprologus 
brichardi]\n+MGLTQDPNFQKLQEWYTAHALGLNMRHMFEADKERFNKLSLTLKTEDGDILLDYSKNLIT\n+EDVMKMLVDLAKSRGIEAAREKMFTGEKINFTEGRAVLHVALRNRSNTPIMVDGKDVMPD\n+VNKVLEKMKGFCHKVRSGEWKGYTGKAITDVVNVGIGGSDLGPLMVTEALKPYSKDGPRV\n+WFVSNIDGTHIAKTLAQLNPETTLFIIASKTFTTQETITNAESAKAWFLEHAKDKAAVAK\n+HFVALSTNGPKVKDFGIDTENMFEFWDWVGGRFSLWSAIGMAIALHIGFDNFEKLLSGAH\n+WMDKHFRTAPLDKNAPILLALLGIWYINFFHAETQAMLPYDQYMHRFTAYFQQGDMESNG\n+KYITNHGTRVNYHTGPIVWGEPGTNGQHAFYQLIHQGTRMVPCDFLIPAQSQHPIRDNLH\n+HKVSLMLERYLSKXXALMKGKTTEEAKKELEASGLSGEALEKILPHKVRRIKRNDLIKDN\n+EPAALLMARNSNKLKPKLKRRAPCRVAFTKRDSPSKNSVNRC\n+>XP_006779822.1 gi|583968739|ref|XP_006779822.1| PREDICTED: Wilms tumor protein 1-interacting protein homolog [Neolamprologus brichardi]\n+MEHYQEDLGLRATKLMEDLSLYDAYQDGMYDARRDLVINPDLDFSAPALVEHKAKPMNGT\n+SVLHQQHHTVENFSSGNKVYNAAPVRPVNCNRTVPVDFCAPQRDAVYNEDGCCTKSEVAL\n+PCYTGTSERHRRYSLEVQGHRYSTGSTFDGVPLNKPVAVPGNRCNSVCIASSHDGRYNAT\n+SPRSSLASSLSSQEQSKHASPRSSISSPRTSLVVPGQERYTSPRSSLVHCEGNSVLSPRS\n+SYASTASDTSKHSSPRASLNSCDCCSKPNSNRTSGISMGYDQRHTSPRSSTASQYSFTTS\n+PRSSYSDSRYGPVVNQDLEGVLHSAPLASPRSSICSQDGSARPGASANCVVSPRSSISSH\n+SSRSSRSSRGSMSTYPDLQLPSPRSSMLGTSLHEDTLLQEFGDSNGVQNRIHLQGLSAVP\n+EPQQQSGQTGGTADIPSGSPSSYSYVMPSKTASSGQRFKLPYQVTPSRESGPSQAEKRLE\n+ALTLELEKELEMHMKKEYFGICVKCGKGVYGASQACQAMGNLYHTNCFTCCSCGRRLRGK\n+AFYNVNGKVYCEEDFLYSGFQQTAEKCFVCGHLIMEMILQALGKSYHPGCFRCVVCKEGL\n+DGVPFTVDVENNIYCVKDYHTVFAPKCASCNQPILPAQGSEETIRVVSMDKDYHVECYHC\n+EDCGLQLNDEERHRCYPLEGHLLCHDCHILRLQSQVPAHAPPSYPLHVTEL\n+>XP_006779823.1 gi|583968741|ref|XP_006779823.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X1 [Neolamprologus 
brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n+>XP_006779824.1 gi|583968743|ref|XP_006779824.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X2 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n'
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/sample1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample1.fasta Mon Sep 21 15:40:14 2020 +0000
b
@@ -0,0 +1,11 @@
+>ID1
+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS
+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI
+>ID2
+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA
+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH
+>ID3
+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI
+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV
+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW
+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/sample2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample2.fasta Mon Sep 21 15:40:14 2020 +0000
[
b'@@ -0,0 +1,1005 @@\n+>NP_001007355.1 gi|55925472|ref|NP_001007355.1| eukaryotic translation initiation factor 4E-binding protein 3 [Danio rerio]\n+MSNSEASSTCPIPSRSIHEKSWSPLPDSYSQTPGGTVFSTTPGGTRIIYDRKFLLECRNS\n+PIARTPPCCLPDIPGVTRPSLQIIEQEEDSKDLSIDDSQFVIDI\n+>NP_956692.1 gi|41055339|ref|NP_956692.1| transmembrane protein 218 [Danio rerio]\n+MADVVLGVGTGVFIITLIWILTLALTIILSRATGPTKLGIIPVVLLALIITLVLVFFPRA\n+AEVPAPQRAAQIVDMFFIGRYVLLSLVSLVFLAALFMLLPLHFLEPIYAKPLRTH\n+>NP_001003767.1 gi|57524633|ref|NP_001003767.1| transmembrane protein 179 [Danio rerio]\n+MAVDNFLFGQCILYFLAFLFGFIAVVPLSENGDDFQGKCLLFTEGIWQNENMTMGKQRFI\n+VEEWGPESSCRFITFVGIVSLILSAVQAWRTFFFLCKGHDDSLFHSFLNLLLSLLVLFVV\n+FVAGTISSVGFSIWCDSVTENGAMPSSCEDLQDTDLELGVENSSFYDQFAIAQFGLWSAW\n+LCWLGLTVLAFLKVYHNHRQQELLESLVQEKELLLGHPLQRSSYVYNRNAMI\n+>NP_001002700.1 gi|50540464|ref|NP_001002700.1| fatty-acid amide hydrolase 2-A [Danio rerio]\n+MALTRFERFLGRLLRAVVWILFAAFKLFAPQQRHGVSRLPPITNPLLLLSAMQLARKIRR\n+KEVTSVEVVQAYIDRIQEVNPLINAMVKDRFSAALQEAAQVDKLIEEETGGEDVLEDRLP\n+LLGVPITVKEAFALQGMPNSTGLLTRRDLVSGADAPSVALLKRAGAIPLGVTNCSELCMW\n+LESHNHLYGITNNPYDFERIVGGSSGGEGSILGAGSSVIGIGSDIGGSIRIPCFFNGIFG\n+HKPSVGIVNNEGQYPPASGQQMGFLCTGPMCRYAEDLIPMLSIMGGPNAEKLSLFTEVDL\n+KKLRFFSVPHNGGSHLVSPVEPQLLHAQKMVVKRLEADLGVKVQELLIPQLKYSFQIWGT\n+MMASPGKDGKPPTTFAELMSEGGKKVWPAWELFKWFLGFSSHTLAAIGLALVELFQSSHP\n+SPFIMQQKESLQQELEELLGTDGVLLYPSHPLIAQKHHHPIFTPFNFSYTGIFNILGLPV\n+TQCPLGLSAEGLPLGVQIVAGKLQDRLSLATALYLEKAFGGWREPGKTTIKP\n+>NP_001003555.1 gi|57525887|ref|NP_001003555.1| centromere protein P [Danio rerio]\n+MEQKYEEDIQKLQQEIEMLEAEQEETLRSIFVQHGDRLQQGVKSACEERGGGGAQQHTLS\n+KLITEVRELEKDLRRQTEINGITLNECFVKTLHKSERKLIQQLRLAGHCGLLLFQVEFAV\n+TEIQEDNVLHRRVTELNIVVDGVEFKDFSAFVSRVEDTKDLLLFFRTLRTFSERCEDRRQ\n+TFQHFQEKYPDVVNLPEGCRSEIMIIRSPQLPGISMTLFWKIHVSKEGVVKPLLDLLLKM\n+PDQALELDTKKVMEKASDYFQSLLQLLGVEASIEGLIRTVCS\n+>NP_997599.1 gi|47058959|ref|NP_997599.1| protein dispatched homolog 2 [Danio 
rerio]\n+MESGSISRQREDAEMPDSSTTEGPSLEAPQSEIPEVSLCPPDSDSTESQMCPVEIEENQT\n+KSSSPFNSHSSTQLERQVSQGSAYHSPPHKKCPCCGHQQPSQSDVCPGQMNALHQADCAA\n+SPVKTLYSCSPSRLPSCHTKMQCHWLHGSHDGSNHKPVQHHMVTVRNDGLHRIPRSYSQV\n+IVEYPMTVLISCTLVLFACSLAGILTGPLPDFSDPLLGFEPRGTDISVRLATWTRLKQNT\n+GPGKPLSPVPWQLTEKTTTGKDTIKSEPQFRERSRRMLHRDNAEHNFFCNAPGERYAQLV\n+FRSGNSASLWSLKAIYSMCQMEQTQIRSGPQFDKLCQVKSEFYGSMVKNECCPSWSLGNY\n+LAVLNNISSCFSLTSQQVSESLGLLRFCAPYYHDGSLIASCTERSKFGRCASVPHRCKLS\n+SIFQILHYLVDKDFLGPQTVEYKVPSLKYSIVFLPVEKSDSLMNIYLDHLEGHKLTYNNT\n+TITGMDLGIKQKLFKYYLARDSIYPVLAALALLITIGLYLKSLFIAAMSLVAVILSLSTS\n+YFFYKVAFRLTFFPLLNLAAVFVLLGSCLNQALTFVDFWKLQLSHNPPAVPEKRMNRVLQ\n+EMGYLIIVSGLTSSVTFYSGYISSITAVRCYAVYLGSASLINTLFALVWLPCTLILQERY\n+AVLSSNTVGKVAWKPCCSKNAGGFWETSSRKRCLFTFRQKLRTLGRGFSDTSNLLFLKIL\n+PCGVVKFRYIWICWFAVLAAGGTYISCVDPGMKLPTSDSRTTQLFRSSHPFERYDAEYRH\n+QFMFERMKDGEDEPMMLTLIWGIVPSDNGDHFDPKSNGSLSVDPGFNMSSLQAQIWLRDL\n+CGKIQNQTFYSPLSAEQDTAEDNVCFVEHLIHWVSIRRCSESEDAFSFCCNNIPFPYPPR\n+VFEQCLSMMVAEQHAEGRLPSAGGLRFDSEGRIAALVVIFKTVQLYSFNYNRMSQFYQEI\n+LSWFNREISKAPAGLQRGWFVSQLGLYDLQQCLSSETLEVAGFSVALTFALLLLTTWNIP\n+LSVYVSIAVAGSVFATVGLLVLLEWQLNGVEALFISAAAGLSVDFVANYCISYSLAPHSD\n+RLGRVAHSIKRMGCPVATGAGAYFCVGIIMLPATALLFRKLGIFLLLVKCVACGFATFFF\n+QSLCCFFGPQNNCGRITLPCVTQQSTENILSSCSATEPGTNNPAANGAFGCGKGSRVRRS\n+FNKENEGFLCPNQQHHRKRQPVGGREPEQNELQPLACQLSDSFENSTCTSKLSNRPSVLS\n+DDIQFCGLSPKQDYDRVSIEADSTEMCSRHLKGCNPPPALQTSSPYKENMLRLPQDACKE\n+KVLCKKCRGQSRGGLQLWNVSLSSSSSMDEIMITQTTDTVNERSLSMDDHIHKRLLSCQS\n+QSSIEGLEESNDTCLTEVEAAIPQAGKIEDEFQPGHLNGKRDTLRLSLKETVYDLASPGS\n+GRVRTAQSDVPVILPNSKPDMPDVWIKREGKGEGGS\n+>NP_001013313.1 gi|61651744|ref|NP_001013313.1| coiled-coil domain-containing protein 115 [Danio rerio]\n+MRVDENLRLDEQLLLFMEQLEALEEKRQRLNSLIEEGWFSIAKARYSMGNKQVSALQYAS\n+EMQPLAHVETSLLEGGTAEFKCERSENKAEEQKTKTIEDIGAKETGLRRRVHTKQKEVKE\n+GEQDTDEVKTKTDSPTPEHRNPLKWFGILVPQNLKQAQSAFKEVITLSVEIASLQSTILA\n+TRKEMQVQMKEKQERTEKAQLEVKEE\n+>NP_991238.1 gi|45387769|ref|NP_991238.1| pituitary homeobox 3 [Danio 
rerio]\n+MDFNLLTDSEARSPALSLSDSGTPQHDPGCKGQDNSDTEKSHQNHTDESNPEDGSLKKKQ\n+RRQRTHFTSQQLQELEATFQRNRYPDMSTREEIAVWTNLTEARVRVWFKNRRAKWRKRER\n+NQQAELCKNGFGAQFNGLMQPYDDMYSGYSYNNWATKSLASSPLSAKSFPFFNSMNVSPL\n+'..b'NDQLIRCITEYMQKGRAVECVQYQQILHRNIVY\n+LATIADASPDSAASTSNCTSNDTSASAAAVNGHTEGS\n+>XP_006779820.1 gi|583968735|ref|XP_006779820.1| PREDICTED: uncharacterized protein KIAA0355-like [Neolamprologus brichardi]\n+MYCCSAQESKMDYKRRFLLGGSKQKVQQHQQYQMPELSRTLSASLASSCSASSPMGTGVG\n+MSGSCHPPPSGTSTAVADIQQGISKYLDALNVFCRASAFLTDLFSSVFRNSHYSKAAMQL\n+KDVQEHVMEAASRLTAAIKPEIAKMLMELSAGAANFKDQNDFSLQDVEVLGRCFLTVMQV\n+HFQFLSQALQKVQPVAQSCLAEALAQAQERCANARSQSSDLGPLTELEEASRSWKGAAEA\n+TARLRERGRDGCLAGIQVQQLFCSNNTTIPEHQLKELNMKIDSALQAYKAALESLGHSEY\n+ALKAGFHLNPKAVEAALQGCCSEAEAQQAGRMQTTSQPIQCELPTIPVQIGSHFLKGVSF\n+NESAAENLKLKTHTMLQLIKEALGQNGVTPRDDSPVTEVLNQVCPSSWRGACKTAVQLLF\n+AQAGLVVVDTAQIENKEAYAPQITLEGSKVVVQVPSTWCLKEDPATMSLLQRSLDPEKTL\n+GLVDVLYTAVFDINRWKERKEQALPTIQIQLQRESPDYGIPTDLPPGTSSKTSSGLPKTI\n+SKLTSKFTKKVSSSSNSGGSFSIPSTPSRSMLTTSNSEDKAKGLGHSDGRLQSILQMGSL\n+PCTSDSTQQNQLANGSVSEDQGMNLPTDQEMQDVIDFLSGFNMGKSQQASPLVKRRNSVA\n+SANPAELKPPSGPSQATSSISHSALQPPAQTLPQPQPQPQPSQPVQKQQPQPNPQPPPPQ\n+QQQPQQQQQPPPPPPQQPSPQAQHLYYQHLLQPITQQQAPPPQLPPQQTPPQVLPQQRVA\n+SKWLGTSGQQPPPQGPPAGLSPLGPIGQWASSGLPDLSSDLYSLGLVSTYMDSVVSEMLG\n+QKPQGPRNNTWPNRDQSEGVFGVLGDTLPFDPAVGSDPEFARYVAGVSQAMQQKRQVQHI\n+RRPSNTRSNWPMPDEQHRTWSHPEYFNEGDAVNSGWSANQGDSASSSDETSSANGDSLFS\n+MFSGPDLVAAVKQRRKHSCGEPEVCTLPSPPLHHIGDDSQDSKTKTWPPKAPWQHSTHTN\n+TMPNPSSSLYQMNIPPSSQWGDSMPMLQSPVWSTASDCPPSTGISSGFPFTQQQQQQQQQ\n+QHKPMTKGFKSFPVKHEHRPSYLHQY\n+>XP_006779821.1 gi|583968737|ref|XP_006779821.1| PREDICTED: glucose-6-phosphate isomerase-like [Neolamprologus 
brichardi]\n+MGLTQDPNFQKLQEWYTAHALGLNMRHMFEADKERFNKLSLTLKTEDGDILLDYSKNLIT\n+EDVMKMLVDLAKSRGIEAAREKMFTGEKINFTEGRAVLHVALRNRSNTPIMVDGKDVMPD\n+VNKVLEKMKGFCHKVRSGEWKGYTGKAITDVVNVGIGGSDLGPLMVTEALKPYSKDGPRV\n+WFVSNIDGTHIAKTLAQLNPETTLFIIASKTFTTQETITNAESAKAWFLEHAKDKAAVAK\n+HFVALSTNGPKVKDFGIDTENMFEFWDWVGGRFSLWSAIGMAIALHIGFDNFEKLLSGAH\n+WMDKHFRTAPLDKNAPILLALLGIWYINFFHAETQAMLPYDQYMHRFTAYFQQGDMESNG\n+KYITNHGTRVNYHTGPIVWGEPGTNGQHAFYQLIHQGTRMVPCDFLIPAQSQHPIRDNLH\n+HKVSLMLERYLSKXXALMKGKTTEEAKKELEASGLSGEALEKILPHKVRRIKRNDLIKDN\n+EPAALLMARNSNKLKPKLKRRAPCRVAFTKRDSPSKNSVNRC\n+>XP_006779822.1 gi|583968739|ref|XP_006779822.1| PREDICTED: Wilms tumor protein 1-interacting protein homolog [Neolamprologus brichardi]\n+MEHYQEDLGLRATKLMEDLSLYDAYQDGMYDARRDLVINPDLDFSAPALVEHKAKPMNGT\n+SVLHQQHHTVENFSSGNKVYNAAPVRPVNCNRTVPVDFCAPQRDAVYNEDGCCTKSEVAL\n+PCYTGTSERHRRYSLEVQGHRYSTGSTFDGVPLNKPVAVPGNRCNSVCIASSHDGRYNAT\n+SPRSSLASSLSSQEQSKHASPRSSISSPRTSLVVPGQERYTSPRSSLVHCEGNSVLSPRS\n+SYASTASDTSKHSSPRASLNSCDCCSKPNSNRTSGISMGYDQRHTSPRSSTASQYSFTTS\n+PRSSYSDSRYGPVVNQDLEGVLHSAPLASPRSSICSQDGSARPGASANCVVSPRSSISSH\n+SSRSSRSSRGSMSTYPDLQLPSPRSSMLGTSLHEDTLLQEFGDSNGVQNRIHLQGLSAVP\n+EPQQQSGQTGGTADIPSGSPSSYSYVMPSKTASSGQRFKLPYQVTPSRESGPSQAEKRLE\n+ALTLELEKELEMHMKKEYFGICVKCGKGVYGASQACQAMGNLYHTNCFTCCSCGRRLRGK\n+AFYNVNGKVYCEEDFLYSGFQQTAEKCFVCGHLIMEMILQALGKSYHPGCFRCVVCKEGL\n+DGVPFTVDVENNIYCVKDYHTVFAPKCASCNQPILPAQGSEETIRVVSMDKDYHVECYHC\n+EDCGLQLNDEERHRCYPLEGHLLCHDCHILRLQSQVPAHAPPSYPLHVTEL\n+>XP_006779823.1 gi|583968741|ref|XP_006779823.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X1 [Neolamprologus 
brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n+>XP_006779824.1 gi|583968743|ref|XP_006779824.1| PREDICTED: short-chain dehydrogenase/reductase family 42E member 1-like isoform X2 [Neolamprologus brichardi]\n+MGTASKETFLITGGSGYFGNRLALSLLKKGAKVILFDIIPPSQELPEDVVFVQGDIREYP\n+DVEKAVTGVDCVFHIASYGMSGREQLNRQLIEAVNVQGTQNILKACVEHGVSRLIYTSTF\n+NVVFGGQVIENGDESLPYLPLHLHPDHYSRTKSLADMAVLKANGTVLKGCSGLLSTCALR\n+PAGIYGPGEQRHLPRIVDYIEKGIFRFVYGKPSSLVEFVHVDNLVSAHVLAAEALTPEKQ\n+HRAAGQAYFISDGRPVNNFEFFRPLVEGLGYRFPTLRLPISLIYFFAFLTEMIHCLIGPF\n+YNFQPLLTRTEVYKTGVTHYFSMAKAKAELGYEPREYNLDEVVQWFRSRGHGKKCHRSFL\n+SRLLLNVLFVSALVAVSLSFLPVVGS\n'
b
diff -r ae4d5733272f -r 733ca84b21ee test-data/test.fasta
--- a/test-data/test.fasta Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,11 +0,0 @@
->ID1 desc
-GATACA
-
-
->ID2 desc
-GATACAGATACA
-GATACAGA
-TACAGATACA
->ID3 desc
-GATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGATACAGA
-TACAGATACA
b
diff -r ae4d5733272f -r 733ca84b21ee tool_dependencies.xml
--- a/tool_dependencies.xml Fri Oct 16 16:13:34 2015 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,6 +0,0 @@
-<?xml version="1.0"?>
-<tool_dependency>
-    <package name="biopython" version="1.65">
-        <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
-    </package>
-</tool_dependency>