Repository 'gafa'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/gafa

Changeset 1:fc8ca4ade638 (2017-02-20)
Previous changeset 0:af9f72ddf7f9 (2016-12-21) Next changeset 2:0c2f9172334a (2017-02-20)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 81a1e79dda127d1afc16c7e456bbec16093a3c3f-dirty
modified:
GAFA.py
GAFA.xml
test-data/gene.json
test-data/test.gafa.sqlite
added:
test-data/align1.fasta
test-data/align2.fasta
test-data/align3.fasta
test-data/align4.fasta
test-data/tree1.nhx
test-data/tree2.nhx
test-data/tree3.nhx
test-data/tree4.nhx
removed:
test-data/cigar.tabular
test-data/tree.nhx
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 GAFA.py
--- a/GAFA.py Wed Dec 21 07:31:50 2016 -0500
+++ b/GAFA.py Mon Feb 20 06:25:33 2017 -0500
[
@@ -1,10 +1,51 @@
 from __future__ import print_function
 
+import collections
 import json
 import optparse
+import re
 import sqlite3
 
-version = "0.1.0"
+version = "0.2.0"
+
+# One FASTA record: the header line and the concatenated sequence lines
+Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
+
+
+def FASTAReader_gen(fasta_filename):
+    """
+    Yield the records of a FASTA file one at a time.
+
+    Each record is a Sequence namedtuple. Its `header` is the whole header
+    line (leading '>' included) with trailing whitespace stripped; its
+    `sequence` is the concatenation of the lines that follow, each stripped
+    of trailing whitespace, up to the next header or the end of the file.
+
+    :param fasta_filename: path of the FASTA file to read
+    :raises AssertionError: if a record does not start with a '>' line
+    """
+    # The with-block closes the file when the generator is exhausted or
+    # closed early, instead of leaving the handle open
+    with open(fasta_filename) as fasta_file:
+        line = fasta_file.readline()
+        while line:
+            # Raised explicitly rather than through `assert`, so that the
+            # check is not stripped when running with `python -O`
+            if not line.startswith('>'):
+                raise AssertionError("FASTA headers must start with >")
+            header = line.rstrip()
+            sequence_parts = []
+            line = fasta_file.readline()
+            # Collect sequence lines until the next header or end of file
+            while line and line[0] != '>':
+                sequence_parts.append(line.rstrip())
+                line = fasta_file.readline()
+            yield Sequence(header, "".join(sequence_parts))
+
+
+# Matches any single character that is not the gap symbol '-'
+FASTA_MATCH_RE = re.compile(r'[^-]')
+
+
+def fasta_aln2cigar(sequence):
+    """
+    Convert one row of a FASTA alignment into a CIGAR-like string.
+
+    Every character other than '-' counts as a match (M) and every '-' as a
+    gap (D). Runs of the same operation are condensed into count + letter,
+    with the count omitted for runs of length 1, e.g. 'MK--L' gives '2M2DM'.
+
+    :param sequence: aligned sequence, with '-' marking the gaps
+    :return: the CIGAR string, which is empty if `sequence` is empty
+    """
+    from itertools import groupby
+
+    # Converts each match into M and each gap into D
+    ops = FASTA_MATCH_RE.sub('M', sequence).replace('-', 'D')
+    cigar_parts = []
+    # groupby() yields nothing for an empty string, so an empty alignment
+    # row gives '' instead of failing while indexing an empty substring
+    for op, run in groupby(ops):
+        run_length = sum(1 for _ in run)
+        if run_length > 1:
+            cigar_parts.append(str(run_length))
+        cigar_parts.append(op)
+    return ''.join(cigar_parts)
 
 
 def create_tables(conn):
@@ -29,38 +70,41 @@
     cur.execute('''CREATE TABLE transcript (
         transcript_id VARCHAR PRIMARY KEY NOT NULL,
         protein_id VARCHAR UNIQUE,
+        protein_sequence VARCHAR,
         gene_id VARCHAR NOT NULL REFERENCES gene(gene_id))''')
 
     cur.execute('''CREATE TABLE gene_family_member (
         gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id),
         protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id),
-        alignment VARCHAR NOT NULL,
+        protein_alignment VARCHAR NOT NULL,
         PRIMARY KEY (gene_family_id, protein_id))''')
     conn.commit()
 
 
-def cigar_to_db(conn, i, fname):
+def align_to_db(conn, i, fname):
+    """
+    Load the alignment in FASTA file `fname` into the database as the
+    members of gene family `i`.
+
+    For each record, the sequence id is the header without its first
+    character, cut at the first underscore. It must match exactly one row
+    of the transcript table, by transcript_id or protein_id, otherwise an
+    Exception is raised. Transcripts with no protein id are reported and
+    skipped; for the others the alignment is stored as a CIGAR string in
+    gene_family_member and the sequence without gaps is stored in
+    transcript.protein_sequence. Changes are committed once, after the
+    last record.
+
+    :param conn: database connection providing cursor() and commit()
+    :param i: gene family id to store the members under
+    :param fname: path of the FASTA alignment file
+    """
     cur = conn.cursor()
-    with open(fname) as f:
-        for element in f.readlines():
-            seq_id, cigar = element.rstrip('\n').split('\t')
-            # Trim seq_id by removing everything from the first underscore
-            seq_id = seq_id.split('_', 1)[0]
+    for fasta_seq_align in FASTAReader_gen(fname):
+        seq_id = fasta_seq_align.header[1:]
+        # Trim seq_id by removing everything from the first underscore
+        seq_id = seq_id.split('_', 1)[0]
 
-            cur.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?',
-                        (seq_id, seq_id))
-            results = cur.fetchall()
-            if len(results) == 0:
-                raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id)
-            elif len(results) > 1:
-                raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id)
-            transcript_id, protein_id = results[0]
-            if protein_id is None:
-                print("Skipping transcript '%s' with no protein id" % transcript_id)
-            else:
-                cur.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, alignment) VALUES (?, ?, ?)',
-                            (i, protein_id, cigar))
-                conn.commit()
+        cur.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?',
+                    (seq_id, seq_id))
+        results = cur.fetchall()
+        if len(results) == 0:
+            raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id)
+        elif len(results) > 1:
+            raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id)
+        transcript_id, protein_id = results[0]
+        if protein_id is None:
+            print("Skipping transcript '%s' with no protein id" % transcript_id)
+        else:
+            cigar = fasta_aln2cigar(fasta_seq_align.sequence)
+            cur.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, protein_alignment) VALUES (?, ?, ?)',
+                        (i, protein_id, cigar))
+            protein_sequence = fasta_seq_align.sequence.replace('-', '')
+            cur.execute('UPDATE transcript SET protein_sequence=? WHERE protein_id=?', (protein_sequence, protein_id))
+    conn.commit()
 
 
 def newicktree_to_db(conn, i, fname):
@@ -99,7 +143,7 @@
 def __main__():
     parser = optparse.OptionParser()
     parser.add_option('-t', '--tree', action='append', help='Gene tree files')
-    parser.add_option('-c', '--cigar', action='append', help='CIGAR alignments of CDS files in tabular format')
+    parser.add_option('-a', '--align', action='append', help='Protein alignments in fasta_aln format')
     parser.add_option('-g', '--gene', help='Gene features file in JSON format')
     parser.add_option('-o', '--output', help='Path of the output file')
     options, args = parser.parse_args()
@@ -111,9 +155,11 @@
 
     gene_json_to_db(conn, options.gene)
 
-    for i, (tree, cigar) in enumerate(zip(options.tree, options.cigar), start=1):
+    for i, (tree, align) in enumerate(zip(options.tree, options.align), start=1):
         newicktree_to_db(conn, i, tree)
-        cigar_to_db(conn, i, cigar)
+        align_to_db(conn, i, align)
+
+    conn.close()
 
 
 if __name__ == '__main__':
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 GAFA.xml
--- a/GAFA.xml Wed Dec 21 07:31:50 2016 -0500
+++ b/GAFA.xml Mon Feb 20 06:25:33 2017 -0500
[
@@ -1,4 +1,4 @@
-<tool id="gafa" name="Gene Align and Family Aggregator" version="0.1.0">
+<tool id="gafa" name="Gene Align and Family Aggregator" version="0.2.0">
     <description>generates an SQLite database that can be visualised with Aequatus</description>
     <command>
 <![CDATA[
@@ -7,7 +7,7 @@
     -t '$dataset'
 #end for
 #for $dataset in $alignmentFile:
-    -c '$dataset'
+    -a '$dataset'
 #end for
 -g '$genesFile'
 -o '$outputFile'
@@ -15,16 +15,16 @@
     </command>
     <inputs>
         <param name="treeFile" type="data" format="nhx" multiple="true" label="Gene tree" help="Gene tree in Newick format, e.g. generated from 'TreeBeST best' or 'Get gene tree by Ensembl ID' tool" />
-        <param name="alignmentFile" type="data" format="tabular" multiple="true" label="CDS alignments" help="CDS alignments in tabular format generated by 'T-Coffee to CIGAR' tool" />
-        <param name="genesFile" type="data" format="json" label="Gene features" help="Gene features in JSON format generated by 'GFF3 to JSON' or 'Get Ensembl features by ID' tool" />
+        <param name="alignmentFile" type="data" format="fasta" multiple="true" label="Protein alignments" help="Protein alignments in fasta_aln format generated by T-Coffee" />
+        <param name="genesFile" type="data" format="json" label="Gene features" help="Gene features in JSON format generated by 'GFF3 to JSON' or 'Get features by Ensembl ID' tool" />
     </inputs>
     <outputs>
         <data name="outputFile" format="gafa.sqlite" label="${tool.name} on ${on_string}" />
     </outputs>
     <tests>
         <test>
-            <param name="treeFile" ftype="nhx" value="tree.nhx" />
-            <param name="alignmentFile" ftype="tabular" value="cigar.tabular" />
+            <param name="treeFile" ftype="nhx" value="tree1.nhx,tree2.nhx,tree3.nhx,tree4.nhx" />
+            <param name="alignmentFile" ftype="fasta" value="align1.fasta,align2.fasta,align3.fasta,align4.fasta" />
             <param name="genesFile" ftype="json" value="gene.json" />
             <output name="outputFile" file="test.gafa.sqlite" compare="sim_size" />
         </test>
@@ -35,7 +35,7 @@
 
 Aequatus is an open-source homology browser developed with novel rendering approaches to visualise homologous, orthologous and paralogous gene structures.
 
-N.B.: The tool will modify the sequence identifiers found in the "CDS alignments" dataset by removing everything from the first underscore to the end of the string.
+N.B.: The tool will modify the sequence identifiers found in the "Protein alignments" dataset by removing everything from the first underscore to the end of the string.
 
 .. _Aequatus: http://aequatus.earlham.ac.uk
 ]]>
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/align1.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/align1.fasta Mon Feb 20 06:25:33 2017 -0500
b
@@ -0,0 +1,108 @@
+>ENST00000338702_homosapiens_1
+MENQEKASIAGHMFDVVVIGGGISGLSAAKLLTEYGVSVLVLEARDRVGG
+RTYTIRNEHVDYVDVGGAYVGPTQNRILRLSKELGIETYKVNVSERLVQY
+VKGKTYPFRGAFPPVWNPIAYLDYNNLWRTIDNMGKEIPTDAPWEAQHAD
+KWDKMTMKELIDKICWTKTARRFAYLFVNINVTSEPHEVSALWFLWYVKQ
+CGGTTRIFSVTNGGQERKFVGGSGQVSERIMDLLGDQVKLNHPVTHVDQS
+SDNIIIETLNHEHYECKYVINAIPPTLTAKIHFRPELPAERNQLIQRLPM
+GAVIKCMMYYKEAFWKKKDYCGCMIIEDEDAPISITLDDTKPDGSLPAIM
+GFILARKADRLAKLHKEIRKKKICELYAKVLGSQEALHPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTQYGRVIRQPVGRIFFAGTETATKWSGYMEGAVE
+AGERAAREVLNGLGKVTEKDIWVQEPESKDVPAVEITHTFWERNLPSVSG
+LLKIIGFSTSVTALGFVLYKYKLLPRS
+>ENST00000542639_homosapiens_1
+MGKEIPTDAPWEAQHADKWDKMTMKELIDKICWTKTARRFAYLFVNINVT
+SEPHEVSALWFLWYVKQCGGTTRIFSVTNGGQERKFVGGSGQVSERIMDL
+LGDQVKLNHPVTHVDQSSDNIIIETLNHEHYECKYVINAIPPTLTAKIHF
+RPELPAERNQLIQRLPMGAVIKCMMYYKEAFWKKKDYCGCMIIEDEDAPI
+SITLDDTKPDGSLPAIMGFILARKADRLAKLHKEIRKKKICELYAKVLGS
+QEALHPVHYEEKNWCEEQYSGGCYTAYFPPGIMTQYGRVIRQPVGRIFFA
+GTETATKWSGYMEGAVEAGERAAREVLNGLGKVTEKDIWVQEPESKDVPA
+VEITHTFWERNLPSVSGLLKIIGFSTSVTALGFVLYKYKLLPRS------
+--------------------------------------------------
+--------------------------------------------------
+---------------------------
+>ENSPTRT00000040520_pantroglodytes_1
+MENQEKASIAGHMFDVVVIGGGISGLSAAKLLTEYGVSVLVLEARDRVGG
+RTYTIRNEHVDYVDVGGAYVGPTQNRILRLSKELGIETYKVNVSERLVQY
+VKGKTYPFRGAFPPVWNPIAYLDYNNLWRTIDNMGKEIPNDAPWEAQHAD
+EWDKMTMKELIDKICWTKTARRFAYLFVNINVTSEPHEVSALWFLWYVKQ
+CGGTTRIFSVTNGGQERKFVGGSGQVSERIMDLLGDQVKLNHPVTHVDQS
+SDNIIIETLNHEHYECKYVINAIPPTLTAKIHFRPELPAERNQLIQRLPM
+GAIIKCMMYYKEAFWKKKDYCGCMIIEDEDAPISITLDDTKPDGSLPAIM
+GFILARKADRLAKLHKEIRKKKICELYAKVLGSQEALHPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTQYGRVIRQPVGRIFFAGTETATKWSGYMEGAVE
+AGERAAREVLNGLGKVTEKDIWVQEPESKDVPAVEITHTFWERNLPSVSG
+LLKIIGFSTSVTTLGFVLYKYKLLPRS
+>ENSSSCT00000013404_susscrofa_1
+MERQEKANNAGHMVDVVVIGGGISGLSAAKLLNEYGINVLVLEARDRVGG
+RTYTVRNENVDYVDVGGAYVGPTQNRILRLSKELGLETYKVNVNECLVQY
+VKGKSYPFRGAFPPVWNPIAYLDYNNLWRTMDDMGKKIPADAPWESPHAE
+EWDKMTMKDLIDKICWTKTAKRFASLFVNINVTSEPHEVSALWFLWYVKQ
+CGGTTRIFSVTNGGQERKFVGGSGQVSERIMHLLGDRVKLRCPVTYVDQS
+GDNIIVETLNHELYECQYVISAIPPTLTAKIHFRPELPSERNQLIQRLPM
+GAIIKCMMYYKEAFWKKKNYCGCMIIEDEEAPISITLDDTKPDGSLPAIM
+GFILARKADRLAKVHKEIRKRKICELYAKVLGSQEALHPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTQYGRVIRQPVGRIFFAGTETATQWSGYMEGAVE
+AGERAAREILNALGKVSKKDIWLREPESEDVPAFEITRTFWERNLPSVTG
+LLKIIGFSTSVTALWLAVYKFRLLTRS
+>ENSSSCT00000035258_susscrofa_1
+SLPAIMGFILARKADRLAKVHKEIRKRKICELYAKVLGSQEALHPVHYEE
+KNWCEEQYSGGCYTAYFPPGIMTQYGRVIRQPVGRIFFAGTETATQWSGY
+MEGAVEAGERAAREDVPAFEITRTFWERNLPSVTGLLKIIGFSX------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+---------------------------
+>ENSSSCT00000032764_susscrofa_1
+MDDMGKKIPADAPWESPHAEEWDKMTMKDLIDKICWTKTAKRFASLFVNI
+NVTSEPHEVSALWFLWYVKQCGGTTRIFSVTNGGQERKFVGGSGQVSERI
+MHLLGDRVKLRCPVTYVDQSGDNIIVETLNHELYECQYVISAIPPTL---
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+---------------------------
+>ENSCAFT00000022939_canisfamiliaris_1
+MASREKTSIEGHMFDVVVIGGGISGLSAAKLLAEHEVDVLVLEARDRVGG
+RTYTVRNEHVDYVDVGGAYVGPTQNRILRLSKELGLETYKVNVNERLVQY
+VKGKTYPFRGAFPPVWNPIAYLDYNNLWRTMDNMGKEIPADAPWEAPHAE
+EWDKMTMKDLIDKICWTKTARRFASLFVNINVTSEPHEVSALWFLWYVKQ
+CGGTTRIFSVTNGGQERKFVGGSGQVSERIMERLGDRVKLKRPVTYVDQS
+DDNIIIETLNHELYECKYVISAIPPTLTAKIHFRPELPSERNQLIQRLPM
+GAIIKCMMYYKEAFWKKKDYCGCMIIEDEEAPISITLDDTKPDGSLPAIM
+GFILARKADRLAKLHKEIRKRKICELYAKVLGSQEALQPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTHYGRVIRQPFGRIYFAGTETATHWSGYMEGAVE
+AGERTAREVLNALGRVAEKDLKTQEPESKDVPAMEITHTFWERNLPSVTG
+LLKLIGFTTSVTALWIVAYKFRLLRRS
+>ENSMUST00000026013_musmusculus_1
+MTDLEKPSITGHMFDVVVIGGGISGLAAAKLLSEYKINVLVLEARDRVGG
+RTYTVRNEHVKWVDVGGAYVGPTQNRILRLSKELGIETYKVNVNERLVQY
+VKGKTYPFRGAFPPVWNPLAYLDYNNLWRTMDDMGKEIPVDAPWQARHAE
+EWDKITMKDLIDKICWTKTAREFAYLFVNINVTSEPHEVSALWFLWYVRQ
+CGGTSRIFSVTNGGQERKFVGGSGQISEQIMVLLGDKVKLSSPVTYIDQT
+DDNIIIETLNHEHYECKYVISAIPPVLTAKIHFKPELPPERNQLIQRLPM
+GAVIKCMVYYKEAFWKKKDYCGCMIIEDEEAPISITLDDTKPDGSMPAIM
+GFILARKAERLAKLHKDIRKRKICELYAKVLGSQEALSPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTLYGRVIRQPVGRIYFAGTETATQWSGYMEGAVE
+AGERAAREVLNALGKVAKKDIWVQEPESKDVPALEITHTFLERNLPSVPG
+LLKITGFSTSVALLCFVLYKFKQPQS-
+>ENSRNOT00000066674_rattusnorvegicus_1
+MTDLEKPNLAGHMFDVVVIGGGISGLAAAKLLSEYKINVLVLEARDRVGG
+RTYTVRNEHVKWVDVGGAYVGPTQNRILRLSKELGIETYKVNVNERLVQY
+VKGKTYPFRGAFPPVWNPLAYLDYNNLWRTMDEMGKEIPVDAPWQARHAQ
+EWDKMTMKDLIDKICWTKTAREFAYLFVNINVTSEPHEVSALWFLWYVRQ
+CGGTARIFSVTNGGQERKFVGGSGQVSEQIMGLLGDKVKLSSPVTYIDQT
+DDNIIVETLNHEHYECKYVISAIPPILTAKIHFKPELPPERNQLIQRLPM
+GAVIKCMVYYKEAFWKKKDYCGCMIIEDEEAPIAITLDDTKPDGSLPAIM
+GFILARKADRLAKLHKDIRKRKICELYAKVLGSQEALYPVHYEEKNWCEE
+QYSGGCYTAYFPPGIMTQYGRVIRQPVGRIYFAGTETATQWSGYMEGAVE
+AGERAAREVLNALGKVAKKDIWVEEPESKDVPAIEITHTFLERNLPSVPG
+LLKITGVSTSVALLCFVLYKIKKLPC-
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/align2.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/align2.fasta Mon Feb 20 06:25:33 2017 -0500
b
@@ -0,0 +1,108 @@
+>ENSPTRT00000040521_pantroglodytes_1
+MSNKCDVVVVGGGISGKAAAKLLHDSGLNVVVLEARDRVGGRTYTLRNQK
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHHVKGKSYPFR
+GPFPPVWNPITYLDHNNFWRTMDDMGREIPSDAPWKAPLAEEWDNMTMKE
+LLDKLCWTESAKQLATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQERKFVGGSGQVSERIMDLLGDRVKLERPVIYIDQTRENVLVETL
+NHEMYEAKYVISAIPPTLGMKIHFNPPLPMMRNQMITRVPLGSVIKCIVY
+YKEPFWRKKDYCGTMIIDGEEAPVAYTLDDTKPEGNYAAIMGFILAHKAR
+KLARLTKEERLKKLCELYAKVLGSPEALEPVHYEEKNWCEEQYSGGCYTT
+YFPPGILTQYGRVLRQPVDRIYFAGTETATHWSGYMEGAVEAGERAAREI
+LHAMGKIPEDEIWQSEPESVDVPAQPITTTFLERHLPSVPGLLRLIGLTT
+IFSATALGFLAHKRGLLVRV
+>ENST00000378069_homosapiens_1
+MSNKCDVVVVGGGISGMAAAKLLHDSGLNVVVLEARDRVGGRTYTLRNQK
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHHVKGKSYPFR
+GPFPPVWNPITYLDHNNFWRTMDDMGREIPSDAPWKAPLAEEWDNMTMKE
+LLDKLCWTESAKQLATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQERKFVGGSGQVSERIMDLLGDRVKLERPVIYIDQTRENVLVETL
+NHEMYEAKYVISAIPPTLGMKIHFNPPLPMMRNQMITRVPLGSVIKCIVY
+YKEPFWRKKDYCGTMIIDGEEAPVAYTLDDTKPEGNYAAIMGFILAHKAR
+KLARLTKEERLKKLCELYAKVLGSLEALEPVHYEEKNWCEEQYSGGCYTT
+YFPPGILTQYGRVLRQPVDRIYFAGTETATHWSGYMEGAVEAGERAAREI
+LHAMGKIPEDEIWQSEPESVDVPAQPITTTFLERHLPSVPGLLRLIGLTT
+IFSATALGFLAHKRGLLVRV
+>ENSRNOT00000044009_rattusnorvegicus_1
+MSNKCDVIVVGGGISGMAAAKLLHDCGLSVVVLEARDRVGGRTYTIRNKN
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHFVKGKSYAFR
+GPFPPVWNPITYLDYNNLWRTMDEMGQEIPSDAPWKAPLAEEWDYMTMKE
+LLDKICWTNSTKQIATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQERKFIGGSGQVSERIKDILGDRVKLERPVIHIDQTGENVVVKTL
+NHEIYEAKYVISAIPPVLGMKIHHSPPLPILRNQLITRVPLGSVIKCMVY
+YKEPFWRKKDFCGTMVIEGEEAPIAYTLDDTKPDGSCAAIMGFILAHKAR
+KLVRLTKEERLRKLCELYAKVLNSQEALQPVHYEEKNWCEEQYSGGCYTA
+YFPPGILTQYGRVLRQPVGKIFFAGTETASHWSGYMEGAVEAGERAAREI
+LHAIGKIPEDEIWQPEPESVDVPARPITNTFLERHLPSVPGLLKLLGLTT
+ILSATALGFLAHKKGLFVRF
+>ENSMUST00000040820_musmusculus_1
+MSNKSDVIVVGGGISGMAAAKLLHDCGLSVVVLEARDRVGGRTYTIRNKN
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHFVKGKSYAFR
+GPFPPVWNPITYLDNNNLWRTMDEMGQEIPSDAPWKAPLAEEWDYMTMKE
+LLDKICWTKSTKQIATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQERKFIGGSGQVSERIKDILGDRVKLERPVIHIDQTGENVIVKTL
+NHEIYEAKYVISAIPPALGMKIHYSPPLPMLRNQLISRVPLGSVIKCMVY
+YKEPFWRKKDFCGTMVIEGEEAPIAYTLDDTKPDGTYAAIMGFILAHKAR
+KLVRLTKEERLRKLCELYAKVLNSQEALQPVHYEEKNWCEEQYSGGCYTT
+YFPPGILTQYGRVLRQPVGKIFFAGTETASHWSGYMEGAVEAGERAAREI
+LHAIGKIPEDEIWQPEPESLDVPARPITSTFLERHLPSVPGLLKLFGLTT
+ILSATALGFLAHKRGLFVHF
+>ENSMUST00000168613_musmusculus_1
+MSNKSDVIVVGGGISGMAAAKLLHDCGLSVVVLEARDRVGGRTYTIRNKN
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHFVKGKSYAFR
+GPFPPVWNPITYLDNNNLWRTMDEMGQEIPSDAPWKAPLAEEWDYMTMKE
+LLDKICWTKSTKQIATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQGKMLL---------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------
+>ENSMUST00000163344_musmusculus_1
+MSNKSDVIVVGGGISGMAAAKLLHDCGLSVVVLEARDRVGGRTYTIRNKN
+VKYVDLGGSYVGPTQNRILRLAKELGLETYKVNEVERLIHFVKIYKADCH
+ALCEPVCNCRDP--------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------------------------------------
+--------------------
+>ENSSSCT00000033745_susscrofa_1
+MAAAKLLHDSGLSVIVLEARDRVGGRTYTVRNQQVKYVDLGGSYVGPTQN
+RILRLSKELGLETYKVNEVERLIHYVKGKSYPFRGPLPPVRNPITFLDLN
+NLWRTVDDMGREIPSDAPWKAPLAEQWDQMTMKELLDKLCWTESSKQLAT
+LFVNLCVTAETHEVSALWFLWYVKQCGGTTRIISTTNGGQERKFVGGSGQ
+VTERIKDLLGDRVKLERPVVHIDQTGENVLVETLNHEVYEAKYVISAIPP
+VLGMKIHFSPPLPMMRNQLITRVPLGSVIKCIVYYKEPFWRHKDYCGSMI
+IEGEEAPIAYTLDDSKPDGSCAAIIGFILAHKARKLARLTKEERLKKLCD
+LYAKVLGSKEALNPVHYEEKNWCEEQYSAGCYTTYFPPGIMTQYGRVLRQ
+PVGRIYFAGTETATHWSGYMEGAVEAGERAAREILHAMGKIPEDEIWQSE
+PESVDVPAKPITTTFLERHLPSVPGLLRLIGLTAIFSATALGYLAHKRGL
+LVRV----------------
+>ENSSSCT00000023183_susscrofa_1
+MAAAKLLHDSGLSVIVLEARDRVGGRTYTVRNQQVKYVDLGGSYVGPTQN
+RILRLSKELGLETYKVNEVERLIHYIKGKSYPFRGPLPPVRNPITFLDLN
+NLWRTVDDMGREIPSDAPWKAPLAEQWDQMTMKELLDKLCWTESSKQLAT
+LFVNLCVTAETHEVSALWFLWYVKQCGGTTRIISTTNGGQERKFVGGSGQ
+VTERIKDLLGDRVKLERPVVHIDQTGENVLVETLNHEVYEAKYVISAIPP
+VLGMKIHFSPPLPMMRNQLITRVPLGSVIKCIVYYKEPFWRHKDYCGSMI
+IEGEEAPIAYTLDDSKPDGSCAAIIGFILAHKARKLARLTKEERLKKLCD
+LYAKVLGSKEALNPVHYEEKNWCEEQYSAGCYTTYFPPGIMTQYGRVLRQ
+PVGRIYFAGTETATHWSGYMEGAVEAGERAAREILHAMGKIPEDEIWQSE
+PESVDVPAKPITTTFLERHLPSVPGLLRLIGLTAIFSATALGYLAHKRGL
+LVRV----------------
+>ENSCAFT00000022963_canisfamiliaris_1
+MSGKCDVVMVGGGISGMAAAKLLHDFGLNVVVLEARDRVGGRTYTIRNQK
+VKYLDLGGSYVGPTQNCILRLAKELGLETYKVNEVERLIHHVKGKSYPFR
+GPFPPVWNPIAYLDHNNLWRTMDDMGREIPSDAPWKAPLAEEWDHMTMKE
+LLDKICWTESAKQLATLFVNLCVTAETHEVSALWFLWYVKQCGGTTRIIS
+TTNGGQERKFVGGSGQVSERIMDLLGDQVKLERPVTHIDQTGENVLVETL
+NHEVYEAKYVISAIPPTLGMKIHFNPPLPMMRNQLITRVPLGSVIKCIVY
+YKEPFWRKKDYCGTMIIEGEEAPIAYTLDDTKPDGNYAAIMGFILAHKAR
+KLARLTKDERMKKLCELYAKVLGSQEALQPVHYEEKNWCEEQYSGGCYTT
+YFPPGIMTQYGRVLRQPVGRIYFAGTETATHWSGYMEGAVEAGERAAREI
+LHAMGKIPEDEIWQSEPESVDVPAQPITTTFLERHLPSVPGLLRLIGLTA
+IFSATALGVLAHKRGLLVRV
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/align3.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/align3.fasta Mon Feb 20 06:25:33 2017 -0500
b
b'@@ -0,0 +1,171 @@\n+>ENSPTRT00000034846_pantroglodytes_1\n+MDPECAQLLPALCAVLVDPRQPVADDTCLEKLLDWFKTVTEGESSVVLLQ\n+EHPCLVELLSHVLKVQDLSSGVLSFSLRLAGTFAAQENCFQYLQQGELLP\n+GLFGEAGPLGRAAWAVPTVRSGWIQGLRSLAQHPSALRFLADHGAVDTIF\n+SLQGDSSLFVASAASQLLVHVLALSMQGGAEGQPCLPGGDWPACAQRIMD\n+HVEESLCSAATPKVTQALNVLTTTFGRCQSPWTEALWVRLSPRVACLLER\n+DPIPAAHSFVDLLLCVARSPVFSSSDGSLWETVARALSCLGPTHMGPLAL\n+GILKLEHCPQALRTQAFQVLLQPLACVLKATVQAPGPPGLLDGTADDATT\n+VDTLLASKSSCAGLLCRTLAHLEELQPLPQRPSPWPQASLLGATVTVLRL\n+CDGSAAPASSVGGHLCGTLAGCVRVQRAALDFLGTLSQGTGPQELVTQAL\n+AVLLECLESPGSSPTVLKKAFQATLRWLLSSPKTPGCSDLGPLIPQFLRE\n+LFPVLQKRLCHPCWEVRDSALEFLTQLSRHWGGQADFRCALLASEVPELA\n+LQLLQDPESYVRASAVTAMGQLSSQGLHAPTSPEHAEARQSLFPELLHIL\n+SVDSEGFPRRAVMQVFTEWLRDGHADAARDTEQFVATVLQVASRDLDWEV\n+RAQGLELALVFLGQTLGPPRTHCPYAVALPEVAPAQPLTEALRALCHVGL\n+FDFAFCALFDCDRPVAQKSCDLLLFLRDKIASYSSLREARGGPNTASAEA\n+TLPRWRAGEQAQPPGDQEPEAVLAMLRSLDLEGLRSTLAESSDHVEKSPQ\n+SLLQDMLATGGFLQGD----------------------------------\n+---------EADCY---\n+>ENST00000340611_homosapiens_1\n+MDPECAQLLPALCAVLVDPRQPVADDTCLEKLLDWFKTVTEGESSVVLLQ\n+EHPCLVELLSHVLKVQDLSSGVLSFSLRLAGTFAAQENCFQYLQQGELLP\n+GLFGEPGPLGRATWAVPTVRSGWIQGLRSLAQHPSALRFLADHGAVDTIF\n+SLQGDSSLFVASAASQLLVHVLALSMRGGAEGQPCLPGGDWPACAQKIMD\n+HVEESLCSAATPKVTQALNVLTTTFGRCQSPWTEALWVRLSPRVACLLER\n+DPIPAAHSFVDLLLCVARSPVFSSSDGSLWETVARALSCLGPTHMGPLAL\n+GILKLEHCPQALRTQAFQVLLQPLACVLKATVQAPGPPGLLDGTADDATT\n+VDTLLASKSSCAGLLCRTLAHLEELQPLPQRPSPWPQASLLGATVTVLRL\n+CDGSAAPASSVGGHLCGTLAGCVRVQRAALDFLGTLSQGTGPQELVTQAL\n+AVLLECLESPGSSPTVLKKAFQATLRWLLSSPKTPGCSDLGPLIPQFLRE\n+LFPVLQKRLCHPCWEVRDSALEFLTQLSRHWGGQADFRCALLASEVPQLA\n+LQLLQDPESYVRASAVTAMGQLSSQGLHAPTSPEHAEARQSLFLELLHIL\n+SVDSEGFPRRAVMQVFTEWLRDGHADAAQDTEQFVATVLQAASRDLDWEV\n+RAQGLELALVFLGQTLGPPRTHCPYAVALPEVAPAQPLTEALRALCHVGL\n+FDFAFCALFDCDRPVAQKSCDLLLFLRDKIASYSSLREARGSPNTASAEA\n+TLPRWRAGEQAQPPGDQEPEAVLAMLRSLDLEGLRSTLAESSDHVEKSPQ\n+SLLQDMLATGGFLQGD----------------------------------\n+---------EADCY---\n+>ENSRNOT00000064726_rattusnorvegicus_1\n+MDAECSRLLPALCAVLADPRQLV
ADDTCLEKLLDWFKTVTEAEPSLQLLQ\n+DHPCLMELLSHVLKPQDVSPRVLSFALRLVGVFAAQEDCFQYLQQGELLL\n+GLFGETGALSWAAWSIPSVRSGWIQGLCSLAHHPSALHFLADSGAVDTIF\n+SLQGDPSLFVASAASQLLVHILALSMQGGAPGSPVPEAVAWPVCAQKIMN\n+HVEESLHSKATPQVTQALNVLTTTFGRCHNPWTGVLWERLSPPVARLFER\n+DPIPATHSLMDLLLSVARSPVLNFAACGLWEMLAQTLSRLSPTQAGPLAL\n+GTLKLQHCPQELRTQAFRVLLQPLACILKATTQAPGPPGLLDGTAGSLLT\n+VDALLSSKSACVGLLCQTLAHLEELQMLPQCPSPWPQVPLLQAAVTILRL\n+SDGSADPSSSAGGRLCGALGGCVRVQRAALDFLGTLSLGTSPLELVLEVF\n+AVLLKTLESPESSPMVLKKAFQATLRWLQSPHKTPSGSDLSPDALLFLGE\n+LFPILQKRLCSPCWEVRDSALEFLTHLIRDWGGQADFREALHSSEVPTLA\n+RQLLQDPESYVRASAVGAAGQLSSQGLQATPANQQGLLMDLMHILSTDSE\n+GFPRRAVLRVFTEWLRDGHADVVRDTEWFVATVLQAVSRDLDWEVRVQGL\n+ELAQVFLIQAMGQPRLHCPYTVGLPGATSSRPHLEFLQTLCRLPLFEFAF\n+CALLDCDRPVAQKACDLLLFLRDKTVSCSNPQEAGDSPNSASVEAALQRW\n+REGEQAQPLGDLEPEAMLAILRSLDLEGLQGRLAKSSDHVEKSPQSLLQD\n+MLATVGVLEENEADCY----------------------------------\n+-----------------\n+>ENSMUST00000041588_musmusculus_1\n+MDPECSRLLPALCAVLADPRQLVADDTCLEKLLDWFKTVTEAESSLQLLQ\n+DHPCLMELLSHVLKPQDVSPRVLSFALRLVGVFAAQEDCFEYLQQGELLL\n+GLFGESGAPGWAAWSIPSVRSGWIQGLCYLAHHPSALHFLADSGAVDTLF\n+SLQGDPSLFVASAASQLLVHILALSMQGGAPGSPVPEAAAWPMCAQKIVN\n+HVDESLHAKATPQVTQALNVLTTTFGRCHNPWTGVLWERLSPPVARLFER\n+DPIPAVHALMDLLLSVARSPVLNFAACGLWEMLAQTLSRLSPIQAGPLAL\n+GTLKLQHCPQELRTQAFGVLLQPLACILKATTQAPGPPGLLDGTVGSLLT\n+VDILLASKSACVGLLCQTLAHLEELQMLPQCPSPWPQVHLLQAALTILHL\n+CDGSADPSSSAGGRLCGTLGGCVRVQRAALDFLGTLSQGTSPLELVLEVF\n+AVLLKTLESPESSPMVLKKAFQATLRWLQNPHKTPSSSDLSSDALLFLGE\n+LFPILQKRLCSPCWEVRDSALEFLTHLIRHWGGQADFREALRSSEVPTLA\n+LQLLQDPESYVRASAVGAAGQLSSQGLQAAPASPENSQAQQGLLMDLMHI\n+LSTDSEGFPRRAVLRVFTDWLRDGHADVVRDTEWFVATVLQAVSRDLDWE\n+VRVQGLELARVFLTQALGQPSLHCPYTVGLPRASSPRPHPEFLQTLCRLP\n+LFEFAFCALLDCDRPVAQKACDLLLFLRDKTVPCSSPREAGDSPNSASVE\n+AALQRWREGEQAQPLGDLDPEAMLAILRALDLEGLQGRLAKSSDHVEKSP\n+QSLLQDMLATVGVLEENEADCY----------------------------\n+-----------------\n+>ENSMUST00000153440_musmusculus_1\n+MDPECSRLLPALCAVLADPRQLVADDTCLEKLLDWFKTVTEAESSLQLLQ\n+DHPCLMELLSHVLKPQDVSPRVLSFALRLVGVFAAQEDCFEYLQQGELLL\n+GLF
GESGAPGWAAWSIPSVRSGWIQGLCYLAHHPSALHFLADSGAVDTLF\n+SLQGDPSLFVASAASQLLVHILALSMQGGAPG'..b'-----------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+-----------------\n+>ENSMUST00000110806_musmusculus_1\n+MDPECSRLLPALCAVLADPRQLVADDTCLEKLLDWFKTVTEAESSLQLLQ\n+DHPCLMELLSHVLKPQDVSPRVLSFALRLVGVFAAQEDCFEYLQQGELLL\n+GLFGESGAPGWAAWSIPSVRSGWIQGLCYLAHHPSALHFLADSGAVDTLF\n+SLQGDPSLFVASAASQLLVHILALSMQGGAPGSPVPEAAAWPMCAQKIVN\n+HVDESLHAKATPQVTQALNVLTTTFGRCHNPWTGVLWERLSPPVARLFER\n+DPIPAVHALMDLLLSVARSPVLNFAACGLWEMLAQTLSRLSPIQAGPLAL\n+GTLKLQHCPQELRTQAFGVLLQPLACILKATTQAPGPPGLLDGTVGSLLT\n+VDILLASKSACVGLLCQTLAHLEELQMLPQCPSPWPQVHLLQAALTILHL\n+CDGSADPSSSAGGRLCGTLGGCVRVQRAALDFLGTLSQGTSPLELVLEVF\n+AVLLKTLESPESSPMVLKKAFQATLRWLQNPHKTPSSSDLSSDALLFLGE\n+LFPILQKRLCSPCWEVRDSALEFLTHLIRHWGGQADFREALRSSEVPTLA\n+LQLLQDPESYVRASAVGAAGQLSSQGLQAAPASPENSQAQQVDTGSW---\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+-----------------\n+>ENSMUST00000100505_musmusculus_1\n+MVHFALSGLIPVAEGCGGGIRTPRLTRSRRERTSYPDLGGTACLIMDPEC\n+SRLLPALCAVLADPRQLVADDTCLEKLLDWFKTVTEAESSLQLLQDHPCL\n+MELLSHVLKPQDVSPRVLSFALRLVGVFAAQEDCFEYLQQGELLLGLFGE\n+SGAPGWAAWSIPSVRSGWIQGLCYLAHHPSALHFLADSGAVDTLFSLQGD\n+PSLFVASAASQLLVHILALSMQGGAPGSPVPEAAAWPMCAQKIVNHVDES\n+LHAKATPQVTQALNVLTTTFGRCHNPWTGVLWERLSPPVARLFERDPIPA\n+VHALMDLLLSVARSPVLNFAACGLWEMLAQTLSRLSPIQAGPLALGTLKL\n+QHCPQELRTQAFGVLLQPLACILKATTQAPGPPGLLDGTVGSLLTVDILL\n+ASKSACVGLLCQTLAHLEELQMLPQCPSPWPQVHLLQAALTILHLCDGSA\n+DPSSSAGGRLCGTLGGCVRVQRAALDFLGTLSQGTSPLELVLEVFAVLLK\n+TLESPESSPMVLKKAFQATLRWLQNPHKTPSSSDLSSDALLFLGELFPIL\n+QKRLCSPCWEVRDSALEFLTHLIRHWGGQADFREALRSSEVPTLALQLLQ\n+DPESYVRASAVGAAGQLSSQGLQAAPASPENS
QAQQGLLMDLMHILSTDS\n+EGFPRRAVLRVFTDWLRDGHADVVRDTEWFVATVLQAVSRDLDWEVRVQG\n+LELARVFLTQALGQPSLHCPYTVGLPRASSPRPHPEFLQTLCRLPLFEFA\n+FCALLDCDRPVAQKACDLLLFLRDKTVPCSSPREAGDSPNSASVEAALQR\n+WREGEQAQPLGDLDPEAMLAILRALDLEGLQGRLAKSSDHVEKSPQSLLQ\n+DMLATVGVLEENEADCY\n+>ENSCAFT00000025950_canisfamiliaris_1\n+MDPECSQLLPALCAVLADPRQPVADDTCLEKLLDWFKTITKAGSSLLLLQ\n+ENPCLVELLCHVLKPQDLSSRVLSFSLRLAGVFAAQEDCFQYLQQGELLP\n+RLFGEPGPLGGAAWTAPSVRSGWIQGLRSLAQHPSALHFLADCGAVDTIF\n+SLQGDSSLFVASAAGQLLVHILGLSMQGLAEGHPSLQAGDWPVCAQKIVG\n+HIEESLHSTAVPQITQALNVLTTTFGHCHDPWTQVLWVRLSPLVGSLLEK\n+DPVPAAHSLVDLLLSVARSPGLSSSSCGLWETLAQTLNHLSPTQAGPLAL\n+GILKLQDCPQVLRAQAFVILLQPLACVLKATGQDPGPSGVLDSATGDSLT\n+VDVLLSSKAACVGLLCRTLAHLELLQPLPQRPCPWPQEPLLGAAVTLLQL\n+CRGSASPASDVGRHLCALLLGCVRVQRAALDFLGTLSQGTGPQELVTEVF\n+AVLLEYLRSPDSSPTVLKKAFQATFRWLLSSPKTPGCCDLEPHALLVLRE\n+LLPVLQKRLCSPCWEVRDSGLEFLTQMTRHWGGQAGFRHALLASEVPKLT\n+EQLLRDPESYVRASAVTVMGQLSSQGLHVTPVSPEHPGGQQKSLLVELLH\n+ILSTDSEGFPRRAVMQVFTQWLRDHHADVAGDTEQFMARVLQVASQDLDW\n+EVRAQGLELALVFLEQTLGQFHSHCPYAVTPPVAAPAGSLAQALQPLCRV\n+RLFEFAFRALFDCDRPVAQKSCDLLLFLRAKTASSSGSLEESRSGPDVAS\n+VEAALQRWQAGDQGQPLGDLEPEVVLAVLRSVDLEGLRGALAEGSDHMEK\n+SPQSLLQDMLATVGVL----------------------------------\n+---------GENEADCY\n+>ENSSSCT00000008295_susscrofa_1\n+MDPECSRLLPALCAVLADPRQPVADDTCLEKLLDWFKTITEAGSSLLVLQ\n+ENPCLVELLFDVLKPQDLSPRVLSFSLRLAGMFAAQEDCFQYLQQGELLP\n+RLFGEAGPLGGAAWTAPTVRSGWIQGLRSLAQHPRALPFLADCGAVDTIF\n+SLQGDSSLFVASAAGQLLVHILDLAMRAPPRGHPGPQACDGPACAQKIVC\n+HLEDSLRAAAAPQLTQALNVLTTTFGHCHGLWTPGLWGRLSPLVGHLLEK\n+DPVPASHALVDLLLSVARSPVLSSESGPWETLALTLSRLSPMQAGPLALG\n+ILKLQDCPQALRTQASGVLLQPLACVLKAAVQGPGRPGRLLGKGGRCRMA\n+GPWGPERILGAGAGCHPRCDCCSCCHPGXSALGLAQEARLRLCDNAAIPR\n+SDLGGQFCGHLVACVRVQRAALDFLGTLSQRAGPQELVTQVSAVLLEYLS\n+GPDSSPTVLKKAFQASLGWLLSSPKTPGCCDLDPHAQQFLRELLPVLQKR\n+LCSPCWEVRDSGLEFLTQMARHWGGQAAFRQVLLASEVPRLTRQLLQDPE\n+SYVRASAVTATGQLSSWGLHAAPAGPEHPGVQQKSLLSELLHVLATDSEG\n+FPRRAVMQVFTEWLRGGHADVAEDPEQFVAGVLQAASRDLDWEVRAQGLE\n+LALVFLEQLLGPPGVRCPYAVALPKAAPPGTLAQALQALCRVQLFEFAFR\n+ALFDCDRPVA
QKSCDLLLFLKAKATSCGTPQEERDSPDVSSVEATLQKWQ\n+AGEHGHPLGDLEPAAVVAVLRSMDLEGLQDTLAESSDHVERSPQSLLQDM\n+LATVGVLGENEADCY-----------------------------------\n+-----------------\n'
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/align4.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/align4.fasta Mon Feb 20 06:25:33 2017 -0500
b
b'@@ -0,0 +1,290 @@\n+>ENSPTRT00000019089_pantroglodytes_1\n+IGTGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHEL\n+ENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLK\n+DLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNN\n+ELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATV\n+INGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPT\n+KCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG\n+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDS\n+VTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS\n+YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQ\n+GKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELL\n+KFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDA\n+CGSNSWTVVDIDPPLRSNDPKSQNHP--GWLMRGLKPWTQYAIFVKTLVT\n+FSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDP\n+NGNITHYLVFWERQAEDSELFELDYCLKWCVWCVCVVVCLWSVCEYEDSA\n+GECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTSSGTGAEDPR\n+PSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEKVVNKES\n+LVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGP\n+VTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCVSRKHFA\n+LERGCRLRGLSPGNYSVRVRATSLAGNGSWTEPTYFYVTDYLDVPSNIAK\n+IIIGPLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVF\n+PCSVYVPDEWEVPREKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAV\n+KTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELM\n+AHGDLKSYLRSLRPEAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFV\n+HRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMAPE\n+SLKDGVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLD\n+QPDNCPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHS\n+EENKAPESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYE\n+EHIPYTHMNGGKKNGRILTLPRSNPS----------\n+>ENST00000341500_homosapiens_1\n+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHEL\n+ENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLK\n+DLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNN\n+ELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATV\n+INGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPT\n+KCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG\n+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDS\n+VTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS\n+YALVSLSFFRKL
RLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQ\n+GKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELL\n+KFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDA\n+CGSNSWTVVDIDPPLRSNDPKSQNHP--GWLMRGLKPWTQYAIFVKTLVT\n+FSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDP\n+NGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHN\n+QSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRPSRK\n+RRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPFEKVVNKESLVIS\n+GLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEAKADDIVGPVTHE\n+IFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELHLCVSRKHFALERG\n+CRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYLDVPSNIAKIIIG\n+PLIFVFLFSVVIGSIYLFLRKRQPDGPLGPLYASSNPEYLSASDVFPCSV\n+YVPDEWEVSREKITLLRELGQGSFGMVYEGNARDIIKGEAETRVAVKTVN\n+ESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQPTLVVMELMAHGD\n+LKSYLRSLRPEAENNPGRPPPTLQEMIQMAAEIADGMAYLNAKKFVHRDL\n+AARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGLLPVRWMAPESLKD\n+GVFTTSSDMWSFGVVLWEITSLAEQPYQGLSNEQVLKFVMDGGYLDQPDN\n+CPERVTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPSFPEVSFFHSEENK\n+APESEELEMEFEDMENVPLDRSSHCQREEAGGRDGGSSLGFKRSYEEHIP\n+YTHMNGGKKNGRILTLPRSNPS--------------\n+>ENST00000302850_homosapiens_1\n+MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHEL\n+ENCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLK\n+DLFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNN\n+ELCYLATIDWSRILDSVEDNYIVLNKDDNEECGDICPGTAKGKTNCPATV\n+INGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHSECLGNCSQPDDPT\n+KCVACRNFYLDGRCVETCPPPYYHFQDWRCVNFSFCQDLHHKCKNSRRQG\n+CHQYVIHNNKCIPECPSGYTMNSSNLLCTPCLGPCPKVCHLLEGEKTIDS\n+VTSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGYLKIRRS\n+YALVSLSFFRKLRLIRGETLEIGNYSFYALDNQNLRQLWDWSKHNLTITQ\n+GKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELL\n+KFSYIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDA\n+CGSNSWTVVDIDPPLRSNDPKSQNHP--GWLMRGLKPWTQYAIFVKTLVT\n+FSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSDP\n+NGNITHYLVFWERQAEDSELFELDYCLKGLKLPSRTWSPPFESEDSQKHN\n+QSEYEDSAGECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTSS\n+GTGAEDPRPSRKRRSLGDVGNVTVAVPTVAAFPNTSSTSVPTSPEEHRPF\n+EKVVNKESLVISGLRHFTGYRIELQACNQDTPEERCSVAAYVSARTMPEA\n+KADDIVGPVTHEIFENNVVHLMWQEPKEPNGL
IVLYEVSYRRYGDEELHL\n+CVSRKHFALERGCRLRGLSPGNYSVRIRATSLAGNGSWTEPTYFYVTDYL\n+DVPSNI'..b'GLIEEISGFLKIRRS\n+YALVSLSFFRKLHLIRGETLEIGNYSFYALDNQNLRQLWDWNKHNLTITQ\n+GKLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELL\n+KFSFIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDA\n+CGSNSWTVVDIDPPQRSNDPKSQTPS--HPGWLMRGLKPWTQYAIFVKTL\n+VTFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPS\n+DPNGNITHYLVYWERQAEDSELFELDYCLKGLKLPSRTWSPPFESDDSQK\n+HNQSEYDDSASECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKT\n+SSGNGAEDTRPSRKRRSLEEVGNVTATTPTLPDFPNISSTIAPTSHEEHR\n+PFEKVVNKESLVISGLRHFTGYRIELQACNQDSPEERCSVAAYVSARTMP\n+EAKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEEL\n+HLCVSRKHFALERGCRLRGLSPGNYSVRVRATSLAGNGSWTEPTYFYVTD\n+YLDVPSNIAKIIIGPLIFVFLFSVVIGSIYLFLRKRQPDGPMGPLYASSN\n+PEYLSASDVFPSSVYVPDEWEVPREKITLLRELGQGSFGMVYEGNAKDII\n+KGEVETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKG\n+QPTLVVMELMAHGDLKSHLRSLRPDAENNPGRPPPTLQEMIQMTAEIADG\n+MAYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKG\n+LLPVRWMSPESLKDGVFTASSDMWSFGVVLWEITSLAEQPYQGLSNEQVL\n+KFVMDGGYLDPPDNCPERLTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHP\n+SFPEVSFFYSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGCREGG\n+SSLSIKRTYDEHIPYTHMNGGKKNGRVLTLPRSNPS\n+>ENSRNOT00000067448_rattusnorvegicus_1\n+MGSGRGCETTAVPLLMAVAVAGGTAGHLYPGEVCPGMDIRNNLTRLHELE\n+NCSVIEGHLQILLMFKTRPEDFRDLSFPKLIMITDYLLLFRVYGLESLKD\n+LFPNLTVIRGSRLFFNYALVIFEMVHLKELGLYNLMNITRGSVRIEKNNE\n+LCYLATIDWSRILDSVEDNYIVLNKDDNEECGDVCPGTAKGKTNCPATVI\n+NGQFVERCWTHSHCQKVCPTICKSHGCTAEGLCCHKECLGNCSEPDDPTK\n+CVACRNFYLDGQCVETCPPPYYHFQDWRCVNFSFCQDLHYKCRNSRKPGC\n+HQYVIHNNKCIPECPSGYTMNSSNLMCTPCLGPCPKVCQILEGEKTIDSV\n+TSAQELRGCTVINGSLIINIRGGNNLAAELEANLGLIEEISGFLKIRRSY\n+ALVSLSFFRKLHLIRGETLEIGNYSFYALDNQNLRQLWDWNKHNLTITQG\n+KLFFHYNPKLCLSEIHKMEEVSGTKGRQERNDIALKTNGDQASCENELLK\n+FSFIRTSFDKILLRWEPYWPPDFRDLLGFMLFYKEAPYQNVTEFDGQDAC\n+GSNSWTVVDIDPPQRSNDPKSQTPSH--PGWLMRGLKPWTQYAIFVKTLV\n+TFSDERRTYGAKSDIIYVQTDATNPSVPLDPISVSNSSSQIILKWKPPSD\n+PNGNITHYLVYWERQAEDSELFELDYCLKGLKLPSRTWSPPFESDDSQKH\n+NQSEYDDSASECCSCPKTDSQILKELEESSFRKTFEDYLHNVVFVPRKTS\n+SGNGAEDTRPSRKRR
SLEEVGNVTATTPTLPDFPNISSTIAPTSHEEHRP\n+FEKVVNKESLVISGLRHFTGYRIELQACNQDSPEERCSVAAYVSARTMPE\n+AKADDIVGPVTHEIFENNVVHLMWQEPKEPNGLIVLYEVSYRRYGDEELH\n+LCVSRKHFALERGCRLRGLSPGNYSVRVRATSLAGNGSWTEPTYFYVTDY\n+LDVPSNIAKIIIGPLIFVFLFSVVIGSIYLFLRKRQPDGPMGPLYASSNP\n+EYLSASDVFPSSVYVPDEWEVPREKITLLRELGQGSFGMVYEGNAKDIIK\n+GEVETRVAVKTVNESASLRERIEFLNEASVMKGFTCHHVVRLLGVVSKGQ\n+PTLVVMELMAHGDLKSHLRSLRPDAENNPGRPPPTLQEMIQMTAEIADGM\n+AYLNAKKFVHRDLAARNCMVAHDFTVKIGDFGMTRDIYETDYYRKGGKGL\n+LPVRWMSPESLKDGVFTASSDMWSFGVVLWEITSLAEQPYQGLSNEQVLK\n+FVMDGGYLDPPDNCPERLTDLMRMCWQFNPKMRPTFLEIVNLLKDDLHPS\n+FPEVSFFYSEENKAPESEELEMEFEDMENVPLDRSSHCQREEAGCREGGS\n+SLSIKRTYDEHIPYTHMNGGKKNGRVLTLPRSNPS-\n+>ENSSSCT00000014817_susscrofa_1\n+VCPGMDIRNNLTRLHELANCSVIEGHLQILLMFKTRPEDFRDLSFPKLIM\n+ITDYLLLFRVYGLESLKDLFPNLTVIRGSRLFFNYALVIFEMVHLKELGL\n+YNLMNITRGAVRIEKNNELCYLATIDWSRILDSVEDNYIVLNKDDNEECG\n+DICPGTAKGKTNCPATVINGQFVERCWTHSHCQKVCPTICKSHGCTAEGL\n+CCHSECLGNCSEPDDPTKCVACRNFYLDGRCVETCPPPYYHFQDWRCVNF\n+SFCQDLHNKCKNSRRQGCHQYVIHNNKCIPECPSGYTMNSSNLMCTPCLG\n+PCPKVCHLLEGEKTIDSVTSAQELRGCTIINGSLIINIRGGNNLAAELEA\n+NLGLIEEISGYLKIRRSYALVSLSFFRKLRLIRGETLEIGNYSFYALDNQ\n+NLRQLWDWSKHNLTITQGKLFFHYNPKLCLSEIHKMEEVSGTKGRQERND\n+IALKTNGDQASCENELLKFSYIRTSYDKILLKWEPYWPPDFRDLLGFMLF\n+YKEAPYQNVTEFDGQDACGSNSWTVVDIDPPTRSNDPKSQNHPGWLMRGL\n+KPWTQYAIFVKTLVTFSDERRTYGAK--SDIIYVQTDATSKHV-------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+--------------------------------------------------\n+----------------------------------
----------------\n+--------------------------------------------------\n+--------------------------------------------------\n+------------------------------------\n'
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/cigar.tabular
--- a/test-data/cigar.tabular Wed Dec 21 07:31:50 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,64 +0,0 @@\n-ENSLACP00000008815\t477DM4D2M2795D18M6D16M4D2MD17M8D14MD68MD28M2D32M2D13M27DM3D13M6D10M6D5M3D36MD139MD43M5D96M2D83MD16M2D27M2D66M5D102M12D49M4D124M7D6M3D16MD23MD26M12D68M4D85M\n-ENSXETP00000060681\t4D22MD23M3D43MD3MD17M5D19M4D5MD3M3D42M2D3M2D28M2D6M6D17MD2M3D14M11D23M4D4M3D15M3DM6D37MD7MD11M3D3MD24M4D8M3D19M4D9MD10M4D16M8D18MD14M82D17M12DM9D23M15D70M5D4M2D22M4D28M4D35MD44M29D2MD10M115D5M8D7M4D3M5D23M4D24MD8M7D103MD2M2D5MD21M2D44M5D42MDM3D64M6D11M2D75M8D34M5D9MD6M4D8MD33M8D7M7D14M11D15M15D3M12D8MD25M14D6M15D3M21D25M81DM5D10M5D16M162DM4D11M5D12MD21MD25M6D5M6D18M23D8MD5MD24M2D25M9D9M5D4M2D30M4D23M10D4M26D31M2D14M28DM10D10M38D13M59D17MD3M2D3M5D6M20D28M2D3M2D18M4D10MD27M5D19M4D31MD28M7D38M13D2M66D33M27D74MD24M2D5M10D31M6D16MD5MD17M8D13M8D2MD35M22D8MD11MD10M2D32M2D13M27DM3D14MD14M4D7M3D36MD139MD43M5D7M3D86M2D83MD16M2D27M2D67M4D102M12D49M4D37MD20M4D17M16D15MD13M7D6MD8M3D7MD23MD13MD3MD8M12D4MD11MD33MD9M17D41M19D4M16D\n-ENSOANP00000032170\t3946D47M2D14M2D27M497D6M113D\n-ENSOANP00000024376\t4D46M3D47MD18MD22M4D8M4D19M11D12M2D3MD38MD14M2D25M11D11M3D9M4D4M3D6MD16MD8MD28M10D11M3D3MD19M10D12MD17MD11MD10M4D15M9D18MD14M82D21M3D6M9D23MD2MD62M3D17M2D6M2D24M2D16MD2MD36MD10MD17MD26M29D2MD10M115D12MD7M4D4M4D24M3D18M8D7M7D2MD21M4D34M3D27M3D6MDMD9MD21M2D29MD47M14D2M3D42M4D17M7D11M2D7M30DM6D31M8D34M5D9MD6MD32MD12M8D7M8D13M11D14M16D3M12D8MD25M5D15M2D10M3D4M4D21M3D21M53D12MD12M2D13MD20M162DM4D11MD64M6D16MD13M13D17MD5MD24M2D25MD26M2D14MD113M2D6M2D14M8D3M8D3M2D57M4D22M39D22MD3M2D24M10D6M2D16M3DM2D26MD10MD6M2D19M5D10MD25M2D43MDM7D37M82D33MD17M4D37MD21M8D23M6D8M2D14MD13MD11MD5M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17MD10M2D32M2D37M3DM3D14MD17M2D6M3D36MD139MD43M5D7M3D86M2D84M2D14M2D27M2D66M5D102M12D25M4D20M4D36MD40M10D3M2D45MD18MD23MD13MD3MD6M19D56M16D35M3D3M39D\n-ENSMEUP00000009812\t122D20M4D9M3D19M11D12M2D3MD37MD15M2D5MD2M3D14M11D13MD9M4D4M3D6MD11M6D8MD10M2D17MD7MD6MD4M3D3M5D20M4D13MD17MD11MD10M4D42M97D21M8DM9D23M15D51M2D18M2D6M2D22M4D16M16D35MD44M29D2MD10M115D12M20D23M4D24MD8M7
D2M9D13M176D14M14DM4D41M19DM9D11M2D8M36D31M8D34M2DM3D6M3D6M4D9MD12M11D8M9D7M8D13M11D10MD4M15D3M12D8MD25M5D15M4D8M3D3M5D6MD14MD19M2D2M53D6MD5MD12M6D9MD20M166D25M5D47M6D21M6D3M11D9M4D6MD30M2D25M3D24M2D30M2D9MD11M6D18M3D48M2D6M2D7M29D19MD9M5D61M24D31M2D2M32D29M2D64M5D19M4D60M7D37M82D33MD17M4D104M2D46M6D22MD17M8D15MD7M3D52M6D17MD11M2D57M6D7M3DM3D15MD16M6D2M3D36MD139MD43M5D7M3D86M2D55MD21MD5M3D15M2D94M8D99M12D25M4D20M4D37MD41M7D4M2D13M6D13M7D6MD18MD23MD13MD3MD8M12D4MD11MD28M2D2MD10M17D16M3D16M4D3M19D3M16D\n-ENSSHAP00000012162\t4D4M19D23M3D47MD18MD22M4D9M3D19M11D12M2D3MD38MD14M2D5MD2M3D14M11D23M4D4M3D6MD12M6D7MD10M3D16MD7MD5MD5M3D3M2D23M4D13MD17MD11MD10M4D16MD6MD8M17D8M82D21M3D6M9D23MD2M3D29MD30M2D18M2D6M2D42M4D6MDM4D32MD2MD26M7D11M29D2MD10M115D6M7D13M6D23M4D42M2D20M4D34M3D27M3D8MD8M2D21M7D17MD13MD37M14D5M3D42M5D33M3D7M30DM6D31M8D34M5D7M3D18MD17M4D12M8D7M8D13M16D10M15D3M12D8MD25M5D15M2D10M3D4M4D44M54D6M7D12M5D10MD20M162DM4D28MD47M6D30M11D9M4D6MD30M2D25M3D24M2D4M2D24M2D9MD15M5D13M2D51M2D6M2D6M28DM10D19M18D36M4D9M24D31M2D2M32D28M2D4MD21MD17M2D19M5D19M4D12M3D7M3D33MDM7D38M13D4M57D40MD17M4D55M2D9MD12MD18M2D4M2D14MD13MD17M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17M2D9M2D32M2D25M5D7M3DM3D14MD3MD13MD7M3D36MD139MD43M5D7M3D86M2D83MD16M2D95M5D102M12D25M4D20M4D37MD41M5D4M9D14MD12M7D6MD18MD23MD12MD13M12D4MD11MD27M113D\n-ENSMODP00000033276\t4D22MD23M3D47MD18MD22M4D9M3D19M11D12M2D3MD38MD14M2D5MD2M3D14M11D23M4D4M3D6MD11M7D7MD11M2D16MD7MD5MD5M3D3MD24M4D13MD17MD11MD10M4D23M2D7M17D8M82D21M3D6M9D23MD34MD30M2D18M2D6M2D24M2D16MDMD7MDM4D32MD2MD27MD16M29D2MD10M115D12MD13M6D23M4D42MD21M4D34M3D27M3D8MD9MD21M7D17MD13MD37M14D5M3D42M6D42M30DM6D31M8D34M5D9MD6M3D9MD20MD12M8D7M8D13M11D12M18D3M12D8MD25M5D15M2D10M3D5M3D11M4D31M52D12MD12M6D9MD5MD14M162DM4D28MD47M6D30M11D9M2D4M2D2MD5MD24M2D25M3D24M2D30M3D8MD15M10D61M2D6M2D6M46D12M18D35M5D9M24D31M2D2M32D28M2D4MD21MD8M3D28M4D19M4D13M2D12M3D28MDM7D8M3D26M14D4M57D40MD17M7D9MD42M2D9MD12MD18M2D4M2D14MD13MD17M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17M2D9M2D59M5D7M3DM3D14MD3MD
13MDMD5M3D36MD1'..b'3M9D35M3D3M19D20M\n-ENSLOCP00000009962\t3570DM9D3M3D36MD139MD42MDM3D97M2D83M3D14M2D19M2D6M2D68M4D101M12D49M4D36MD23M2D62M12DMD18MD18M3D2MD13MD3MD12MD6M21D18M119D\n-ENSDARP00000099674\t14D11M2D18M8D25MD17M5D17M22D2M4D9M6D20M7D15M3D53M2D8M3D13M7D28M4D4M3D6M19D18MD18MD13M17D20M4D8M3DM3D5M12D10M84D57M140D2M3D17M2D4M5D21M36D35MD11M5D28M29D13M115D4M28D23M4D24MD8M7D24M20D18M23D7M22D21M3D16M43DM3D13M14DM6D40M28D11M2D8M45D22M8D34M6D15M2D24M8D11M6D10M8D13M11D19M11D5M10D8MD17M2D94M53D61M162D16MD100M11D19MD5MD24M2D84M18D35M15D30M104D16M4D7MD9M24D25M42D27M3D32M6D27M5D19M4D8M7D43M8D16M108D29MD17M12D4M2D22M4D39MD24M2D14M2D12MD45MD17M8D14MD8MD37M4D7M3DM9D27M22D51M6D16MD16MD7M3D36MD139MD41M6D8M3D86M2D84M2D14M2D18M3D6M2D68M4D101M12D48M5D36MD42M15D14M3D13M19D13MD23MD12M6D12MD4M20D28M112D\n-ENSAMXP00000013440\t10D12M5D23M3D89M4D9M35D15MD53M2D8M3D13M11D14MD9M4D6MD6M19D18MD18MD13M9D28M4D5M13D2M12D11MD5M65D14M4D47M145DM3D17M2D6M2D22M4D17MDMD6M6D10M4D21MD21M10D13M29D13M115D4M37D14M4D15MD8MD8M7D4M7D13M20D31MD16M105DM3D13M14D47M33D6M2D8M36D12M10D9M8D37M3D8MD6M3D58M7D14M11D15M15D3M12D8MD25M5D15M15D5M3D21M8D12M83D14MD20M162DM17D15M7D14M2D21M23D11M28D8MD2M4D24M2D25M5D21M73D13M17D36M96D37M24D6M4D17M13D8M19D27M13D30M6D19M5D19M4D5M10D43M8D37M83D26M2D5MDM34D29MD22MD10M3D20M19DM2D12MD16M6D17MD5MD39MD8M2D36M3D12M9D16MD44MD25M19D16MD12M9D3M3D36MD139MD44M3D8M3D86M2D83M3D14M2D19M2D6M2D68M4D101M12D48M5D36MD23M23D7M2D32M9D4MD18MD23MD13MDM3D6M7D11M12D24M117D\n-ENSGMOP00000010385\t3588D34MD8M10D31MD89MD43M5D6M11D29M2D48M2D84M16DM2D18M2D6M2D64M3DM4D101M12D49M4D37MD41M69D8MD37MD12M12D4MD6M9D17M2DM117D\n-ENSONIP00000006940\t14D11M2D23M3D6MD40M7D14M4D22M19D11M14D9M2D3MD53M2D8M3D36M3D26M19D7MD3M16D10MD7MD11M4D2MD3M5D28M9D5M10D11MD5M65DM48D55M3D6M9D19M3DMD2M12D7M10D34M3D30M134D32M3D2M2D123M37DM6D3M10D13M2D16M7D2MD21M4D26M336DM7D36M3D10MD6M4D29MD11M8D16M9D4M11D18M23D12MD25MD7M41D16M21D31MD11MD11M56D179M5D16M4D17M2D25M6D2M14D8M34D2MD5MD7M13D4M2D25M6D16M7D2M148D43M15D8M15D28MD53M7D4M2D3M4D13M17D5
4M32D31M36D41M48D61MD35M84DMD13M3D33M6DM2D12MD19M6D14MD22M8D24MD37M2D12M13D2M5D6MD12M2D29M14D15M4D4M3D11M3D8MD2MD11M2D47MD139MD55M3D86M2D84M2D14M2D19M2D6M2D35MD30MD106M3D2M7D49M4D36MD29M12D12M7D10M5D12M9D11M3D9MD23MD13MDM5D10MD6M18D26M114D\n-ENSPFOP00000001575\t4D7M3D11M2D23M3D6M2D39M7D35M4D9M3D15M33D53M2D8M3D13M7D28MDM35D6MD3M10D16MD7MD11M3D28M4D8M9D5M10D11MD5M65DM82D21M3D3M2DM9D23MD2M12D7M43DM3D2M4D11M2D6M2D22M56D15MD23M5D16M29D2M3D2M2D4M115D4M37DM6D7M4D2M12DM2D16M10D59M22D8M22D9M2D10M2D16M87D35M28D11M46D31M7D40MD61M5D10M7D12M13D19M26D8MD25M2D18M3D8M12D46M27D37MD48M166D10M3D16MD47M6D60MD3M3D51MD30MD27M2D51M2D51M2D14M8D3M8D22M15D51MD9M24D31M2D3M5D14M12D29M2D31M6D178M57D39MDM34D65M14D11M2D7M6DM2D12MD19M6D20MD16M9D14MD21MD38M13D13MD11M3D30M2D58MD3MD21M3D36MD139MD55M3D86M2D84M2D14M2D19M2D6M2D35MD30MDM4D101M12D49M4D36MD27M6D9M5DM9D13M5D12M7D6MD5M4D9MD23MD13MD11M12D5M12D28M113D\n-ENSXMAP00000006983\t4D7M3D11M2D23M3D6M2D39M7D35M23D12M14D9M2D3MD53M2D8M3D13M7D28MDM35D6MD3M10D16MD7MD11M3D3M3D22M4D8M9D5M10D11MD5M65DM82D21M3D3M2DM9D23MD2M12D7M48DM4D11M2D6M2D22M56D15MD23M5D16M29D2M3D8M115D4M37DM6D7M4D15M2D7MD8M7D2MD6M2D13M13D25M22D8M22D9M2D10M2D16M87D35M16D3M9D11M2D8M36D21M18D39MD8MD52M8D7M12D9M11D53MD25MD19M3D8M12D46M27D37MD48M166D10M3D16MD47M6D60MD5MD24M2D25MD30MD27M2D46M2D56M2D14M8D3M8D22M15D51MD9M24D31M2D3M5D14M12D29M2D23M3D8M3D178M57D39MDM34D16M7D28M2D23M3D11M2D7M6DM2D12MD19M6D14MD5MD16M8D15MD21MD38M13D13MD11M3D30M2D60MD23M3D36MD139MD55M3D86M2D84M2D14M2D19M2D6M2D35MD32M4D101M12D49M4D36MD27M6D9M5DM9D13M5D12M7D6MD5M4D9MD23MD13MD3M3D5M12D5M12D28M113D\n-ENSGACP00000015199\t3583D39MD8M7D124MD40M7D8M3D86M2D83M3D14M2D19M2D6M2D66M5D102M12D49M4D36MD53M7D10M5D12M9D11M3D9MD23MD13MDM5D10M5D7M12D22M119D\n-ENSTNIP00000002435\t272D13M2514D9M465D6MD17M8D14M4D2MD16M9D14MD46M10D4M13D13MD10M4D56M24D11MD16M6D2M3D36MD139MD55M3D6MD79M2D84M2D14M2D19M2D6M2D66MDM4D101M12D49M4D36MDM338D\n-ENSTRUP00000015030\t3587D35MD139MD55M3D6MD79M2D83M3D14M2D19M2D6M2D66MDM4D101M12D49M4D36MD45M2D6M2D15M5D12M7D6M
D3MDM4D9MD23MD12M6D7M12D5MD5M6D22M119D\n'
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/gene.json
--- a/test-data/gene.json Wed Dec 21 07:31:50 2016 -0500
+++ b/test-data/gene.json Mon Feb 20 06:25:33 2017 -0500
[
b'@@ -1,1 +1,9586 @@\n-{"ENSTNIG00000016261":{"source":"ensembl","object_type":"Gene","logic_name":"ensembl","version":1,"species":"tetraodon_nigroviridis","description":"breast cancer 2, early onset [Source:ZFIN;Acc:ZDB-GENE-060510-3]","display_name":"brca2","assembly_name":"TETRAODON8","biotype":"protein_coding","end":4705074,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIG00000016261","Transcript":[{"source":"ensembl","object_type":"Transcript","logic_name":"ensembl","Exon":[{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4700679,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000057385","start":4700614},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701157,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000041338","start":4701103},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701424,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000031348","start":4701218},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701571,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000063263","start":4701502},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701608,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000054769","start":4701587},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701940,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000041082","start":4701626},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4702349,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000031470","start":4702170},{"object_type":"Exon","version":1,"species":"tetraodon_nigr
oviridis","assembly_name":"TETRAODON8","end":4702609,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191315","start":4702422},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4702859,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191316","start":4702689},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703307,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191317","start":4702938},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703539,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191318","start":4703384},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703769,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191319","start":4703622},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703962,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191320","start":4703850},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704239,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191321","start":4704041},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704504,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191322","start":4704338},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704734,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191323","start":4704596},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4705074,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191324","start":4704818}
],'..b'                "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14518681, \n+                  "start": 14518563, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177706", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14518488, \n+                  "start": 14518302, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177713", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14518095, \n+                  "start": 14518022, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177726", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14517147, \n+                  "start": 14517045, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177737", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+            
   }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14516938, \n+                  "start": 14516840, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177744", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14516681, \n+                  "start": 14516506, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177753", \n+                  "seq_region_name": "6", \n+                  "version": 1, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }, \n+               {\n+                  "assembly_name": "CanFam3.1", \n+                  "end": 14516182, \n+                  "start": 14515204, \n+                  "db_type": "core", \n+                  "object_type": "Exon", \n+                  "id": "ENSCAFE00000177757", \n+                  "seq_region_name": "6", \n+                  "version": 2, \n+                  "species": "canisfamiliaris", \n+                  "strand": -1\n+               }\n+            ], \n+            "biotype": "protein_coding", \n+            "species": "canisfamiliaris", \n+            "end": 14529569, \n+            "Translation": {\n+               "end": 14528678, \n+               "Parent": "ENSCAFT00000025950", \n+               "db_type": "core", \n+               "object_type": "Translation", \n+               "id": "ENSCAFP00000024092", \n+               "start": 14515481, \n+               "length": 824, \n+               "species": "canisfamiliaris"\n+            }, \n+            "is_canonical": "1", \n+            "db_type": "core", \n+     
       "strand": -1\n+         }\n+      ], \n+      "species": "canisfamiliaris", \n+      "strand": -1\n+   }\n+}\n'
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/test.gafa.sqlite
b
Binary file test-data/test.gafa.sqlite has changed
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/tree.nhx
--- a/test-data/tree.nhx Wed Dec 21 07:31:50 2016 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,1 +0,0 @@
-(((((((ENSTRUP00000015030:0.072273,ENSTNIP00000002435:0.113355):0.217419,ENSGACP00000015199:0.159219):0.015782,((ENSXMAP00000006983:0.045736,ENSPFOP00000001575:0.056524):0.320294,ENSONIP00000006940:0.283168):0.045551):0.201095,ENSGMOP00000010385:0.374304):0.133192,(ENSAMXP00000013440:0.462514,ENSDARP00000099674:0.551859):0.178712):0.155187,ENSLOCP00000009962:0.210044):0.153275,((((((((ENSMGAP00000015990:0.033173,ENSGALP00000027524:0.03556):0.083504,ENSAPLP00000007411:0.115288):0.028469,(ENSFALP00000008821:0.042194,ENSTGUP00000012130:0.064837):0.09653):0.300747,ENSPSIP00000012858:0.197004):0.039526,ENSACAP00000004459:0.35271):0.088637,((((((((((((((ENSBTAP00000001311:0.011479,ENSOARP00000011988:0.017264):0.045414,ENSSSCP00000022872:0.123054):0,ENSSSCP00000028073:0.069764):0.001255,ENSTTRP00000010004:0.034189):0.007047,ENSVPAP00000000821:0.052959):0.015307,ENSECAP00000013146:0.048224):0.000353,(((ENSAMEP00000009909:0.028288,ENSMPUP00000001928:0.031962):0.008616,ENSCAFP00000009557:0.043063):0.010565,ENSFCAP00000019777:0.056037):0.027617):0.000302,(ENSMLUP00000012516:0.064576,ENSPVAP00000000225:0.107453):0.018072):0.004506,(ENSEEUP00000008968:0.120512,ENSSARP00000002541:0.129791):0.016812):0.008809,((((((((((ENSP00000369497:0.001811,ENSPTRP00000009812:0.003163):0.000207,ENSGGOP00000015446:0.019894):0.004997,ENSPPYP00000005997:0.007654):0.002773,ENSNLEP00000001277:0.011887):0.003225,((ENSMMUP00000009432:0.001779,ENSPANP00000002726:0.00613):0.000661,ENSCSAP00000013938:0.004946):0.011844):0.008398,ENSCJAP00000034250:0.036115):0.032464,ENSTSYP00000000441:0.06932):0.004582,(ENSMICP00000010933:0.039893,ENSOGAP00000009477:0.07109):0.018724):0.005621,ENSTBEP00000013856:0.096277):0.00151,(((((ENSMUSP00000038576:0.034382,ENSRNOP00000001475:0.044347):0.156559,ENSDORP00000006609:0.117166):0.013586,ENSSTOP00000004979:0.081466):0.001026,ENSCPOP00000004635:0.137701):0.012286,(ENSOCUP00000014514:0.050845,ENSOPRP00000014082:0.126167):0.05329):0.004454):0.008989):0.011526,(ENSCHOP000000
07822:0.056068,ENSDNOP00000034947:0.060711):0.028397):0.005552,((ENSLAFP00000002234:0.07083,ENSPCAP00000000440:0.210145):0.017597,ENSETEP00000003277:0.202287):0.047366):0.125005,((ENSMODP00000033276:0.091526,ENSSHAP00000012162:0.098879):0.013978,ENSMEUP00000009812:0.08857):0.153103):0.044879,(ENSOANP00000024376:0.007695,ENSOANP00000032170:0.039107):0.30595):0.104539):0.110316,ENSXETP00000060681:0.774548):0.1098,ENSLACP00000008815:0.318609):0.18077):0;
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/tree1.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tree1.nhx Mon Feb 20 06:25:33 2017 -0500
[
@@ -0,0 +1,13 @@
+((((ENSPTRT00000040520_pantroglodytes:0.003426[&&NHX:S=pantroglodytes],
+ENST00000338702_homosapiens:0.005715[&&NHX:S=homosapiens]
+):0.056362[&&NHX:D=N:S=prim:T=31:B=97],
+ENST00000542639_homosapiens:100000[&&NHX:E=$-pantroglodytes:S=homosapiens]
+):4e-06[&&NHX:D=Y:SIS=50:DCS=0.5000:S=prim:T=8:B=43],
+(ENSRNOT00000066674_rattusnorvegicus:0.024708[&&NHX:S=rattusnorvegicus],
+ENSMUST00000026013_musmusculus:0.020534[&&NHX:S=musmusculus]
+):0.126977[&&NHX:D=N:S=rod:T=31:B=100]
+):0[&&NHX:D=N:S=euarc:T=8:B=46],
+(ENSCAFT00000022939_canisfamiliaris:0.078848[&&NHX:S=canisfamiliaris],
+ENSSSCT00000013404_susscrofa:0.084278[&&NHX:S=susscrofa]
+):0.037628[&&NHX:D=N:S=laur:T=31:B=100]
+)[&&NHX:D=N:S=mamm:B=0];
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/tree2.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tree2.nhx Mon Feb 20 06:25:33 2017 -0500
[
@@ -0,0 +1,17 @@
+(((ENSPTRT00000040521_pantroglodytes:0.003576[&&NHX:S=pantroglodytes],
+ENST00000378069_homosapiens:0.00314[&&NHX:S=homosapiens]
+):0.033582[&&NHX:D=N:E=$-rod:S=prim:T=31:B=100],
+ENSCAFT00000022963_canisfamiliaris:0.05575[&&NHX:E=$-susscrofa:S=canisfamiliaris]
+):0.036438[&&NHX:D=N:S=mamm:T=31:B=81],
+((ENSRNOT00000044009_rattusnorvegicus:0.033002[&&NHX:S=rattusnorvegicus],
+((ENSMUST00000040820_musmusculus:0[&&NHX:S=musmusculus],
+ENSMUST00000168613_musmusculus:0.012852[&&NHX:S=musmusculus]
+):0[&&NHX:D=Y:SIS=100:DCS=1.0000:S=musmusculus:T=18:B=100],
+ENSMUST00000163344_musmusculus:0.139009[&&NHX:S=musmusculus]
+):0.023529[&&NHX:D=Y:SIS=100:DCS=1.0000:S=musmusculus:T=2:B=96]
+):0.059746[&&NHX:D=N:E=$-prim:S=rod:T=7:B=96],
+(ENSSSCT00000023183_susscrofa:0.002007[&&NHX:S=susscrofa],
+ENSSSCT00000033745_susscrofa:0[&&NHX:S=susscrofa]
+):556.372[&&NHX:D=Y:SIS=100:DCS=1.0000:E=$-canisfamiliaris:S=susscrofa:T=31:B=100]
+):0[&&NHX:D=N:S=mamm:T=7:B=81]
+)[&&NHX:D=Y:SIS=0:DCS=0.0000:DD=Y:S=mamm:B=0];
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/tree3.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tree3.nhx Mon Feb 20 06:25:33 2017 -0500
[
@@ -0,0 +1,17 @@
+((((ENSPTRT00000034846_pantroglodytes:0.008941[&&NHX:S=pantroglodytes],
+ENST00000340611_homosapiens:0.002566[&&NHX:S=homosapiens]
+):0.14728[&&NHX:D=N:S=prim:T=31:B=100],
+(ENSRNOT00000064726_rattusnorvegicus:0.05607[&&NHX:S=rattusnorvegicus],
+((ENSMUST00000041588_musmusculus:0.005599[&&NHX:S=musmusculus],
+ENSMUST00000110806_musmusculus:0.003481[&&NHX:S=musmusculus]
+):0[&&NHX:D=Y:SIS=100:DCS=1.0000:S=musmusculus:T=24:B=98],
+ENSMUST00000153440_musmusculus:0.020275[&&NHX:S=musmusculus]
+):0.076743[&&NHX:D=Y:SIS=100:DCS=1.0000:S=musmusculus:T=24:B=98]
+):0.21386[&&NHX:D=N:S=rod:T=31:B=100]
+):0.103346[&&NHX:D=N:S=euarc:T=31:B=72],
+ENSCAFT00000025950_canisfamiliaris:0.111975[&&NHX:E=$-susscrofa:S=canisfamiliaris]
+):0.210884[&&NHX:D=N:S=mamm:T=31:B=54],
+(ENSMUST00000100505_musmusculus:13.5088[&&NHX:E=$-rattusnorvegicus-prim:S=musmusculus],
+ENSSSCT00000008295_susscrofa:0.206967[&&NHX:E=$-canisfamiliaris:S=susscrofa]
+):0[&&NHX:D=N:S=mamm:T=31:B=55]
+)[&&NHX:D=Y:SIS=17:DCS=0.1667:S=mamm:B=0];
b
diff -r af9f72ddf7f9 -r fc8ca4ade638 test-data/tree4.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tree4.nhx Mon Feb 20 06:25:33 2017 -0500
[
@@ -0,0 +1,19 @@
+((((ENSPTRT00000019089_pantroglodytes:0.004346[&&NHX:S=pantroglodytes],
+(ENST00000302850_homosapiens:0[&&NHX:S=homosapiens],
+ENST00000341500_homosapiens:0[&&NHX:S=homosapiens]
+):0.001483[&&NHX:D=Y:SIS=100:DCS=1.0000:S=homosapiens:T=7:B=100]
+):0.130609[&&NHX:D=N:S=prim:T=7:B=100],
+(ENSRNOT00000041155_rattusnorvegicus:0.025292[&&NHX:S=rattusnorvegicus],
+ENSMUST00000091291_musmusculus:0.023349[&&NHX:S=musmusculus]
+):0[&&NHX:D=N:S=rod:T=27:B=100]
+):1.86081[&&NHX:D=N:S=euarc:T=19:B=96],
+((ENST00000600492_homosapiens:4.08746[&&NHX:E=$-pantroglodytes:S=homosapiens],
+ENSMUST00000207100_musmusculus:0[&&NHX:E=$-rattusnorvegicus:S=musmusculus]
+):5.88981[&&NHX:D=N:S=euarc:T=1:B=1],
+(ENSRNOT00000067448_rattusnorvegicus:0[&&NHX:S=rattusnorvegicus],
+ENSMUST00000208839_musmusculus:100000[&&NHX:S=musmusculus]
+):0[&&NHX:D=N:E=$-prim:S=rod:T=1:B=0]
+):2.09131[&&NHX:D=Y:SIS=33:DCS=0.3333:S=euarc:T=1:B=0]
+):0[&&NHX:D=Y:SIS=75:DCS=0.7500:S=euarc:T=3:B=39],
+ENSSSCT00000014817_susscrofa:4.61035[&&NHX:E=$-canisfamiliaris:S=susscrofa]
+)[&&NHX:D=N:S=mamm:B=0];