Repository 'treebest_best'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/treebest_best

Changeset 0:4f9e5110914b (2016-12-20)
Next changeset 1:fd85bf67c4bb (2016-12-20)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 136cba2b8c8a2ac2465e7e9420314f2511b991f2-dirty
added:
TreeBest_best_wrapper.xml
fasta_header_converter.py
fasta_header_converter.xml
test-data/dna_alignment.fasta
test-data/genetree.nhx
test-data/out.fasta
test-data/species.nhx
test-data/test.fasta
test-data/test.json
b
diff -r 000000000000 -r 4f9e5110914b TreeBest_best_wrapper.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/TreeBest_best_wrapper.xml Tue Dec 20 16:32:25 2016 -0500
[
@@ -0,0 +1,121 @@
+<tool id="treebest_best_wrapper" name="TreeBeST best" version="1.9.2">
+    <description>Generate a phylogenetic tree using CDS alignment and species tree</description>
+    <requirements>
+        <requirement type="package" version="1.9.2_ep78">treebest</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <command>
+<![CDATA[
+## Dataset paths are single-quoted so that unusual characters in a path cannot break the shell command.
+treebest best
+-f '$species_tree'
+## Each boolean param below expands to its flag when checked and to an empty string otherwise.
+$P
+$S
+$A
+$r
+#if $C
+    -C '$C'
+#end if
+$s
+$g
+$N
+-c $c
+-d $d
+-l $l
+-L $L
+-b $b
+## -k and -a receive either the user-supplied number or the literal 'e' (estimate).
+#if $k.k_selector == 'yes'
+    -k e
+#else
+    -k $k.k_value
+#end if
+#if $a.a_selector == 'yes'
+    -a e
+#else
+    -a $a.a_value
+#end if
+-F $F
+-p "Galaxy_TreeBest_Best"
+'$aln'
+> '$output'
+]]>
+    </command>
+
+    <inputs>
+        <param name="species_tree" type="data" format="nhx" label="Species file in Newick format" help="-f"/>
+        <param name="aln" type="data" format="fasta" label="CDS alignment in FASTA format"/>
+        <param name="P" type="boolean" truevalue="-P" falsevalue="" label="Skip PHYML" help="(-P)" />
+        <param name="S" type="boolean" truevalue="-S" falsevalue="" label="Ignore the probability of gene evolution (NOT recommended)" help="(-S)" />
+        <param name="A" type="boolean" truevalue="-A" falsevalue="" label="Apply constraint to PHYML" help="(-A)" />
+        <param name="r" type="boolean" truevalue="-r" falsevalue="" label="Discard species that do not appear at all" help="(-r)" />
+        <param name="C" type="data" format="nhx" optional="true" label="Constraining tree" help="Optional (-C)" />
+        <param name="s" type="boolean" truevalue="-s" falsevalue="" label="Only build tree for genes from sequenced species" help="(-s)" />
+        <param name="g" type="boolean" truevalue="-g" falsevalue="" label="Collapse alternative splicing forms" help="(-g)" />
+        <param name="N" type="boolean" truevalue="-N" falsevalue="" label="Do not mask low-scoring segments" help="(-N)" />
+        <param name="F" type="integer" min="1" value="11" label="Quality cut-off" help="(-F)" />
+        <param name="c" type="integer" min="1" value="2" label="Number of rate categories for PHYML-HKY" help="(-c)" />
+        <conditional name="k">
+            <param name="k_selector" type="select" label="tv/ts ratio (kappa)" help="(-k)">
+                <option value="yes">Estimate (e)</option>
+                <option value="no">Specify value</option>
+            </param>
+            <when value="yes" />
+            <when value="no">
+                <param name="k_value" type="float" value="0.0" label="tv/ts ratio (kappa)" />
+            </when>
+        </conditional>
+        <conditional name="a">
+            <param name="a_selector" type="select" label="Alpha parameter for Gamma distribution" help="(-a)">
+                <option value="yes">Estimate (e)</option>
+                <option value="no">Specify value</option>
+            </param>
+            <when value="yes" />
+            <when value="no">
+                <param name="a_value" type="float" value="0.0" label="Alpha parameter for Gamma distribution" />
+            </when>
+        </conditional>
+        <param name="d" type="float" min="0" value="0.15" label="Duplication probability" help="(-d)" />
+        <param name="l" type="float" min="0" value="0.10" label="Probability of a loss following a speciation" help="(-l)" />
+        <param name="L" type="float" min="0" value="0.20" label="Probability of a loss following a duplication" help="(-L)" />
+        <param name="b" type="float" min="0" value="0.01" label="Probability of the presence of an inconsistent branch" help="(-b)" />
+    </inputs>
+
+    <outputs>
+        <data name="output" format="nhx" label="$tool.name on ${on_string}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="species_tree" ftype="nhx" value="species.nhx" />
+            <param name="aln" ftype="fasta" value="dna_alignment.fasta" />
+            <param name="F" value="11" />
+            <param name="k_selector" value="yes" />
+            <param name="a_selector" value="yes" />
+            <param name="d" value="0.15" />
+            <param name="l" value="0.10" />
+            <param name="L" value="0.20" />
+            <param name="b" value="0.01" />
+            <output name="output" file="genetree.nhx" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+`TreeBeST`_ (gene Tree Building guided by Species Tree) is a versatile program that builds, manipulates and displays phylogenetic trees. It is particularly designed for building gene trees with a known species tree and is highly efficient and accurate.
+
+The 'best' command builds the best gene tree from a species tree and a CDS alignment. The resultant tree will be bootstrapped for 100 times, reconciled with the species tree and rooted by minimizing with the number of duplications and losses. Duplications and losses are also stored in the NHX format.
+
+Note that TreeBeST first determines the topology of resultant tree with a complex procedure, and then performs a hundred times of resampling with an improved neighbour-joining algorithm. Branch lengths are finally estimated with the standard ML method under the HKY model.
+
+.. _TreeBeST: http://treesoft.sourceforge.net/treebest.shtml
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1080/10635150390235520</citation>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 4f9e5110914b fasta_header_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_header_converter.py Tue Dec 20 16:32:25 2016 -0500
[
@@ -0,0 +1,72 @@
+"""Append the species name to the header of every FASTA record.
+
+Reads gene feature information in JSON format (-j) and sequences in FASTA
+format (-f), then prints the sequences to standard output with each header
+rewritten from ">ID" to ">ID_species".
+"""
+from __future__ import print_function
+
+import json
+import optparse
+
+
+def read_gene_info(gene_info):
+    """Map every transcript id to its species name with underscores removed.
+
+    gene_info is a dict whose values are gene dicts, each holding a
+    'Transcript' list of dicts with 'id' and 'species' keys.
+    """
+    transcript_species_dict = dict()
+    for gene_dict in gene_info.values():
+        for transcript in gene_dict['Transcript']:
+            transcript_species_dict[transcript['id']] = transcript['species'].replace("_", "")
+    return transcript_species_dict
+
+
+def convert_header(line, transcript_species_dict):
+    """Return FASTA header `line` with "_<species>" appended to its id.
+
+    The whole header text is looked up first. If it is not a known transcript
+    id, the first whitespace-delimited word is used as the id and the rest of
+    the header is kept after it as the description. Raises an Exception naming
+    the header when no transcript id matches.
+    """
+    name = line[1:].lstrip()
+    if name in transcript_species_dict:
+        return ">" + name + "_" + transcript_species_dict[name]
+    fields = name.split(None, 1)
+    if fields and fields[0] in transcript_species_dict:
+        new_id = fields[0] + "_" + transcript_species_dict[fields[0]]
+        return ">" + " ".join([new_id] + fields[1:])
+    raise Exception("No species found for FASTA header '%s' in the gene feature information" % name)
+
+
+def main():
+    """Parse the command line and write the converted FASTA to standard output."""
+    parser = optparse.OptionParser()
+    parser.add_option('-j', '--json', dest="input_gene_filename",
+                      help='Gene feature information in JSON format')
+    parser.add_option('-f', '--fasta', dest="input_fasta_filename",
+                      help='Sequences in FASTA format')
+    options, args = parser.parse_args()
+
+    if options.input_gene_filename is None:
+        raise Exception('-j option must be specified')
+
+    if options.input_fasta_filename is None:
+        raise Exception('-f option must be specified')
+
+    with open(options.input_gene_filename) as json_fh:
+        transcript_species_dict = read_gene_info(json.load(json_fh))
+
+    with open(options.input_fasta_filename) as fasta_fh:
+        for line in fasta_fh:
+            line = line.rstrip()
+            if line.startswith(">"):
+                print(convert_header(line, transcript_species_dict))
+            else:
+                print(line)
+
+
+if __name__ == '__main__':
+    main()
b
diff -r 000000000000 -r 4f9e5110914b fasta_header_converter.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta_header_converter.xml Tue Dec 20 16:32:25 2016 -0500
[
@@ -0,0 +1,33 @@
+<tool id="fasta_header_converter" name="FASTA header converter" version="0.1.1">
+    <description>to append species information</description>
+    <command>
+<![CDATA[
+python '$__tool_directory__/fasta_header_converter.py'
+-f '$fastaFile'
+-j '$genesFile'
+> '$outputFile'
+]]>
+    </command>
+    <inputs>
+        <param name="fastaFile" type="data" format="fasta" label="FASTA file" help="FASTA file with transcript ID as FASTA ID" />
+        <param name="genesFile" type="data" format="json" label="Gene feature information" help="In JSON format" />
+    </inputs>
+    <outputs>
+        <data format="fasta" name="outputFile" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <!-- The test input is uploaded as fasta, the format the fastaFile param accepts. -->
+            <param name="fastaFile" ftype="fasta" value="test.fasta" />
+            <param name="genesFile" ftype="json" value="test.json" />
+            <output name="outputFile" file="out.fasta" />
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+Simple converter for FASTA files, which appends the species name to the FASTA header for usage in TreeBeST. It uses gene feature information in JSON format (similar to the result of Ensembl REST API - lookup/id).
+    ]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r 4f9e5110914b test-data/dna_alignment.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/dna_alignment.fasta Tue Dec 20 16:32:25 2016 -0500
b
b'@@ -0,0 +1,168 @@\n+>ENSLOCT00000017020_lepisosteusoculatus\n+ATGGACAAAGGGCAGAAACCCGAAATTGAGAAGAAAGATGTAATTGTCATTGGGGGTGGT\n+TTATCTGGCTTGTCTGCTGCTAAACTGCTGAAAGAGTCTGGATTGAATGTCCTGGTTCTG\n+GAAGCACGTGATCGTGTGGGTGGGAGAACGTTGACTGTAAAAGGGCCTGAATTCCAATAT\n+GTGGATCTTGGAGGTGCTTATGTTGGTCCAACTCAGAACAGCATTCTGAGACTTGCAAAA\n+GAACTGGGGGTGAAGACTCATTTAGTAAACGAGAAAGAGCGACTCATTCATTATTATAAG\n+GGTAAGACGTATCCATTTCGAGGGCCATTTCCTCCAGCATGGAACCCCATCGTCTACCTG\n+GATTACAACAACCTCTGGAGGACTCTGGACCGAATGGGAAAGGAGATCCCAGCAGATGCT\n+CCATGGACTGCTCAACATGCCACACAGTGGGACAATATGACGATGAAAGAACTAATTGAC\n+AAACATTGCTGGACAAGGACTGCAAAAAACTTTGCAACTCTCTTCGTCAATGTCAATGTG\n+ACCTCTGAACCTCATGAAGTCTCAGCGCTCTGGTTTCTTTGGTATGTGAAGCAGTGTGGA\n+GGGACAAAAAGGATATTTTCAACATCTAATGGAGGACAGGAAAGAAAATTCATGGGGGGA\n+TCAGGGCAGATTAGTGAACGGATCGCAGACCGGCTGAAGGGCAACGTCCACCTAAACCAG\n+CCTGTGATTAAGCTCTTGCAGACTCCAGCTGGTGTGTGTGTCGAAACACTAACTGGAAAT\n+AAGTATGAGGCAGCCTATGCCATCAGTGCTATTCCACCAGGGCTGAGCATGAGTTTGCAT\n+TATGAGCCCCAGCTGCCTCCAGTGAGAAACCAAATGATCCAAAGAGTCCCAATGGGGTCC\n+ATCATTAAATGCATGATGTACTACAAGAGGACCTTCTGGAGAGAGAAGGGGTACTGCGGA\n+ACAATGATGATCGAAGATGAGGACTCTCCCATTTCAATGACCCTAGATGACACCAAACCA\n+GATGGATCGTATCCCTGTATTATGGGGTTTGTTTTAGCAAGAAAAGCAAGAGATCTTATC\n+AATTTGACTAAGGATGAGAGG---AAAAAACGTATCTGTCAGATTTATGCAAAAGTGCTG\n+GGAACAAATGAAGCATTGCATCCTGTTCACTATGAAGAAAAAGACTGGTGTGAAGAACAA\n+TATTCAGGAGGCTGTTATACTGCCTATTTTCCTCCAGGTACCTTCTGCCAGTTCAGCAGA\n+GTTCTAAGGGAGCCTTTTGGTAGGCTCTTCTTCGCAGGCACAGAGACAGCAACCAGATGG\n+AGTGGATATATGGATGGTGCTGTTCAAGCTGGTGAGAGAGCTGGAAGAGAGGTACTGCAT\n+---GCAATGGGTAAAGTATCCCAGTCAAAAATCTGGAAGGAAGACGTAGAATCACAGGAA\n+GTTCCAGCTGAACCAATTACAACAAGTTTCTTAGAAGAGCATCTGCCTTCTGTCCCTGCA\n+TTTCTGACAATGATCGGAGTCTCTTTTGCATTTATCACAACAGCCGCAGCCCTTGGGTCG\n+GGCATCTTAAAGACAAATTCACGTTTACGTCTCTTTCAT\n+>ENSLACT00000014274_latimeriachalumnae\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+--------------------------
----------------------------------\n+---------------------------------------TTTTTTCTTTTTTTCTCCCAG\n+GGTAAGTCGTATCCCTTTAGGGGTCCTTTCCCTCCTATGTGGAACCCCATTGCCTATATG\n+GATTACAACAACCTGTGGAGAACAATGGACGAAATGGGGAAGGAGATCCCCAAGGAAGCC\n+CCGTGGAAGGCCCCACATGCTGAAGAATGGGATAAAATTACCATGAAGGATCTGATTGAC\n+AAAATCTGCTGGACAGGTGTTGCCAAGCAGTTTGCCACGCTGTTTGTGAATGTCAACGTT\n+ACTTCTGAACCCCATGAGGTCTCTGCTCTCTGGTTCCTGTGGTATGTGAAGCAGTGCGGT\n+GGAACGACCAGGATCTTTTCAACAACCAATGGTGGACAGGAGCGTAAGTTTGTTGGCGGT\n+TCAAGCCAGATCAGTGAGAAGATGATGGATCTCCTTGGAGATCAGGTGAAACTGCAGAGA\n+CCTGTAGTCGGCATTGATCAGTCTGGGGAGGATGTGGTCGTGGAGACTCTAAACCGTGAG\n+AAGTATGAGGCCAAATATGTCATTAGTGCCATTCCTCCAACCCTTAGTGTGAAGATTCAC\n+TACAACCCACCTTTGCCGCCTTTGAGAAACCAGTTAATCCATCGTGTTCCTTTGGGATCC\n+GTTATCAAGTGCATGGTGTATTACAAGGACTCATTTTGGAGAAAGCAGGATTACTGTGGC\n+AGCATGATAATTGAAGAAGAGGATGCTCCAATTGGGCTGACACTGGATGACACCAAACCA\n+GATGGCAGTGTGCCTGCCATTATGGGATTCATTCTTGCCCGAAAAGCCAGAAGACTAGCA\n+CATTTCACCAAAGAAGAGCGA---AAGAAGAAAATCTGTGAACTTTATGCAAGAGTTCTT\n+GGATCACAGGAAGCTTTACAGCCTGTACACTATGAAGAGAAGAACTGGTGTGAGGAGGAG\n+TACTCGGGCGGGTGTTACACTGCTTACTTCCCACCAGGAATAATGACCCAGTTTGGCAAG\n+GTAATCCGTCAGCCAGTTGGCAGAATTTTCTTTGCCGGCACTGAGACGGCTACCGAGTGG\n+AGCGGCTACATGGAGGGGGCAGTGCAGGCTGGAGAGAGAGCAGCCAGAGAGATCTTATGT\n+---TCAATGAGAAGAATTCCTGAAAGTGAAATCTGGAAGCCTGAGCCAGAGTGTCTGGAT\n+GTCCCAGCTTCTCCGATCACCACCGCGTTCTGGGAGAGGAACCTTCCATCTGTCCCTGGG\n+TTTCTGAGGCTGCTTGGATTCTCC---GCGTTTCTCACCTCGACAGCGGCAGTAGGGCTC\n+TTT------GCCTACAAGAGGGGACTGCTGATTAAGAAC\n+>ENSLACT00000009516_latimeriachalumnae\n+ATGTCTGTT------------------GAGAAAAGAGATGTGATCATTGTTGGAGGTGGT\n+CTATCAGGTCTGTCAGCTGCAAAATTGCTAGCAGAGGCGGGGCTGAACGTCTTGGTTCTG\n+GAGGCCAGGAACCGTGTTGGTGGAAGAACGTACACAGTTCAAGGCCCAGAGTTCCAGTAC\n+GTAGACCTTGGGGGAGCATATGTTGGACCCACCCAGAATCGCATACTAAGAGTCGCAAAG\n+GAGTTTGGGGTTAAGACTTACCTGGTAAATGAAAAGGAGCAACTGATTCATTATGTCAAG\n+GGAAAGTCATACCCCTTCCAAGGGCCTTTCCCTCCTGCATGGAACCCAATTGTCTATCTG\n+GATTACAACAATCTCTGGAGGACTTTAGATGAGATGGGACGAGAGATTCCTGCAGAAGCT\n+CCTTGGAAGGCACCCTGTGCTGAAGCGTGGGACAACATGACAATGAAGCAGCT
GATTGAT\n+AAACTCTGCTGGACGAGGGTAGCCAAGGACTTTGCAACGCTATTTGTCAACGTGAAT'..b'TGACACCAAGCCC\n+GATGGCACTGTTCCTGCTATTATGGGTTTCATCCTTGCCCGAAAAGCTAGGAGGCTGGCA\n+CATATCACAAAAGAAGAAAGG---AAAACGAAAATCTGTGAACTTTATGCAAGGGTTCTT\n+GGATCAGAGGAAGCTTTACATCCAGTACACTATGAAGAGAAGAACTGGTGCGAGGAGGAG\n+TACTCGGGCGGGTGTTACGCTGCTTACTTCCCACCAGGAATAATGACCCAGTTTGGCAAG\n+GTAATCCGTCAGCCAGTTGGCAGAATTTTCTTTGCTGGCACTGAGACGGCTACCGAGTGG\n+AGCGGCTACATGGAGGGGGCAGTGCAGGCTGGAGAGAGAGCAGCCAGAGAGGTACGGTGC\n+---AGAAATGTGATC---------------------------------------------\n+------------------------------------------------------------\n+------------------------------------------------------------\n+------------------------ATAACATACCTACTG\n+>ENSLACT00000014695_latimeriachalumnae\n+---------------------------------AATTTTCTACTTCTCCAAAGAAACTTT\n+CTTTTAGGTCTGTCGGCCGCCAAGCTGCTGACTGAGTCTGGGCTGAATGTGGTTGTGTTG\n+GAGGCCCGTGACAGGGTTGGAGGAAGAACCTTCACCACCAGGAATAAGCATGTTAAGTAT\n+GTGGACCTTGGCGGAGCCTATGTGGGGCCGACGCAGAATCGCATTCTACGCCTGTCGAAG\n+GAGCTGGGCATTGAGACGTATAAAGTGAATGAAGTGGAGCGCTTCATCCATCACATAAAG\n+GGAAAATCATATCCTTTCCAGGGACCATTCCCTCCAATGTGGAATCCACTTGCCTTCCTG\n+GATTATAATAACCTCTGGAGAACACTGGATGAAATGGGAAAAGAGATCCCCAGGGAAGCC\n+CCGTGGAAGGCCCCACATGCCGAAGAATGGGACAAAATGACCATGAAGGATCTGATTGAC\n+AAAATCTGCTGGACAGAAGCTGCCAAGCAGTTTGCCACGCTGTTTGTGAATGTCAACGTT\n+ACTTCTGAGCCCTGTGAGGTCTCTGCTCTCTGGTTCCTGTGGTACGTGAAGCAGTGCGGT\n+GGAACGACCAGGATCTTTTCAATTGGTAATGGTGGGCAGGAGCGCAAGTTTGTCGGAGGT\n+TCAGGTCAGATCAGTGAGAAGATGATGGAGATCCTTGGAGATCGGGTGAAACTACAGGGA\n+CCTGTAATCAGCATTGATCAGTCCGGAGACGGTGTGGTCGTGGAAACTCTGAACCATGAG\n+AAGTATGAGGCCAAATATGTCATTAGTGCCATTCCCCCATCCCTTAGCATGAAGATTCAT\n+TACAAGCCACCTTTGCCACCAATACGAAACCAGTTTATCTATCGAGTTCCGATTGGATCT\n+GTTATCAAGTGTATGGTCTATTACAAGGAAGCCTTTTGGAAAAAGAAGGATTTCTGTGGC\n+TGTATGATGATTGAAGATGAAGAAGCTCCTATCGGTGTGACTCTGGATGACACCAAGCCC\n+GATGGCACTGTTCCTGCTATTATGGGTTTCATCCTTGCCCGAAAAGCTAGGAGGCTGGCA\n+CATATCACAAAAGAAGAAAGG---AAAACGAAAATCTGTGAACTTTATGCAAGGGTTCTT\n+GGATCAGAGGAAGCTTTACATCCAGTACACTATGAAGAGAAGAACTGGTGCGAGGAGGAG\n+TA
CTCGGGCGGGTGTTACGCTGCTTACTTCCCACCAGGAATAATGACCCAGTTTGGCAAG\n+GTAATCCGTCAGCCAGTTGGCAGAATTTTCTTTGCTGGCACTGAGACGGCTACCGAGTGG\n+AGCGGCTACATGGAGGGGGCAGTGCAGGCTGGAGAGAGAGCAGCCAGAGAGAGTAGACGC\n+CATGCAGCATCATCAAAGACCAGGGAGAAGCCGGGGGGAACAAAGCCTGACAAACACGAT\n+GTCCCAGCTTCTCCGATCACCACCGCGTTCTGGGAGAGGAACCTTCCATCTGTCCCTGGG\n+TTTCTGAGGCTGCTTGGATTCTCC---GCGTTTCTCACCTCGACAGCGGCAGTAGGGCTC\n+TTT------GCCTACAAGAGGGGACTGCTGATTAAGAAC\n+>ENSLACT00000026689_latimeriachalumnae\n+ATGACCAGC------------------AATAAATATGATGTAATTGTCATTGGAGCAGGC\n+ATCTCAGGTCTGTCGGCTGCCAAGCTGCTGACTGAGTCTGGGCTGAATGTGGTTGTGTTG\n+GAGGCCCGTGACAGGGTTGGAGGAAGAACCTTCACCACCAGGAATAAGCATGTTAAGTAT\n+GTGGACCTTGGCGGAGCCTATGTGGGGCCAACGCAGAATCGCATTCTACGCCTGTCGAAG\n+GAGCTGGGCATTGAGACGTATAAAGTGAATGAAGTGGAACGCCTCATCCATCATGTCAAG\n+GGTAAGTCGTATCCCTTTAGGGGTCCTTTCCCTCCTATGTGGAACCCCATTGCCTATATG\n+GATTACAACAACCTGTGGAGAACAATGGACGAAATGGGGAAGGAGATCCCCAAGGAAGCC\n+CCGTGGAAGGCCCCACATGCTGAAGAATGGGATAAAATTACCATGAAGGATCTGATTGAC\n+AAAATCTGCTGGACAGGTGTTGCCAAGCAGTTTGCCACGCTGTTTGTGAATGTCAACGTT\n+ACTTCTGAACCCCATGAGGTCTCTGCTCTCTGGTTCCTGTGGTATGTGAAGCAGTGCGGT\n+GGAACGACCAGGATCTTTTCAACAACCAATGGTGGACAGGAGCGTAAGTTTGTTGGCGGT\n+TCAAGCCAGATCAGTGAGAAGATGATGGATCTCCTTGGAGATCAGGTGAAACTGCAGAGA\n+CCTGTAGTCGGCATTGATCAGTCTGGGGAGGATGTGGTCGTGGAGACTCTAAACCGTGAG\n+AAGTATGAGGCCAAATATGTCATTAGTGCCATTCCTCCAACCCTTAGTGTGAAGATTCAC\n+TACAACCCACCTTTGCCGCCTTTGAGAAACCAGTTAATCCATCGTGTTCCTTTGGGATCC\n+GTTATCAAGTGCATGGTGTATTACAAGGACTCATTTTGGAGAAAGCAGGATTACTGTGGC\n+AGCATGATAATTGAAGAAGAGGATGCTCCAATTGGGCTGACACTGGATGACACCAAACCA\n+GATGGCAGTGTGCCTGCCATTATGGGATTCATTCTTGCCCGAAAAGCCAGAAGACTAGCA\n+CATTTCACCAAAGAAGAGCGA---AAGAAGAAAATCTGTGAACTTTATGCAAGAGTTCTT\n+GGATCACAGGAAGCTTTACAGCCTGTACACTATGAAGAGAAGAACTGGTGTGAGGAGGAG\n+TACTCGGGCGGGTGTTACACTGCTTACTTCCCACCAGGAATAATGACCCAGTTTGGCAAG\n+GTAATCCGTCAGCCAGTTGGCAGAATTTTCTTTGCCGGCACTGAGACGGCTACCGAGTGG\n+AGCGGCTACATGGAGGGGGCAGTGCAGGCTGGAGAGAGAGCAGCCAGAGAGATCTTATGT\n+---TCAATGAGAAGAATTCCTGAAAGTGAAATCTGGAAGCCTGAGCCAGAGTGTCTGGAT\n+GTCCCAGCTTCTCCGATCACCACCGCGTT
CTGGGAGAGGAACCTTCCATCTGTCCCTGGG\n+TTTCTGAGGCTGCTTGGATTCTCC---GCGTTTCTCACCTCGACAGCGGCAGTAGGGCTC\n+TTT------GCCTACAAGAGGGGACTGCTGATTAAGAAC\n'
b
diff -r 000000000000 -r 4f9e5110914b test-data/genetree.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/genetree.nhx Tue Dec 20 16:32:25 2016 -0500
[
@@ -0,0 +1,11 @@
+(ENSLOCT00000017020_lepisosteusoculatus:0.258673[&&NHX:E=$-186625:S=lepisosteusoculatus],
+(ENSLACT00000009516_latimeriachalumnae:0.149849[&&NHX:S=latimeriachalumnae],
+((ENSLACT00000014274_latimeriachalumnae:0.011979[&&NHX:S=latimeriachalumnae],
+ENSLACT00000026689_latimeriachalumnae:0[&&NHX:S=latimeriachalumnae]
+):0.040268[&&NHX:D=Y:SIS=100:DCS=1.0000:S=latimeriachalumnae:T=31:B=100],
+(ENSLACT00000014694_latimeriachalumnae:0.022411[&&NHX:S=latimeriachalumnae],
+ENSLACT00000014695_latimeriachalumnae:0.019166[&&NHX:S=latimeriachalumnae]
+):0.074979[&&NHX:D=Y:SIS=100:DCS=1.0000:S=latimeriachalumnae:T=31:B=100]
+):0.296006[&&NHX:D=Y:SIS=100:DCS=1.0000:S=latimeriachalumnae:T=31:B=100]
+):0[&&NHX:D=Y:SIS=100:DCS=1.0000:E=$-32523:S=latimeriachalumnae:T=31:B=100]
+)[&&NHX:D=N:S=117571:B=0];
b
diff -r 000000000000 -r 4f9e5110914b test-data/out.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out.fasta Tue Dec 20 16:32:25 2016 -0500
b
b'@@ -0,0 +1,11171 @@\n+>ENSLACT00000008884_latimeriachalumnae\n+ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n+GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n+AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n+AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n+AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n+GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n+GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n+CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n+CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n+AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n+GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n+TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n+CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n+CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n+AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n+GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n+TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n+GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n+CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n+TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n+AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n+GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n+AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n+AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n+GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n+GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n+GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n+GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n+CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n+GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n+GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAA
GTTGAGAATCAGCAA\n+AGAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n+AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n+CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n+GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n+AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n+GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n+AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n+TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n+GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n+TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n+AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n+ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n+CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n+GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n+ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n+CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n+TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n+ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n+CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n+CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n+GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n+AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n+CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n+CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n+CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n+GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n+AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n+GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n+TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n+AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n+GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n+AGACGGAAGTATTACTAA\n+>ENSXETT
00000064180_xenopustropicalis\n+ATGGCTGCACCGCAACTTGGAAAATCTGTCTTCTA'..b'ATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n+GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n+CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n+GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n+GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n+AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n+GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n+GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n+CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n+CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n+ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n+AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n+CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n+ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n+ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n+GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n+GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n+CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n+GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n+GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n+AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n+>ENSTRUT00000015099_takifugurubripes\n+CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n+CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n+GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n+GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n+CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n+GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n+TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n+CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n+CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n+GACCA
CAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n+ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n+TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n+TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n+CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n+GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n+GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n+CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n+ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n+GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n+AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n+AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n+GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n+GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n+GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n+CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n+GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n+GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n+AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n+GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n+GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n+ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n+AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n+GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n+GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n+TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n+GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n+ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n+ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n+TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n+ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n+TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCCGC
CTCGCCGA\n+ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n+CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n'
b
diff -r 000000000000 -r 4f9e5110914b test-data/species.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/species.nhx Tue Dec 20 16:32:25 2016 -0500
b
@@ -0,0 +1,1 @@
+(((((((((((((((((homosapiens*,gorillagorilla*,pantroglodytes*)207598,pongoabelii*)9604,nomascusleucogenys*)314295,(chlorocebussabaeus*,macacamulatta*,papioanubis*)9528)9526,callithrixjacchus*)314293,tarsiussyrichta*)376913,(microcebusmurinus*,otolemurgarnettii*)376911)9443,((oryctolaguscuniculus*,ochotonaprinceps*)9975,(((rattusnorvegicus*,musmusculus*)39107,dipodomysordii*,ictidomystridecemlineatus*)33553,caviaporcellus*)9989)314147,tupaiabelangeri*)314146,((myotislucifugus*,pteropusvampyrus*)9397,((ovisaries*,bostaurus*)9895,vicugnapacos*,susscrofa*,tursiopstruncatus*)91561,((ailuropodamelanoleuca*,canisfamiliaris*,mustelaputoriusfuro*)379584,feliscatus*)33554,(erinaceuseuropaeus*,sorexaraneus*)9362,equuscaballus*)314145)1437010,(dasypusnovemcinctus*,choloepushoffmanni*)9348,(echinopstelfairi*,loxodontaafricana*,procaviacapensis*)311790)9347,(macropuseugenii*,sarcophilusharrisii*,monodelphisdomestica*)9263)32525,ornithorhynchusanatinus*)40674,((pelodiscussinensis*,(((meleagrisgallopavo*,gallusgallus*)9005,anasplatyrhynchos*)1549675,(ficedulaalbicollis*,taeniopygiaguttata*)9126)8825)1329799,anoliscarolinensis*)32561)32524,xenopustropicalis*)32523,latimeriachalumnae*)8287,(((daniorerio*,astyanaxmexicanus*)186626,((((tetraodonnigroviridis*,takifugurubripes*)31031,gasterosteusaculeatus*)1489922,(((poeciliaformosa*,xiphophorusmaculatus*)586240,oryziaslatipes*)1489913,oreochromisniloticus*)1489908)1489872,gadusmorhua*)123368)186625,lepisosteusoculatus*)41665)117571,petromyzonmarinus*)root;
b
diff -r 000000000000 -r 4f9e5110914b test-data/test.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.fasta Tue Dec 20 16:32:25 2016 -0500
b
b'@@ -0,0 +1,11171 @@\n+>ENSLACT00000008884\n+ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n+GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n+AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n+AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n+AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n+GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n+GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n+CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n+CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n+AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n+GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n+TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n+CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n+CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n+AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n+GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n+TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n+GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n+CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n+TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n+AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n+GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n+AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n+AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n+GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n+GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n+GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n+GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n+CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n+GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n+GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAAGTTGAGAATCAGCAA\n+A
GAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n+AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n+CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n+GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n+AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n+GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n+AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n+TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n+GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n+TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n+AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n+ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n+CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n+GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n+ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n+CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n+TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n+ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n+CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n+CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n+GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n+AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n+CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n+CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n+CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n+GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n+AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n+GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n+TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n+AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n+GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n+AGACGGAAGTATTACTAA\n+>ENSXETT00000064180\n+ATGGC
TGCACCGCAACTTGGAAAATCTGTCTTCTATGATCTGTTTAGCACGCATTGCTCT\n+CACTCAGATT'..b'GGCGGGAAGCTG\n+ATTATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n+GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n+CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n+GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n+GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n+AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n+GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n+GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n+CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n+CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n+ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n+AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n+CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n+ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n+ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n+GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n+GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n+CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n+GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n+GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n+AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n+>ENSTRUT00000015099\n+CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n+CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n+GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n+GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n+CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n+GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n+TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n+CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n+CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n+GAC
CACAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n+ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n+TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n+TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n+CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n+GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n+GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n+CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n+ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n+GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n+AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n+AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n+GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n+GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n+GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n+CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n+GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n+GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n+AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n+GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n+GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n+ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n+AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n+GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n+GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n+TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n+GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n+ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n+ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n+TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n+ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n+TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCC
GCCTCGCCGA\n+ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n+CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n'
b
diff -r 000000000000 -r 4f9e5110914b test-data/test.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.json Tue Dec 20 16:32:25 2016 -0500
[
b'@@ -0,0 +1,31795 @@\n+{\n+    "ENSTNIG00000016261": {\n+        "source": "ensembl",\n+        "object_type": "Gene",\n+        "logic_name": "ensembl",\n+        "version": 1,\n+        "species": "tetraodon_nigroviridis",\n+        "description": "breast cancer 2, early onset [Source:ZFIN;Acc:ZDB-GENE-060510-3]",\n+        "display_name": "brca2",\n+        "assembly_name": "TETRAODON8",\n+        "biotype": "protein_coding",\n+        "end": 4705074,\n+        "seq_region_name": "16",\n+        "db_type": "core",\n+        "strand": 1,\n+        "id": "ENSTNIG00000016261",\n+        "Transcript": [\n+            {\n+                "source": "ensembl",\n+                "object_type": "Transcript",\n+                "logic_name": "ensembl",\n+                "Exon": [\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4700679,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000057385",\n+                        "start": 4700614\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4701157,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000041338",\n+                        "start": 4701103\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": 
"tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4701424,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000031348",\n+                        "start": 4701218\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4701571,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000063263",\n+                        "start": 4701502\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4701608,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000054769",\n+                        "start": 4701587\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "tetraodon_nigroviridis",\n+                        "assembly_name": "TETRAODON8",\n+                        "end": 4701940,\n+                        "seq_region_name": "16",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSTNIE00000041082",\n+                        "start": 4701626\n+                    },\n+                    
{\n+                        "object_type": "Exon",\n+                        "version": 1,\n+        '..b'                "assembly_name": "cavPor3",\n+                        "end": 33841095,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000227172",\n+                        "start": 33841075\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "cavia_porcellus",\n+                        "assembly_name": "cavPor3",\n+                        "end": 33841317,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000067342",\n+                        "start": 33841179\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "cavia_porcellus",\n+                        "assembly_name": "cavPor3",\n+                        "end": 33850200,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000067343",\n+                        "start": 33849956\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 1,\n+                        "species": "cavia_porcellus",\n+                        "assembly_name": "cavPor3",\n+                        "end": 33851341,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000067344",\n+            
            "start": 33851195\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 2,\n+                        "species": "cavia_porcellus",\n+                        "assembly_name": "cavPor3",\n+                        "end": 33852801,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000067345",\n+                        "start": 33852516\n+                    },\n+                    {\n+                        "object_type": "Exon",\n+                        "version": 2,\n+                        "species": "cavia_porcellus",\n+                        "assembly_name": "cavPor3",\n+                        "end": 33853154,\n+                        "seq_region_name": "scaffold_6",\n+                        "db_type": "core",\n+                        "strand": 1,\n+                        "id": "ENSCPOE00000067347",\n+                        "start": 33852814\n+                    }\n+                ],\n+                "Parent": "ENSCPOG00000005153",\n+                "seq_region_name": "scaffold_6",\n+                "db_type": "core",\n+                "is_canonical": 1,\n+                "strand": 1,\n+                "id": "ENSCPOT00000005208",\n+                "version": 2,\n+                "species": "cavia_porcellus",\n+                "assembly_name": "cavPor3",\n+                "display_name": "BRCA2-201",\n+                "end": 33853154,\n+                "biotype": "protein_coding",\n+                "Translation": {\n+                    "object_type": "Translation",\n+                    "species": "cavia_porcellus",\n+                    "Parent": "ENSCPOT00000005208",\n+                    "end": 33853154,\n+                    "length": 3313,\n+                    "db_type": "core",\n+                    "id": 
"ENSCPOP00000004635",\n+                    "start": 33778275\n+                },\n+                "start": 33778275\n+            }\n+        ],\n+        "start": 33778275\n+    }\n+}\n\\ No newline at end of file\n'