Repository 'ensembl_get_genetree'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/ensembl_get_genetree

Changeset 1:98aba0efe77a (2016-12-12)
Previous changeset 0:f0018341e9f6 (2016-08-11) Next changeset 2:950d9d11b6fb (2016-12-21)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/Ensembl-REST commit aaf8d501c3a92ed415fdf9293a65468c72aae984-dirty
modified:
test-data/genetree.json
test-data/genetree.phyloxml
added:
get_feature_info.py
get_genetree.py
get_genetree.xml
get_sequences.py
removed:
get_feature_info/get_feature_info.py
get_genetree/get_genetree.py
get_genetree/get_genetree.xml
get_sequences/get_sequences.py
b
diff -r f0018341e9f6 -r 98aba0efe77a get_feature_info.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_feature_info.py Mon Dec 12 07:47:26 2016 -0500
[
@@ -0,0 +1,50 @@
+# A simple tool to connect to the Ensembl server and retrieve feature
+# information using the Ensembl REST API.
+#
+# Reads Ensembl IDs (one per line) from the file given with -i, sends them in
+# a single POST request to the 'lookup/id' endpoint of the selected server and
+# prints the response body to standard output.
+import json
+import optparse
+from urlparse import urljoin
+
+import requests
+
+parser = optparse.OptionParser()
+parser.add_option('-i', '--input', help='List of Ensembl IDs')
+parser.add_option('-e', '--expand', type='choice', choices=['0', '1'],
+                  default='0',
+                  help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.')
+
+parser.add_option('-s', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+
+parser.add_option('-f', '--format', type='choice',
+                  choices=['full', 'condensed'], default='full',
+                  help='Specify the formats to emit from this endpoint')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+
+server = 'http://rest.%s.org' % options.species
+ext = 'lookup/id'
+
+headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
+# The 'format' and 'expand' options are forwarded as query-string parameters.
+params = dict((k, getattr(options, k)) for k in ['format', 'expand'])
+with open(options.input) as f:
+    # Strip surrounding whitespace and skip blank lines, so that e.g. a
+    # trailing empty line in the input is not submitted as an empty ID.
+    stripped_lines = (line.strip() for line in f)
+    ids = [ensembl_id for ensembl_id in stripped_lines if ensembl_id]
+data = {'ids': ids}
+r = requests.post(urljoin(server, ext), params=params, headers=headers,
+                  data=json.dumps(data))
+
+# Abort with an HTTPError on 4xx/5xx responses instead of printing them.
+if not r.ok:
+    r.raise_for_status()
+
+print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a get_feature_info/get_feature_info.py
--- a/get_feature_info/get_feature_info.py Thu Aug 11 14:29:07 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,41 +0,0 @@
-# A simple tool to connect to the Ensembl server and retrieve feature
-# information using the Ensembl REST API.
-import json
-import optparse
-from urlparse import urljoin
-
-import requests
-
-parser = optparse.OptionParser()
-parser.add_option('-i', '--input', help='List of Ensembl IDs')
-parser.add_option('-e', '--expand', type='choice', choices=['0', '1'],
-                  default='0',
-                  help='Expands the search to include any connected features. e.g. If the object is a gene, its transcripts, translations and exons will be returned as well.')
-
-parser.add_option('-s', '--species', type='choice',
-                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
-                  help='Specify the genome databases for vertebrates and other eukaryotic species')
-
-parser.add_option('-f', '--format', type='choice',
-                  choices=['full', 'condensed'], default='full',
-                  help='Specify the formats to emit from this endpoint')
-options, args = parser.parse_args()
-if options.input is None:
-    raise Exception('-i option must be specified')
-
-
-server = 'http://rest.%s.org' % options.species
-ext = 'lookup/id'
-
-headers = {'Content-Type': 'application/json', 'Accept': 'application/json'}
-params = dict((k, getattr(options, k)) for k in ['format', 'expand'])
-with open(options.input) as f:
-    ids = [line.strip() for line in f]
-data = {'ids': ids}
-r = requests.post(urljoin(server, ext), params=params, headers=headers,
-                  data=json.dumps(data))
-
-if not r.ok:
-    r.raise_for_status()
-
-print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a get_genetree.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_genetree.py Mon Dec 12 07:47:26 2016 -0500
[
@@ -0,0 +1,61 @@
+# Command-line client that retrieves a single gene tree through the Ensembl
+# REST API and writes the response body to standard output.
+import optparse
+from urlparse import urljoin
+
+import requests
+
+# REST endpoint to query for each accepted --id_type value.
+ENDPOINTS = {
+    'gene_id': 'genetree/member/id',
+    'gene_tree_id': 'genetree/id',
+}
+# Content type to request for each accepted --format value.
+CONTENT_TYPES = {
+    'json': 'application/json',
+    'orthoxml': 'text/x-orthoxml+xml',
+    'phyloxml': 'text/x-phyloxml+xml',
+    'nh': 'text/x-nh',
+}
+# Options forwarded unchanged to the server as query-string parameters.
+QUERY_OPTIONS = ['sequence', 'aligned', 'cigar_line', 'nh_format']
+
+parser = optparse.OptionParser()
+parser.add_option('--id_type', type='choice', default='gene_id',
+                  choices=['gene_id', 'gene_tree_id'], help='Input type')
+parser.add_option('-i', '--input', help='Ensembl ID')
+parser.add_option('--format', type='choice',
+                  choices=['json', 'orthoxml', 'phyloxml', 'nh'],
+                  default='json', help='Output format')
+parser.add_option('-s', '--sequence', type='choice',
+                  choices=['protein', 'cdna', 'none'], default='protein',
+                  help='The type of sequence to bring back. Setting it to none results in no sequence being returned')
+parser.add_option('-g', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'],
+                  default='0', help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)')
+parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'],
+                  default='0',
+                  help='Return the aligned sequence encoded in CIGAR format')
+parser.add_option('--nh_format', type='choice',
+                  choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'],
+                  default='simple',
+                  help='The format of a NH (New Hampshire) request')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+server = 'http://rest.%s.org' % options.species
+url = urljoin(server, '%s/%s' % (ENDPOINTS[options.id_type], options.input))
+headers = {'Content-Type': CONTENT_TYPES[options.format]}
+
+params = {}
+for name in QUERY_OPTIONS:
+    params[name] = getattr(options, name)
+
+r = requests.get(url, params=params, headers=headers)
+# Raises requests.HTTPError for 4xx/5xx responses; does nothing otherwise.
+r.raise_for_status()
+
+print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a get_genetree.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_genetree.xml Mon Dec 12 07:47:26 2016 -0500
[
@@ -0,0 +1,150 @@
+<tool id="get_genetree" name="Get gene tree by Ensembl ID" version="0.1.1">
+    <description>using REST API</description>
+    <macros>
+        <!-- Sequence options shared by the JSON and phyloXML outputs. <yield /> is replaced by the child elements of the calling <expand>. -->
+        <xml name="sequence_conditional">
+            <conditional name="sequence">
+                <param name="sequence_selector" type="select" label="Sequence type" help="The type of sequences to bring back. If 'None', no sequence is returned">
+                    <option value="protein" selected="true">Protein</option>
+                    <option value="cdna">cDNA</option>
+                    <option value="none">None</option>
+                </param>
+                <when value="protein">
+                    <param name="aligned" type="boolean" truevalue="1" falsevalue="0" label="Return the aligned sequences instead of the original ones" help="The original sequences contain no insertions" />
+                    <yield />
+                </when>
+                <when value="cdna">
+                    <param name="aligned" type="boolean" truevalue="1" falsevalue="0" label="Return the aligned sequences instead of the original ones" help="The original sequences contain no insertions" />
+                    <yield />
+                </when>
+                <when value="none" />
+            </conditional>
+        </xml>
+    </macros>
+    <requirements>
+        <requirement type="package" version="2.7">requests</requirement>
+    </requirements>
+    <!-- The script path is quoted so that an installation directory containing spaces still works. The -a and -c flags are only passed when a sequence type other than 'none' is selected. -->
+    <command>
+<![CDATA[
+python '$__tool_directory__/get_genetree.py'
+--id_type $input_type
+-i "$input"
+-g $species_selector
+--format ${output_format.output_format_selector}
+#if $output_format.output_format_selector == 'json'
+    -s ${output_format.sequence.sequence_selector}
+    #if $output_format.sequence.sequence_selector != 'none'
+        -a ${output_format.sequence.aligned}
+        -c ${output_format.sequence.cigar}
+    #end if
+#elif $output_format.output_format_selector == 'phyloxml'
+    -s ${output_format.sequence.sequence_selector}
+    #if $output_format.sequence.sequence_selector != 'none'
+        -a ${output_format.sequence.aligned}
+    #end if
+#else
+    --nh_format $output_format.nh_format
+#end if
+> "$output"
+]]>
+    </command>
+
+    <inputs>
+        <param name="input_type" type="select" label="Input type">
+            <option value="gene_id" selected="true">Ensembl gene ID</option>
+            <option value="gene_tree_id">Ensembl genetree ID</option>
+        </param>
+        <param name="input" type="text" label="Ensembl ID">
+            <validator type="empty_field" />
+        </param>
+        <param name="species_selector" type="select" label="Select Species">
+            <option value="ensembl" selected="true">Vertebrates</option>
+            <option value="ensemblgenomes">Other species</option>
+        </param>
+        <conditional name="output_format">
+            <param name="output_format_selector" type="select" label="Output format">
+                <option value="json" selected="true">JSON</option>
+                <!-- <option value="orthoxml">OrthoXML</option> -->
+                <option value="phyloxml">phyloXML</option>
+                <option value="nh">Newick</option>
+            </param>
+            <when value="json">
+                <expand macro="sequence_conditional">
+                    <param name="cigar" type="boolean" truevalue="1" falsevalue="0" label="Output the sequences also in CIGAR format" />
+                </expand>
+            </when>
+            <when value="phyloxml">
+                <expand macro="sequence_conditional" />
+            </when>
+            <when value="nh">
+                <param name="nh_format" type="select" label="NH format" help="The format of a NH (New Hampshire) request">
+                    <option value="simple" selected="true">simple</option>
+                    <option value="full">full</option>
+                    <option value="display_label_composite">display_label_composite</option>
+                    <option value="species">species</option>
+                    <option value="species_short_name">species_short_name</option>
+                    <option value="ncbi_taxon">ncbi_taxon</option>
+                    <option value="ncbi_name">ncbi_name</option>
+                    <option value="njtree">njtree</option>
+                    <option value="phylip">phylip</option>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <!-- The datatype defaults to json and is switched to match the selected output format. -->
+        <data name="output" format="json" label="$(tool.name) on ${input_type} ${input}">
+            <change_format>
+                <when input="output_format.output_format_selector" value="phyloxml" format="phyloxml" />
+                <when input="output_format.output_format_selector" value="nh" format="nhx" />
+            </change_format>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Each test runs get_genetree.py, which sends a request to the Ensembl REST server. -->
+        <test>
+            <param name="input_type" value="gene_id" />
+            <param name="input" value="ENSGALP00000027524" />
+            <param name="output_format_selector" value="json" />
+            <param name="sequence_selector" value="protein" />
+            <param name="aligned" value="0" />
+            <param name="cigar" value="0" />
+            <output name="output" file="genetree.json" ftype="json" />
+        </test>
+        <test>
+            <param name="input_type" value="gene_tree_id" />
+            <param name="input" value="ENSGT00390000003602" />
+            <param name="output_format_selector" value="json" />
+            <param name="sequence_selector" value="protein" />
+            <param name="aligned" value="0" />
+            <param name="cigar" value="0" />
+            <output name="output" file="genetree.json" ftype="json" />
+        </test>
+        <test>
+            <param name="input_type" value="gene_tree_id" />
+            <param name="input" value="ENSGT00390000003602" />
+            <param name="output_format_selector" value="phyloxml" />
+            <param name="sequence_selector" value="protein" />
+            <param name="aligned" value="0" />
+            <output name="output" file="genetree.phyloxml" ftype="phyloxml" compare="sim_size" />
+        </test>
+    </tests>
+
+    <help>
+<![CDATA[
+**What it does**
+
+Retrieve a gene tree from Ensembl using its REST API.
+
+Uses the `"GET genetree/id"`_ and `"GET genetree/member/id"`_ API endpoints.
+
+.. _"GET genetree/id": http://rest.ensembl.org/documentation/info/genetree
+.. _"GET genetree/member/id": http://rest.ensembl.org/documentation/info/genetree_member_id
+]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r f0018341e9f6 -r 98aba0efe77a get_genetree/get_genetree.py
--- a/get_genetree/get_genetree.py Thu Aug 11 14:29:07 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,58 +0,0 @@
-# A simple tool to connect to the Ensembl server and retrieve genetree using
-# the Ensembl REST API.
-import optparse
-from urlparse import urljoin
-
-import requests
-
-parser = optparse.OptionParser()
-parser.add_option('--id_type', type='choice', default='gene_id',
-                  choices=['gene_id', 'gene_tree_id'], help='Input type')
-parser.add_option('-i', '--input', help='Ensembl ID')
-parser.add_option('--format', type='choice',
-                  choices=['json', 'orthoxml', 'phyloxml', 'nh'],
-                  default='json', help='Output format')
-parser.add_option('-s', '--sequence', type='choice',
-                  choices=['protein', 'cdna', 'none'], default='protein',
-                  help='The type of sequence to bring back. Setting it to none results in no sequence being returned')
-
-parser.add_option('-g', '--species', type='choice',
-                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
-                  help='Specify the genome databases for vertebrates and other eukaryotic species')
-
-parser.add_option('-a', '--aligned', type='choice', choices=['0', '1'],
-                  default='0', help='Return the aligned string if true. Otherwise, return the original sequence (no insertions)')
-parser.add_option('-c', '--cigar_line', type='choice', choices=['0', '1'],
-                  default='0',
-                  help='Return the aligned sequence encoded in CIGAR format')
-parser.add_option('--nh_format', type='choice',
-                  choices=['full', 'display_label_composite', 'simple', 'species', 'species_short_name', 'ncbi_taxon', 'ncbi_name', 'njtree', 'phylip'],
-                  default='simple',
-                  help='The format of a NH (New Hampshire) request')
-options, args = parser.parse_args()
-if options.input is None:
-    raise Exception('-i option must be specified')
-
-server = 'http://rest.%s.org' % options.species
-
-if options.id_type == 'gene_id':
-    ext = 'genetree/member/id'
-elif options.id_type == 'gene_tree_id':
-    ext = 'genetree/id'
-
-if options.format == 'json':
-    content_type = 'application/json'
-elif options.format == 'orthoxml':
-    content_type = 'text/x-orthoxml+xml'
-elif options.format == 'phyloxml':
-    content_type = 'text/x-phyloxml+xml'
-elif options.format == 'nh':
-    content_type = 'text/x-nh'
-headers = {'Content-Type': content_type}
-params = dict((k, getattr(options, k)) for k in ['sequence', 'aligned', 'cigar_line', 'nh_format'])
-r = requests.get(urljoin(server, '/'.join([ext, options.input])), params=params, headers=headers)
-
-if not r.ok:
-    r.raise_for_status()
-
-print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a get_genetree/get_genetree.xml
--- a/get_genetree/get_genetree.xml Thu Aug 11 14:29:07 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,146 +0,0 @@
-<tool id="get_genetree" name="Get gene tree by Ensembl ID" version="0.1.1">
-    <description>using REST API</description>
-    <macros>
-        <xml name="sequence_conditional">
-            <conditional name="sequence">
-                <param name="sequence_selector" type="select" label="Sequence type" help="The type of sequences to bring back. If 'None', no sequence is returned">
-                    <option value="protein" selected="true">Protein</option>
-                    <option value="cdna">cDNA</option>
-                    <option value="none">None</option>
-                </param>
-                <when value="protein">
-                    <param name="aligned" type="boolean" truevalue="1" falsevalue="0" label="Return the aligned sequences instead of the original ones" help="The original sequences contain no insertions" />
-                    <yield />
-                </when>
-                <when value="cdna">
-                    <param name="aligned" type="boolean" truevalue="1" falsevalue="0" label="Return the aligned sequences instead of the original ones" help="The original sequences contain no insertions" />
-                    <yield />
-                </when>
-                <when value="none" />
-            </conditional>
-        </xml>
-    </macros>
-    <requirements>
-        <requirement type="package" version="2.7">requests</requirement>
-    </requirements>
-    <command>
-<![CDATA[
-python $__tool_directory__/get_genetree.py
---id_type $input_type
--i "$input"
--g $species_selector
---format ${output_format.output_format_selector}
-#if $output_format.output_format_selector == 'json'
-    -s ${output_format.sequence.sequence_selector}
-    #if $output_format.sequence.sequence_selector != 'none'
-        -a ${output_format.sequence.aligned}
-        -c ${output_format.sequence.cigar}
-    #end if
-#elif $output_format.output_format_selector == 'phyloxml'
-    -s ${output_format.sequence.sequence_selector}
-    #if $output_format.sequence.sequence_selector != 'none'
-        -a ${output_format.sequence.aligned}
-    #end if
-#else
-    --nh_format $output_format.nh_format
-#end if
-> "$output"
-]]>
-    </command>
-
-    <inputs>
-         <param name="input_type" type="select" label="Input type">
-            <option value="gene_id" selected="true">Ensembl gene ID</option>
-            <option value="gene_tree_id">Ensembl genetree ID</option>
-        </param>
-        <param name="input" type="text" label="Ensembl ID">
-            <validator type="empty_field" />
-        </param>
-        <param name="species_selector" type="select" label="Select Species">
-            <option value="ensembl" selected="true">Vertebrates</option>
-            <option value="ensemblgenomes">Other species</option>
-        </param>
-        <conditional name="output_format">
-            <param name="output_format_selector" type="select" label="Output format">
-                <option value="json" selected="true">JSON</option>
-                <!-- <option value="orthoxml">OrthoXML</option> -->
-                <option value="phyloxml">phyloXML</option>
-                <option value="nh">Newick</option>
-            </param>
-            <when value="json">
-                <expand macro="sequence_conditional">
-                    <param name="cigar" type="boolean" truevalue="1" falsevalue="0" label="Output the sequences also in CIGAR format" />
-                </expand>
-            </when>
-            <when value="phyloxml">
-                <expand macro="sequence_conditional" />
-            </when>
-            <when value="nh">
-                <param name="nh_format" type="select" label="NH format" help="The format of a NH (New Hampshire) request">
-                    <option value="simple" selected="true">simple</option>
-                    <option value="full">full</option>
-                    <option value="display_label_composite">display_label_composite</option>
-                    <option value="species">species</option>
-                    <option value="species_short_name">species_short_name</option>
-                    <option value="ncbi_taxon">ncbi_taxon</option>
-                    <option value="ncbi_name">ncbi_name</option>
-                    <option value="njtree">njtree</option>
-                    <option value="phylip">phylip</option>
-                </param>
-            </when>
-        </conditional>
-    </inputs>
-
-    <outputs>
-        <data name="output" format="json" label="$(tool.name) on ${input_type} ${input}">
-            <change_format>
-                <when input="output_format.output_format_selector" value="phyloxml" format="phyloxml" />
-                <when input="output_format.output_format_selector" value="nh" format="nhx" />
-            </change_format>
-        </data>
-    </outputs>
-
-    <tests>
-        <test>
-            <param name="input_type" value="gene_id" />
-            <param name="input" value="ENSGALP00000027524" />
-            <param name="output_format_selector" value="json" />
-            <param name="sequence_selector" value="protein" />
-            <param name="aligned" value="0" />
-            <param name="cigar" value="0" />
-            <output name="output" file="genetree.json" ftype="json" />
-        </test>
-        <test>
-            <param name="input_type" value="gene_tree_id" />
-            <param name="input" value="ENSGT00390000003602" />
-            <param name="output_format_selector" value="json" />
-            <param name="sequence_selector" value="protein" />
-            <param name="aligned" value="0" />
-            <param name="cigar" value="0" />
-            <output name="output" file="genetree.json" fype="json" />
-        </test>
-        <test>
-            <param name="input_type" value="gene_tree_id" />
-            <param name="input" value="ENSGT00390000003602" />
-            <param name="output_format_selector" value="phyloxml" />
-            <param name="sequence_selector" value="protein" />
-            <param name="aligned" value="0" />
-            <output name="output" file="genetree.phyloxml" ftype="phyloxml" compare="sim_size" />
-        </test>
-    </tests>
-
-    <help>
-<![CDATA[
-**What it does**
-
-Retrieve a gene tree from Ensembl using its REST API.
-
-Uses the `"GET genetree/id"`_ and `"GET genetree/member/id"`_ API endpoint.
-
-.. _"GET genetree/id": http://rest.ensembl.org/documentation/info/genetree
-.. _"GET genetree/member/id": http://rest.ensembl.org/documentation/info/genetree_member_id
-]]>
-    </help>
-    <citations>
-    </citations>
-</tool>
b
diff -r f0018341e9f6 -r 98aba0efe77a get_sequences.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_sequences.py Mon Dec 12 07:47:26 2016 -0500
[
@@ -0,0 +1,54 @@
+# A simple tool to connect to the Ensembl server and retrieve sequences using
+# the Ensembl REST API.
+#
+# Reads Ensembl IDs (one per line) from the file given with -i, POSTs them to
+# the 'sequence/id' endpoint in batches and prints each response body to
+# standard output.
+import json
+import optparse
+from itertools import islice
+from urlparse import urljoin
+
+import requests
+
+parser = optparse.OptionParser()
+parser.add_option('-i', '--input', help='List of Ensembl IDs')
+
+parser.add_option('-s', '--species', type='choice',
+                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
+                  help='Specify the genome databases for vertebrates and other eukaryotic species')
+
+parser.add_option('-t', '--type', type='choice',
+                  choices=['genomic', 'cds', 'cdna', 'protein'],
+                  default='genomic', help='Type of sequence')
+parser.add_option('--expand_3prime', type='int', default=0,
+                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+parser.add_option('--expand_5prime', type='int', default=0,
+                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
+options, args = parser.parse_args()
+if options.input is None:
+    raise Exception('-i option must be specified')
+
+server = 'http://rest.%s.org' % options.species
+ext = 'sequence/id'
+
+headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
+params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
+with open(options.input) as f:
+    # Strip whitespace and drop blank lines so that empty IDs are never
+    # submitted. Filtering happens before batching so that each request still
+    # carries up to 50 IDs.
+    stripped_lines = (line.strip() for line in f)
+    id_iter = (ensembl_id for ensembl_id in stripped_lines if ensembl_id)
+    # Need to split the IDs in chunks of 50 because of the limit imposed by Ensembl
+    while True:
+        ids = list(islice(id_iter, 50))
+        if not ids:
+            break
+        data = {'ids': ids}
+        r = requests.post(urljoin(server, ext), params=params, headers=headers,
+                          data=json.dumps(data))
+
+        if not r.ok:
+            r.raise_for_status()
+
+        print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a get_sequences/get_sequences.py
--- a/get_sequences/get_sequences.py Thu Aug 11 14:29:07 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,46 +0,0 @@
-# A simple tool to connect to the Ensembl server and retrieve sequences using
-# the Ensembl REST API.
-import json
-import optparse
-from itertools import islice
-from urlparse import urljoin
-
-import requests
-
-parser = optparse.OptionParser()
-parser.add_option('-i', '--input', help='List of Ensembl IDs')
-
-parser.add_option('-s', '--species', type='choice',
-                  choices=['ensembl', 'ensemblgenomes'], default='ensembl',
-                  help='Specify the genome databases for vertebrates and other eukaryotic species')
-
-parser.add_option('-t', '--type', type='choice',
-                  choices=['genomic', 'cds', 'cdna', 'protein'],
-                  default='genomic', help='Type of sequence')
-parser.add_option('--expand_3prime', type='int', default=0,
-                  help='Expand the sequence downstream of the sequence by this many basepairs. Only available when using genomic sequence type')
-parser.add_option('--expand_5prime', type='int', default=0,
-                  help='Expand the sequence upstream of the sequence by this many basepairs. Only available when using genomic sequence type')
-options, args = parser.parse_args()
-if options.input is None:
-    raise Exception('-i option must be specified')
-
-server = 'http://rest.%s.org' % options.species
-ext = 'sequence/id'
-
-headers = {'Content-Type': 'text/x-fasta', 'Accept': 'text/x-fasta'}
-params = dict((k, getattr(options, k)) for k in ['type', 'expand_3prime', 'expand_5prime'])
-with open(options.input) as f:
-    # Need to split the file in chunks of 50 lines because of the limit imposed by Ensembl
-    while True:
-        ids = [line.strip() for line in islice(f, 50)]
-        if not ids:
-            break
-        data = {'ids': ids}
-        r = requests.post(urljoin(server, ext), params=params, headers=headers,
-                          data=json.dumps(data))
-
-        if not r.ok:
-            r.raise_for_status()
-
-        print r.text
b
diff -r f0018341e9f6 -r 98aba0efe77a test-data/genetree.json
--- a/test-data/genetree.json Thu Aug 11 14:29:07 2016 -0400
+++ b/test-data/genetree.json Mon Dec 12 07:47:26 2016 -0500
[
b'@@ -1,1 +1,1 @@\n-{"tree":{"events":{"type":"speciation"},"branch_length":0,"children":[{"events":{"type":"speciation"},"branch_length":0.153275,"children":[{"events":{"type":"speciation"},"branch_length":0.155187,"children":[{"events":{"type":"speciation"},"branch_length":0.133192,"children":[{"events":{"type":"speciation"},"branch_length":0.201095,"children":[{"events":{"type":"speciation"},"branch_length":0.015782,"children":[{"events":{"type":"speciation"},"branch_length":0.217419,"children":[{"sequence":{"mol_seq":{"seq":"QLARDMQDMRIRKKKRQTIRPLPGSLFQKKSSGVARIPFKAAVNGKPPARYTAKPLCGLGVPLNVLEITSETAESFRFSLQHFVKLESLIDKGGIQLADGGWLIPTNDGTAGKEEFYRALCDTPGVDPKLMSEEWVYNHYRWIVWKQASMERSFPEEMGSLCLTPEQVLLQLKYRYDIEVDHSRRPALRKIMEKDDTAAKTLVLCVCGVVFRGSSPKNKSFGDISTPGADPKVENPCAVVWLTDGWYSIKAQLDGPLTSMLHRGRLPVGGKLIIHGAQLVGSENACSPLEAPVSLMLKICANSSRPARWDSKLGFHRDPRPFLLPVSSLYSSGGPVGCVDIIILRSYPILWMERKPEGGTVFRSGRAEEKEARRYNIHKEKAMEILFDKIKAEFEKEEKGNRKPQCRRTINGQNITSLQDGEELYEAVGDDPAFLEAHLTEKQVEVLQNYKRLVMEKQQAELQDRYRRAVESAEDGVGGCPKRDVAPVWRLCIADSMGHSGRVYQLSLWRPPSELQALLKEGCRYKVYNLTTLDSKKQGGNATVQLTATKKTQFEHLQGSEEWLSKHFQPRVATNFVRLQDPEFNPLCSEVDLTGYVITIIDGQGFSPAFYLADGKQNFVKVRCFSSFAQSGLEDVIKPRVLLALSNLQLRGQSTSPTPVVYAGDLTVFSTNPKEVHLQESFSQLKTLVQGQENFFVHAEEKLSQLMSDGLSAIASPAGQIQTPASTVKRRGDMTDVSSNIMVINKTSKVTCQQPGRSHRFSTPINRNSTAHSSAERNPSTIKKRKALDYLSHIPSPPPLSCLSTLSSPSVKKIFIPPRRTEIPGTLKTVKTPNQKPSNTPVDDQWVNDEELAMIDTQAL","is_aligned":0},"location":"scaffold_19:196046-199577","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTRUP00000015030"}]},"branch_length":0.072273,"id":{"source":"EnsEMBL","accession":"ENSTRUG00000006177"},"confidence":{},"taxonomy":{"scientific_name":"Takifugu 
rubripes","id":31033}},{"sequence":{"mol_seq":{"seq":"VSFSSDTPRKPKAGSLSSEFTDRFLAQEALDCTKALLEDERLVDDPHMTGECLHRCPQFSLLVNLFVKPHTAVLIPEQPPLKRRLLEEFDRTDGSSRGSALNPEKCSPNGIMGDRRVFKCSVSFQPNITTPHRICSQKAERPVSFLSRRSGTNYVETSLPNTTPTKVSALRDSNEARLQKSNFIPPFIKNVKLDTPNSKTASTFVPPFKKSRNSSKTEEEEPKHHFIPPFTNPCATSSTKKHTAGHLHNVELARDMQGMRIRKKKRQTILPLPGSLFLKKSSGVTRIPLKSAVNGKPPARYTPKQLYGLGVPLNVLEITSETAGSFRFSLQQFVKLESLTDKGGIQLADGGWLIPRNDGTAGKEEFYRALCDTTGVDPKLISEEWVYNHYRWIVWKQASMERSFPEQLGSLCLTPEQVLLQLKYRYDIEVDQSRRPALRKIMERDDTAAKTLILCVCGVVSRGSSPQKQGLGGVAAPSSDPQVENPFAVVWLTDGWYSIKAQLDGPLTSMLNRGRLPVGGKLIIHGAQLVGSQDACSPLEAPESIMLKIFANSSRRARWDAKLGFYRDPRPFLLPVSSLYNSGGPVGCVDIIILRSYPTLWMERKPEGGTVFRSGRAEEKEARRYNVHKEKAMEILFDKIQAEFEKEERDNRKPRSRRRTIGDQDIKSLQDGEELYEAVGDDPAYLEAHLTEQQAETLQNYKRLLIEKKQAELQDRYRRAVETAEDGTGSCPKRDVAPVWRLSIADFMEKPGSVYQLNIWRPPSELQSLLKEGCRYKVYNLTTTDSKKQGGNTTVQLSGTKKTQFEDLQASEELLSTYFQPRVSATFIDLQDPEFHSLCGEVDLTGYVISIIDGQGFSPAFYLTDGKQNFVKVRCFSSFAQSGLEDVIKPSVLLALSNLQLRGQATSPTPVLYAGDLTVFSTNPKEVHLQESFSQLKTLVQ","is_aligned":0},"location":"16:4700614-4705074","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSTNIP00000002435"}]},"branch_length":0.113355,"id":{"source":"EnsEMBL","accession":"ENSTNIG00000016261"},"confidence":{},"taxonomy":{"scientific_name":"Tetraodon 
nigroviridis","id":99883}}],"confidence":{"bootstrap":100},"taxonomy":{"scientific_name":"Tetraodontidae","id":31031}},{"sequence":{"mol_seq":{"seq":"LPNVELAQDMQDMRIRKKKRQTIRPLPGSLFLTKTSGVTRIPLKAALVFLLQLYRHGVHQHVCEISSETAESFRFNLKQFIKREALLDGGGVQLADGGWLIPSKDGTAGKEEFYRALCDTPGVDPKLISDGWVDNHYRWVVWKQASMERSFPETMGGLCLTPEQVLLQLKYRYDVEVDHSRRPALRRITERDDTAAKTLVLCVCGVVSRSFDDSKTPRGADAGGGNPSAVVWLTDGWYAIRAQLDEPLTAMLRNGRVAVGSKLIVHGAQLVGSQEACSPLEAPEALMLKICANSSRPVRWDAKLGFHKDPRPFLLPLSCLYSSGGQVGCVDMIVLRSYPIQWMERKPEGGVVFRSVRAEEKEAKRFNGLKQKAMEILFAKIQDEFEKEDKGRTCDFTTQAISRQAIAGLQAGEELCEAVGEDPAHLEALLSEQQVETLNTYRRCVMEKKQAQLHDRFQRALESAEASEGSCPKREVTPVWRLGVADSRDQRGRVYQLNLWRPSSDLQALLKEGRRYKVYNLTTSDGKKHNGSSNVQLTGTKKTQFQDLQASREWLSTRFQPRVSACFVDLQNPEFQSLCGEVDLTGFVIQIVDGQGFSPAFYLADGELNFVKVRCFSSFAQSGLEDLVKPRVLLSLSNLQLRGQSASPTPVVYAGDLTVFSANPKDAHLQESLSQNKNLRQSQENFFLIAEETLSRLVQSDGRRPLSSPALHTRTPALATSMIQDTTASVKCVLLMQGASQQLVRSRGTFTPVSRKPPAANCSTEKDAGSVKRRRALNYLSHIPSPPPLLNLGSVASPCVNKTFNPPRRSGTPSTLKTVQTPAHKAQKVDSLVEDEWVNDEELAMIDTQAL","is_aligned":0},"location":"groupI:13362884-13366744","name'..b'EDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKAVSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAERCPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVI
EVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQNESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKMTAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK","is_aligned":0},"location":"GL172716.1:1071058-1096238","name":"brca2-201","id":[{"source":"EnsEMBL","accession":"ENSXETP00000060681"}]},"branch_length":0.756151,"id":{"source":"EnsEMBL","accession":"ENSXETG00000017011"},"confidence":{},"taxonomy":{"scientific_name":"Xenopus tropicalis","common_name":"X.tropicalis","id":8364}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Tetrapoda","common_name":"Tetrapods","timetree_mya":371.2,"id":32523}},{"sequence":{"mol_seq":{"seq":"MEWEAVESVKALMRDDELTDAGLDASKDSLNRACRRQSGGNFRARKRMRLEQVSADEPPVKRQLLAEFDRTVENGHKSLQKPLICTPNGTLKDRRKFMYSVPLKPVVCGPWSNNSKTGQQVTKPSITLPGRGVETFQPKNHIAPSPVYDPPSNRRGPVFAPPFHGATFRGLQKPSASHTSSKTAKTFVPPFKMKASASHTVHFSSKVINTCEKILENLVYLKPSLASCNIFQSLEEMTANLQCARDLQEMRLRKKQRQNIRPQPGSLYLAKTSGVARVSLKAATGNQCPSSYSTEQLYVHGVGKSTLKVRSENAESFQFSCSDYFGKDVLLAGNGLKLADGGWLIPSDKGMVGKEEFYRALCDTPGVAPKLISESWVYNHYRWIVWKLAAMEAAFPKEFGNRCLTPERVLLQLKYRYDIEVDKCRRSTVKKIMERDDTAAKTLVLCISKLISVEDRFKQTKNKNEKGAEEARKEAVAGVIETTDGWYGIKVLLDPPLTVLVQRGRLSVGCKIITHGAEIIGSQDACTPLEAPECLMLKISANSTRPACWSAKLGFHRDPRPFPLPLASLFNDGGLVGCVDVVVVRLYPIQWMEKKSDGIFVFRNDRAEEREAQRQVENQQRKMESLFAKIQTEFEQKYEAKSKRRGQKAQKFSKQEIQALQDGAELNEAIENSMDPGYFEACLREEQLKVLHGHRQMLNEKKQAEFQAEFKKALESAEQEGKSCCKRGVTPVWKLRIVDYRKPSAAEYILNIWRPLADLHSLLKEGNRYRIYQLLASQSKGRTTTADIQLTATKKTQYQQFQSFPELISELYSPRK
AVKFNMLMDPTFRPAYAEVDLVGYTISIEGKPGVAPVVYLSDESHNFVAIKVWTALNQLAVEDIVKPFSLIAASNLQWRSDSRSIIPMLYAGDLSIFSSNPKEGHLQEAFNQRRTAIQENISGTYLPPEKKNLHQESYKSCQYNTLNVLMNGNIHTQSPVLSRVHMGTSCAFLFLLPSPYPESKHTSPLITMKAGVKSMTFPGSAKLMPQASENQELDTPKNRKKKAALDYLCRIPSPPALTPIRSFVSSSLQKAFHPPRSCVKLQSGENPVVPTVGNNAVLGIQSKKDEGPAAFNEEDSVADEELAMINTQAFLVGLRRDKRPSLLDKTASLKGHVPSERFLEEKLLSVLKEQASSNSERNATSLENKSCDKSRTCVKPCEHSNDSIAEETSEIIPGCHGGESAVENQSKNSSLCHKKLQQKKRRKYY","is_aligned":0},"location":"JH127744.1:299190-332700","id":[{"source":"EnsEMBL","accession":"ENSLACP00000008815"}]},"branch_length":0.314542,"id":{"source":"EnsEMBL","accession":"ENSLACG00000007788"},"confidence":{},"taxonomy":{"scientific_name":"Latimeria chalumnae","common_name":"Coelacanth","id":7897}}],"confidence":{"bootstrap":1},"taxonomy":{"scientific_name":"Sarcopterygii","common_name":"Lobe-finned fish","timetree_mya":414.9,"id":8287}}],"confidence":{},"taxonomy":{"scientific_name":"Euteleostomi","common_name":"Bony vertebrates","timetree_mya":441,"id":117571}},"rooted":1,"id":"ENSGT00390000003602","type":"gene tree"}\n'
b
diff -r f0018341e9f6 -r 98aba0efe77a test-data/genetree.phyloxml
--- a/test-data/genetree.phyloxml Thu Aug 11 14:29:07 2016 -0400
+++ b/test-data/genetree.phyloxml Mon Dec 12 07:47:26 2016 -0500
b
b'@@ -4,171 +4,52 @@\n   <phylogeny rooted="true" type="gene tree">\n     <clade branch_length="0">\n       <taxonomy>\n+        <scientific_name>Euteleostomi</scientific_name>\n         <id>117571</id>\n-        <scientific_name>Euteleostomi</scientific_name>\n+        <common_name>Bony vertebrates</common_name>\n       </taxonomy>\n-      <clade branch_length="0.153275">\n-        <confidence type="bootstrap">95</confidence>\n+      <clade branch_length="0.149761">\n+        <confidence type="bootstrap">92</confidence>\n         <taxonomy>\n+          <scientific_name>Neopterygii</scientific_name>\n           <id>41665</id>\n-          <scientific_name>Neopterygii</scientific_name>\n+          <common_name>Ray-finned fishes</common_name>\n         </taxonomy>\n-        <clade branch_length="0.155187">\n-          <confidence type="bootstrap">19</confidence>\n+        <clade branch_length="0.148891">\n+          <confidence type="bootstrap">33</confidence>\n           <taxonomy>\n+            <scientific_name>Clupeocephala</scientific_name>\n             <id>186625</id>\n-            <scientific_name>Clupeocephala</scientific_name>\n+            <common_name>Teleost fishes</common_name>\n           </taxonomy>\n-          <clade branch_length="0.133192">\n-            <confidence type="bootstrap">18</confidence>\n+          <clade branch_length="0.181209">\n+            <confidence type="bootstrap">46</confidence>\n             <taxonomy>\n-              <id>123368</id>\n-              <scientific_name>Acanthomorphata</scientific_name>\n+              <scientific_name>Otophysi</scientific_name>\n+              <id>186626</id>\n+              <common_name>Teleost fishes</common_name>\n             </taxonomy>\n-            <clade branch_length="0.374304">\n-              <name>ENSGMOG00000009699</name>\n+            <clade branch_length="0.421267">\n+              <name>ENSAMXG00000013027</name>\n               <taxonomy>\n-                <id>8049</id>\n-          
      <scientific_name>Gadus morhua</scientific_name>\n+                <id>7994</id>\n+                <scientific_name>Astyanax mexicanus</scientific_name>\n+                <common_name>Cave fish</common_name>\n               </taxonomy>\n               <sequence>\n-                <accession source="Ensembl">ENSGMOP00000010385</accession>\n+                <accession source="Ensembl">ENSAMXP00000013440</accession>\n                 <name>brca2-201</name>\n-                <location>GeneScaffold_2233:16156-29802</location>\n-                <mol_seq is_aligned="0">LARDLQDMRLRKKKRQTVRPLPGSLFLAKASGGARIPLRAALRQLYQHGVHQPVWTVTAENAESFRLSFRRFFRWGSSVSRGVQLADGGWLVPRDDWTLGKEEFYRALCDSPGVDVKLLSQEWAYNHYRWVVWKLASMERSFPLTMASLWLNPEQILLQLKYRYDVEVDHSRRPALRKITERDDAAAKTLVLCVCGVVPGADQQPQGSHAPPPGVVWLTDGWYAIKAQLDAPLTAMLRRGGAGGKLVVYGAELVGSQDGCSPLEAPEGLMLKIGANSCRRARWDAKLGFQRDPRPFLLRLSSLFSTGGAVGCVDLLILRSYPVLWMEKKQDGVFVFRSGRAEEREARRFDDHNNKTMEALYAKIQADIQREDKGSARERNSGEELYEAFENDPAYLEACLNDQQLEVLQSYRRSVLEKRQAGLQERCRRALEQAQESQGGCPRRDVTPVWKLCVVDARAPPGYMLNVWRPPADLQAQLKEGARYRVYNLSVTAGKKRNPGASVQLTATSKTHFQEVQVGQDWLSDHFQARQAVHFQELQRPEVQSACGEVDLVGYVVTTADTHGTSPVVYLVDGDLNLVKVRCFSSLLQWGLEELVKPATLLALSNLQLSARRATTLPVLYASDLTAFSSNPREAHLQSSHSNADRXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXRRRALDYLSRMPSPPPLGPLGSLASPACVKKTFNPPRRSTTPAAVATTRQTPAHGPRVGPWQEEEWENDEGLAQIDTQVL</mol_seq>\n+                <location>KB882257.1:1212119-1232402</location>\n+                <mol_seq 
is_aligned="0">MIEDFFHQIETEELGPLSSDWFKELTAKASKDESFPGAVEHEASSRTEDGTFRAPQETPALESQMSSTPRLFRVRGPLSPDSVLGRSPNPAFQQQGMQTPLSTLPWTDSSPCLFGSAKESQRFEEDSEPLKKHDYFGLLDTPKSSLVQDSSAKRISESLGAQLHPDLSWSSNFNTPGAMSPTVILTKKDAQPSPVSFLKDKEVIIVRKLFPSLSKGTESTSEITSTAHNNASLTEENAQTKGPDESFDNVEGLWRQTVPDAINDSDVRDTVESVLDGAEDVLSIFFSNSSSALRRVKSKERTKKRVNGVSKDVKPAALATQHYTDRTGTATEAKSPPKNKDFSQWSPLSLSQVSDAKAKQDCTSDLANKHLNFESVNSDSGEDALKDSITQLGQFNTCEVEKQKSSPDSYPEKSQLRSSLLAASPALTFSRKPRKFVYQVQSPFPPTKENDLTGKHYREPFLTAENKKHIKDDPQTSDTVGDCLVHPKEPPVKQGNPGALNVDHGLDMTQLCNAFAEDFTQEIRSDAIKIDEVQTKAESVLHSDDGAEHLEFPANHKEASALAEESINLTSRSRMENTLSESLKHENGYPA'..b'IDNDKELLSHAKEESRKSNIKHSSKLINNENCGKTEHTSEHLVCQSNVSLPFIKTLGKNATILVSGNEKSQNQELQLDCLKCESTDVKHTPQKETINDNSKCNESSLSELSMKGFQTASGRNIMMSESSIQKARNIFAEEHEDSFTLRCNIQNTIQIPQPVNEPTQFPYVNLGPKPTTTSGWQEKNILRRSTEKGFMPGFCTAGGKKVSVSDNSLAKAHKLFQEECTFSKEGKLDEVKQNKLMNSEPLSLLTCESVLKQSDGFIEDISTSRNALEIRPELYPEGMCSNRASNGSGNNSEFTAGEGISININQSSLLTTGNVLKNLPSESSGHDVYSVTEHLSTVVKVKRYNDSGHFVNQNLAECNDNHVLSTQKNTANISNRNEDCTSLAPLSFSTASGKSVTVSHDSLQKARLMLSEAANDVTVDTSKQEAAYITPAIRKTEAEKEQNTVDDSDRVNANTFSFSTASGKKVNISGNSLKQVRAVCLSSDPKETSAALFNVEKSVFNEDVKDVSLLQPNVTMPKAVSFSTASGKTVQLSDESLKKARVIFSEIDTCPLMQQQTNESTVEEIVIGGGMTKSKQMPLTTEKVETTRKNNGTFGFNTASGKQVSVSESALQKVKDIFQEFDDPDNYEQNKSLVRLPVSSKIKESTPGTKRLVQTAGSSYKNDNLQCKAGNLRTFQDKQAGKKSLTYSEAAISPIESSVPIYEMQVMLKHTNNQACKYQPRVEVPLQDQRWQNILEIELPATCAPAFRETHNILFFGDLQHSTHFDICSLYSGKNPAVKHQLASHSKMQTLVISGRDSSGTLTLQFTLRIVILHTVNNQYSLNKQLFTFSSALRQVTCIPTQAHLHSKVKIFHQSLPIKSPDVASDSTSKSYSPTAAKETINCSSASKIPAKKFVPPFKKTVATLADNQSNSVQNGSSDGLIESIVYPKEDKVETICSSKDQFDDSDILQMTSNLRCSKDLQEMRIRKKLRQKIKPHPGSLYRLKMSHVKRISLQSAVAERCPTLYSREQLYRYGIVKNHIGVSSENALSFQFHCSNYFTKELLLSGNGVQLADGGWLIPTEQGNAGKEEIYRAFCDTPGVDPKLISAEWVHNHYRWIVWKLAAMEVRFPKTFACRCLTPERVLLQLKYRYDVEIDKSQRSAIKKIMERDDSPAKTLVLCIAKIISQGTRLPNACSNKTEPADSKESSAVIEVTDSWYGIKVLLDPCLTALLHKGRLFIGQKLIVHGAELIGSDDACSPLEAPESLMLKIAANSTRPVRWHTKLGYFKDPRPFCLHLSSLLSEGGVVGCVDVVIQRIYPMQWMEKMANGLYVFRNDRAEEREAEKHSANQQKKLEMLFSKIQAEFEQREVTCNRRKGLRRRSLNAQQMQTLQDGAEIYEAIQN
ESDPGYLESYLSAEQLKALNHHRQLLNDKKQALIQAEFRKAIECSEQDANGCTRRDVTPVWKLRIADYRNYETDAAYILNIWRPLPDVLSLLKEGCRYKMYHLAASTSKGKSLAADLQLTATKKTRFQQLQLSESILEQIYSPREVTDFSRFQEPLFSAPYAEVDLVGLIISIYKKTGAAPVVYISDESHNIVALKFWTDLGQLGLEEITKPRTYISASNLRWRSDCIEGIPTLYVGDLANISSNPKESHLQRAIQKLKLSVQNVQDFWNSSQTALMKTLQINSTDTTECSKNPTTPTWKSDVSARSGYLTPLHHSGKRLLNSVHTSDPQTENPGCSKEIQLKTCKKRKALDFLNRIPSPPPVTPVRPFVSPSLQKAFRPPRSCSVQKLGPETKGNTENVQGTTPECTKDLAKLEGEFVADEELAMINTQALLLGLEEEKKKTEQKTSRTAGKMTAHESPIENASPVPAQEQQTEEALNIPVGNSEKSYLCLRKRKRK</mol_seq>\n-            </sequence>\n-            <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">xenopus_tropicalis</property>\n-          </clade>\n+        </clade>\n+        <clade branch_length="0.314542">\n+          <name>ENSLACG00000007788</name>\n+          <taxonomy>\n+            <id>7897</id>\n+            <scientific_name>Latimeria chalumnae</scientific_name>\n+            <common_name>Coelacanth</common_name>\n+          </taxonomy>\n+          <sequence>\n+            <accession source="Ensembl">ENSLACP00000008815</accession>\n+            <location>JH127744.1:299190-332700</location>\n+            <mol_seq 
is_aligned="0">MEWEAVESVKALMRDDELTDAGLDASKDSLNRACRRQSGGNFRARKRMRLEQVSADEPPVKRQLLAEFDRTVENGHKSLQKPLICTPNGTLKDRRKFMYSVPLKPVVCGPWSNNSKTGQQVTKPSITLPGRGVETFQPKNHIAPSPVYDPPSNRRGPVFAPPFHGATFRGLQKPSASHTSSKTAKTFVPPFKMKASASHTVHFSSKVINTCEKILENLVYLKPSLASCNIFQSLEEMTANLQCARDLQEMRLRKKQRQNIRPQPGSLYLAKTSGVARVSLKAATGNQCPSSYSTEQLYVHGVGKSTLKVRSENAESFQFSCSDYFGKDVLLAGNGLKLADGGWLIPSDKGMVGKEEFYRALCDTPGVAPKLISESWVYNHYRWIVWKLAAMEAAFPKEFGNRCLTPERVLLQLKYRYDIEVDKCRRSTVKKIMERDDTAAKTLVLCISKLISVEDRFKQTKNKNEKGAEEARKEAVAGVIETTDGWYGIKVLLDPPLTVLVQRGRLSVGCKIITHGAEIIGSQDACTPLEAPECLMLKISANSTRPACWSAKLGFHRDPRPFPLPLASLFNDGGLVGCVDVVVVRLYPIQWMEKKSDGIFVFRNDRAEEREAQRQVENQQRKMESLFAKIQTEFEQKYEAKSKRRGQKAQKFSKQEIQALQDGAELNEAIENSMDPGYFEACLREEQLKVLHGHRQMLNEKKQAEFQAEFKKALESAEQEGKSCCKRGVTPVWKLRIVDYRKPSAAEYILNIWRPLADLHSLLKEGNRYRIYQLLASQSKGRTTTADIQLTATKKTQYQQFQSFPELISELYSPRKAVKFNMLMDPTFRPAYAEVDLVGYTISIEGKPGVAPVVYLSDESHNFVAIKVWTALNQLAVEDIVKPFSLIAASNLQWRSDSRSIIPMLYAGDLSIFSSNPKEGHLQEAFNQRRTAIQENISGTYLPPEKKNLHQESYKSCQYNTLNVLMNGNIHTQSPVLSRVHMGTSCAFLFLLPSPYPESKHTSPLITMKAGVKSMTFPGSAKLMPQASENQELDTPKNRKKKAALDYLCRIPSPPALTPIRSFVSSSLQKAFHPPRSCVKLQSGENPVVPTVGNNAVLGIQSKKDEGPAAFNEEDSVADEELAMINTQAFLVGLRRDKRPSLLDKTASLKGHVPSERFLEEKLLSVLKEQASSNSERNATSLENKSCDKSRTCVKPCEHSNDSIAEETSEIIPGCHGGESAVENQSKNSSLCHKKLQQKKRRKYY</mol_seq>\n+          </sequence>\n+          <property datatype="xsd:string" ref="Compara:genome_db_name" applies_to="clade">latimeria_chalumnae</property>\n         </clade>\n       </clade>\n     </clade>\n'