Repository 'gafa'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/gafa

Changeset 0:af9f72ddf7f9 (2016-12-21)
Next changeset 1:fc8ca4ade638 (2017-02-20)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/GAFA/ commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
added:
GAFA.py
GAFA.xml
datatypes_conf.xml
gafa_datatypes.py
schema/gafa.mwb
schema/gafa.png
schema/gafa.svg
test-data/cigar.tabular
test-data/gene.json
test-data/test.gafa.sqlite
test-data/tree.nhx
b
diff -r 000000000000 -r af9f72ddf7f9 GAFA.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GAFA.py Wed Dec 21 07:31:50 2016 -0500
[
@@ -0,0 +1,120 @@
+from __future__ import print_function
+
+import json
+import optparse
+import sqlite3
+
+# GAFA schema version; create_tables() stores it in the `meta` table.
+version = "0.1.0"
+
+
+def create_tables(conn):
+    cur = conn.cursor()
+    cur.execute('PRAGMA foreign_keys = ON')
+    cur.execute('''CREATE TABLE meta (
+        version VARCHAR)''')
+
+    cur.execute('INSERT INTO meta (version) VALUES (?)',
+                (version, ))
+
+    cur.execute('''CREATE TABLE gene_family (
+        gene_family_id INTEGER PRIMARY KEY,
+        gene_tree VARCHAR NOT NULL)''')
+
+    cur.execute('''CREATE TABLE gene (
+        gene_id VARCHAR PRIMARY KEY NOT NULL,
+        gene_symbol VARCHAR,
+        gene_json VARCHAR NOT NULL)''')
+    cur.execute('CREATE INDEX gene_symbol_index ON gene (gene_symbol)')
+
+    cur.execute('''CREATE TABLE transcript (
+        transcript_id VARCHAR PRIMARY KEY NOT NULL,
+        protein_id VARCHAR UNIQUE,
+        gene_id VARCHAR NOT NULL REFERENCES gene(gene_id))''')
+
+    cur.execute('''CREATE TABLE gene_family_member (
+        gene_family_id INTEGER NOT NULL REFERENCES gene_family(gene_family_id),
+        protein_id VARCHAR KEY NOT NULL REFERENCES transcript(protein_id),
+        alignment VARCHAR NOT NULL,
+        PRIMARY KEY (gene_family_id, protein_id))''')
+    conn.commit()
+
+
+def cigar_to_db(conn, i, fname):
+    cur = conn.cursor()
+    with open(fname) as f:
+        for element in f.readlines():
+            seq_id, cigar = element.rstrip('\n').split('\t')
+            # Trim seq_id by removing everything from the first underscore
+            seq_id = seq_id.split('_', 1)[0]
+
+            cur.execute('SELECT transcript_id, protein_id FROM transcript WHERE transcript_id=? OR protein_id=?',
+                        (seq_id, seq_id))
+            results = cur.fetchall()
+            if len(results) == 0:
+                raise Exception("Sequence id '%s' could not be found among the transcript and protein ids" % seq_id)
+            elif len(results) > 1:
+                raise Exception("Searching sequence id '%s' among the transcript and protein ids returned multiple results" % seq_id)
+            transcript_id, protein_id = results[0]
+            if protein_id is None:
+                print("Skipping transcript '%s' with no protein id" % transcript_id)
+            else:
+                cur.execute('INSERT INTO gene_family_member (gene_family_id, protein_id, alignment) VALUES (?, ?, ?)',
+                            (i, protein_id, cigar))
+                conn.commit()
+
+
+def newicktree_to_db(conn, i, fname):
+    with open(fname) as f:
+        tree = f.read().replace('\n', '')
+
+    cur = conn.cursor()
+    cur.execute('INSERT INTO gene_family (gene_family_id, gene_tree) VALUES (?, ?)',
+                (i, tree))
+    conn.commit()
+
+
+def gene_json_to_db(conn, fname):
+    with open(fname) as f:
+        all_genes_dict = json.load(f)
+
+    cur = conn.cursor()
+    for gene_dict in all_genes_dict.values():
+        gene_id = gene_dict['id']
+        gene_symbol = gene_dict.get('display_name', None)
+        cur.execute("INSERT INTO gene (gene_id, gene_symbol, gene_json) VALUES (?, ?, ?)",
+                    (gene_id, gene_symbol, json.dumps(gene_dict)))
+
+        if "Transcript" in gene_dict:
+            for transcript in gene_dict["Transcript"]:
+                transcript_id = transcript['id']
+                if 'Translation' in transcript and 'id' in transcript['Translation']:
+                    protein_id = transcript["Translation"]["id"]
+                else:
+                    protein_id = None
+                cur.execute("INSERT INTO transcript (transcript_id, protein_id, gene_id) VALUES (?, ?, ?)",
+                            (transcript_id, protein_id, gene_id))
+    conn.commit()
+
+
+def __main__():
+    parser = optparse.OptionParser()
+    parser.add_option('-t', '--tree', action='append', help='Gene tree files')
+    parser.add_option('-c', '--cigar', action='append', help='CIGAR alignments of CDS files in tabular format')
+    parser.add_option('-g', '--gene', help='Gene features file in JSON format')
+    parser.add_option('-o', '--output', help='Path of the output file')
+    options, args = parser.parse_args()
+    if args:
+        raise Exception('Use options to provide inputs')
+
+    conn = sqlite3.connect(options.output)
+    create_tables(conn)
+
+    gene_json_to_db(conn, options.gene)
+
+    for i, (tree, cigar) in enumerate(zip(options.tree, options.cigar), start=1):
+        newicktree_to_db(conn, i, tree)
+        cigar_to_db(conn, i, cigar)
+
+
+if __name__ == '__main__':
+    __main__()
b
diff -r 000000000000 -r af9f72ddf7f9 GAFA.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/GAFA.xml Wed Dec 21 07:31:50 2016 -0500
[
@@ -0,0 +1,45 @@
+<tool id="gafa" name="Gene Align and Family Aggregator" version="0.1.0">
+    <description>generates an SQLite database that can be visualised with Aequatus</description>
+    <command>
+<![CDATA[
+python '$__tool_directory__/GAFA.py'
+#for $dataset in $treeFile:
+    -t '$dataset'
+#end for
+#for $dataset in $alignmentFile:
+    -c '$dataset'
+#end for
+-g '$genesFile'
+-o '$outputFile'
+]]>
+    </command>
+    <inputs>
+        <param name="treeFile" type="data" format="nhx" multiple="true" label="Gene tree" help="Gene tree in Newick format, e.g. generated from 'TreeBeST best' or 'Get gene tree by Ensembl ID' tool" />
+        <param name="alignmentFile" type="data" format="tabular" multiple="true" label="CDS alignments" help="CDS alignments in tabular format generated by 'T-Coffee to CIGAR' tool" />
+        <param name="genesFile" type="data" format="json" label="Gene features" help="Gene features in JSON format generated by 'GFF3 to JSON' or 'Get Ensembl features by ID' tool" />
+    </inputs>
+    <outputs>
+        <data name="outputFile" format="gafa.sqlite" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="treeFile" ftype="nhx" value="tree.nhx" />
+            <param name="alignmentFile" ftype="tabular" value="cigar.tabular" />
+            <param name="genesFile" ftype="json" value="gene.json" />
+            <output name="outputFile" file="test.gafa.sqlite" compare="sim_size" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+Simple tool to generate an SQLite database that can be visualised with `Aequatus`_.
+
+Aequatus is an open-source homology browser developed with novel rendering approaches to visualise homologous, orthologous and paralogous gene structures.
+
+N.B.: The tool will modify the sequence identifiers found in the "CDS alignments" dataset by removing everything from the first underscore to the end of the string.
+
+.. _Aequatus: http://aequatus.earlham.ac.uk
+]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r af9f72ddf7f9 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Wed Dec 21 07:31:50 2016 -0500
b
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="gafa_datatypes.py"/>
+    </datatype_files>
+    <registration>
+        <datatype extension="gafa.sqlite" type="galaxy.datatypes.gafa_datatypes:GAFASQLite" mimetype="application/octet-stream" display_in_upload="True" />
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.gafa_datatypes:GAFASQLite"/>
+    </sniffers>
+</datatypes>
b
diff -r 000000000000 -r af9f72ddf7f9 gafa_datatypes.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gafa_datatypes.py Wed Dec 21 07:31:50 2016 -0500
[
@@ -0,0 +1,51 @@
+import logging
+
+from galaxy.datatypes.binary import Binary, SQlite
+from galaxy.datatypes.metadata import MetadataElement, MetadataParameter
+from galaxy.util import sqlite
+
+log = logging.getLogger(__name__)
+
+
+class GAFASQLite(SQlite):
+    """Class describing a GAFA SQLite database"""
+    MetadataElement(name='gafa_schema_version', default='0.1.0', param=MetadataParameter, desc='GAFA schema version',
+                    readonly=True, visible=True, no_value='0.1.0')
+    file_ext = 'gafa.sqlite'
+
+    def set_meta(self, dataset, overwrite=True, **kwd):
+        super(GAFASQLite, self).set_meta(dataset, overwrite=overwrite, **kwd)
+        try:
+            conn = sqlite.connect(dataset.file_name)
+            c = conn.cursor()
+            version_query = 'SELECT version FROM meta'
+            results = c.execute(version_query).fetchall()
+            if len(results) == 0:
+                raise Exception('version not found in meta table')
+            elif len(results) > 1:
+                raise Exception('Multiple versions found in meta table')
+            dataset.metadata.gafa_schema_version = results[0][0]
+        except Exception as e:
+            log.warn("%s, set_meta Exception: %s", self, e)
+
+    def sniff(self, filename):
+        if super(GAFASQLite, self).sniff(filename):
+            gafa_table_names = frozenset(['gene', 'gene_family', 'gene_family_member', 'meta', 'transcript'])
+            conn = sqlite.connect(filename)
+            c = conn.cursor()
+            tables_query = "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name"
+            results = c.execute(tables_query).fetchall()
+            found_table_names = frozenset(_[0] for _ in results)
+            return gafa_table_names <= found_table_names
+        return False
+
+
+# Since Binary.register_sniffable_binary_format() ignores the sniff order declared in datatypes_conf.xml and put TS datatypes at the end, instead of simply doing:
+# Binary.register_sniffable_binary_format("sqlite", "sqlite", SQlite)
+# we need to register specialized SQLite datatypes before SQlite
+for i, format_dict in enumerate(Binary.sniffable_binary_formats):
+    if format_dict['class'] == SQlite:
+        break
+else:
+    i += 1
+Binary.sniffable_binary_formats.insert(i, {'type': 'gafa.sqlite', 'ext': 'gafa.sqlite', 'class': GAFASQLite})
b
diff -r 000000000000 -r af9f72ddf7f9 schema/gafa.mwb
b
Binary file schema/gafa.mwb has changed
b
diff -r 000000000000 -r af9f72ddf7f9 schema/gafa.png
b
Binary file schema/gafa.png has changed
b
diff -r 000000000000 -r af9f72ddf7f9 schema/gafa.svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/schema/gafa.svg Wed Dec 21 07:31:50 2016 -0500
b
b'@@ -0,0 +1,766 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="1117.984252pt" height="782.900787pt" viewBox="0 0 1117.984252 782.900787" version="1.1">\n+<defs>\n+<g>\n+<symbol overflow="visible" id="glyph0-0">\n+<path style="stroke:none;" d="M 2.558594 -1.027344 C 2.710938 -1.195312 2.785156 -1.460938 2.785156 -1.828125 C 2.785156 -2.171875 2.714844 -2.429688 2.570312 -2.609375 C 2.425781 -2.789062 2.230469 -2.878906 1.988281 -2.878906 C 1.65625 -2.878906 1.425781 -2.722656 1.300781 -2.410156 C 1.234375 -2.246094 1.199219 -2.039062 1.199219 -1.796875 C 1.199219 -1.585938 1.234375 -1.398438 1.304688 -1.234375 C 1.433594 -0.929688 1.664062 -0.777344 1.996094 -0.777344 C 2.21875 -0.777344 2.40625 -0.859375 2.558594 -1.027344 Z M 2.234375 -3.625 C 2.464844 -3.527344 2.652344 -3.355469 2.792969 -3.097656 L 2.792969 -3.621094 L 3.714844 -3.621094 L 3.714844 -0.1875 C 3.714844 0.28125 3.636719 0.632812 3.476562 0.871094 C 3.207031 1.277344 2.691406 1.480469 1.921875 1.480469 C 1.460938 1.480469 1.082031 1.390625 0.789062 1.210938 C 0.496094 1.027344 0.335938 0.757812 0.304688 0.394531 L 1.335938 0.394531 C 1.363281 0.503906 1.40625 0.585938 1.464844 0.632812 C 1.566406 0.71875 1.738281 0.765625 1.980469 0.765625 C 2.320312 0.765625 2.550781 0.648438 2.664062 0.421875 C 2.738281 0.277344 2.777344 0.03125 2.777344 -0.316406 L 2.777344 -0.546875 C 2.6875 -0.394531 2.589844 -0.277344 2.484375 -0.199219 C 2.296875 -0.0546875 2.050781 0.015625 1.75 0.015625 C 1.285156 0.015625 0.914062 -0.148438 0.636719 -0.472656 C 0.359375 -0.800781 0.21875 -1.242188 0.21875 -1.800781 C 0.21875 -2.339844 0.351562 -2.789062 0.621094 -3.15625 C 0.890625 -3.523438 1.269531 -3.707031 1.761719 -3.707031 C 1.941406 -3.707031 2.101562 -3.679688 2.234375 -3.625 Z M 2.234375 -3.625 "/>\n+</symbol>\n+<symbol overflow="visible" id="glyph0-1">\n+<path style="stroke:none;" d="M 1.359375 -2.730469 C 1.238281 -2.59375 
1.164062 -2.40625 1.132812 -2.171875 L 2.605469 -2.171875 C 2.589844 -2.421875 2.511719 -2.613281 2.375 -2.742188 C 2.238281 -2.871094 2.070312 -2.9375 1.871094 -2.9375 C 1.652344 -2.9375 1.480469 -2.867188 1.359375 -2.730469 Z M 2.683594 -3.546875 C 2.925781 -3.433594 3.125 -3.257812 3.28125 -3.011719 C 3.421875 -2.796875 3.515625 -2.550781 3.558594 -2.265625 C 3.582031 -2.097656 3.59375 -1.859375 3.585938 -1.546875 L 1.109375 -1.546875 C 1.121094 -1.183594 1.238281 -0.929688 1.453125 -0.785156 C 1.582031 -0.695312 1.738281 -0.648438 1.921875 -0.648438 C 2.117188 -0.648438 2.277344 -0.703125 2.398438 -0.8125 C 2.464844 -0.875 2.523438 -0.957031 2.574219 -1.0625 L 3.542969 -1.0625 C 3.515625 -0.847656 3.40625 -0.628906 3.207031 -0.410156 C 2.894531 -0.0585938 2.460938 0.121094 1.902344 0.121094 C 1.441406 0.121094 1.035156 -0.0273438 0.683594 -0.324219 C 0.332031 -0.621094 0.15625 -1.105469 0.15625 -1.773438 C 0.15625 -2.402344 0.316406 -2.882812 0.632812 -3.214844 C 0.949219 -3.550781 1.363281 -3.71875 1.871094 -3.71875 C 2.171875 -3.71875 2.441406 -3.660156 2.683594 -3.546875 Z M 2.683594 -3.546875 "/>\n+</symbol>\n+<symbol overflow="visible" id="glyph0-2">\n+<path style="stroke:none;" d="M 3.367188 -3.414062 C 3.605469 -3.21875 3.722656 -2.890625 3.722656 -2.4375 L 3.722656 0 L 2.753906 0 L 2.753906 -2.203125 C 2.753906 -2.394531 2.726562 -2.539062 2.675781 -2.640625 C 2.582031 -2.828125 2.40625 -2.917969 2.144531 -2.917969 C 1.824219 -2.917969 1.605469 -2.785156 1.484375 -2.511719 C 1.421875 -2.367188 1.390625 -2.183594 1.390625 -1.960938 L 1.390625 0 L 0.449219 0 L 0.449219 -3.613281 L 1.363281 -3.613281 L 1.363281 -3.085938 C 1.484375 -3.273438 1.597656 -3.40625 1.707031 -3.488281 C 1.902344 -3.632812 2.148438 -3.707031 2.449219 -3.707031 C 2.824219 -3.707031 3.128906 -3.609375 3.367188 -3.414062 Z M 3.367188 -3.414062 "/>\n+</symbol>\n+<symbol overflow="visible" id="glyph0-3">\n+<path style="stroke:none;" d="M 0.0703125 -2.914062 L 0.0703125 -3.585938 L 
0.574219 -3.585938 L '..b'27 L 8.501248 124.498427 C 4.084635 124.498427 0.501736 120.915527 0.501736 116.498915 Z M 172.501573 116.498915 " transform="matrix(0.566929,0,0,0.566929,339.590551,68.031496)"/>\n+<path style="fill-rule:nonzero;fill:rgb(100%,100%,100%);fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(50%,50%,50%);stroke-opacity:1;stroke-miterlimit:10;" d="M 108.499837 315.501845 C 108.499837 311.078342 112.082737 307.502333 116.499349 307.502333 L 233.501682 307.502333 C 237.918294 307.502333 241.501194 311.078342 241.501194 315.501845 L 241.501194 350.497125 C 241.501194 354.920627 237.918294 358.496636 233.501682 358.496636 L 116.499349 358.496636 C 112.082737 358.496636 108.499837 354.920627 108.499837 350.497125 Z M 108.499837 315.501845 " transform="matrix(0.566929,0,0,0.566929,0,0)"/>\n+<path style="fill-rule:nonzero;fill:rgb(59.607843%,74.901961%,85.490196%);fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(70%,70%,70%);stroke-opacity:1;stroke-miterlimit:10;" d="M 0.499837 8.501845 C 0.499837 4.078342 4.082737 0.502333 8.499349 0.502333 L 125.501682 0.502333 C 129.918294 0.502333 133.501194 4.078342 133.501194 8.501845 L 133.501194 24.500868 L 0.499837 24.500868 Z M 0.499837 8.501845 " transform="matrix(0.566929,0,0,0.566929,61.228346,174.047244)"/>\n+<use xlink:href="#image212" transform="matrix(0.566929,0,0,0.566929,64.629921,176.314961)"/>\n+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">\n+  <use xlink:href="#glyph0-12" x="76.535156" y="183.078125"/>\n+  <use xlink:href="#glyph0-1" x="82.584246" y="183.078125"/>\n+  <use xlink:href="#glyph0-3" x="86.367833" y="183.078125"/>\n+  <use xlink:href="#glyph0-5" x="88.633335" y="183.078125"/>\n+</g>\n+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(40%,40%,40%);fill-opacity:1;" d="M 127.84375 178.867188 L 132.945312 178.867188 L 130.394531 183.402344 Z M 127.84375 178.867188 "/>\n+<use xlink:href="#image215" 
transform="matrix(0.566929,0,0,0.566929,63.496063,189.92126)"/>\n+<g style="fill:rgb(0%,0%,0%);fill-opacity:1;">\n+  <use xlink:href="#glyph1-41" x="70.867188" y="195.082031"/>\n+  <use xlink:href="#glyph1-1" x="73.985298" y="195.082031"/>\n+  <use xlink:href="#glyph1-29" x="77.453586" y="195.082031"/>\n+  <use xlink:href="#glyph1-16" x="79.530297" y="195.082031"/>\n+  <use xlink:href="#glyph1-4" x="82.648407" y="195.082031"/>\n+  <use xlink:href="#glyph1-20" x="84.033895" y="195.082031"/>\n+  <use xlink:href="#glyph1-2" x="87.502184" y="195.082031"/>\n+  <use xlink:href="#glyph1-6" x="90.970472" y="195.082031"/>\n+  <use xlink:href="#glyph1-7" x="92.703094" y="195.082031"/>\n+  <use xlink:href="#glyph1-8" x="96.862605" y="195.082031"/>\n+  <use xlink:href="#glyph1-9" x="101.022115" y="195.082031"/>\n+  <use xlink:href="#glyph1-10" x="105.525714" y="195.082031"/>\n+  <use xlink:href="#glyph1-11" x="110.029312" y="195.082031"/>\n+  <use xlink:href="#glyph1-8" x="114.532911" y="195.082031"/>\n+  <use xlink:href="#glyph1-9" x="118.692421" y="195.082031"/>\n+  <use xlink:href="#glyph1-12" x="123.19602" y="195.082031"/>\n+  <use xlink:href="#glyph1-42" x="125.27273" y="195.082031"/>\n+  <use xlink:href="#glyph1-14" x="128.741019" y="195.082031"/>\n+  <use xlink:href="#glyph1-15" x="132.209307" y="195.082031"/>\n+</g>\n+<path style="fill:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(0%,0%,0%);stroke-opacity:1;stroke-miterlimit:10;" d="M 115.864095 13.103027 " transform="matrix(0.566929,0,0,0.566929,68.598425,187.653543)"/>\n+<path style="fill-rule:nonzero;fill:rgb(59.607843%,74.901961%,85.490196%);fill-opacity:1;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke:rgb(70%,70%,70%);stroke-opacity:1;stroke-miterlimit:10;" d="M 133.501194 19.497125 L 133.501194 19.497125 C 133.501194 23.920627 129.918294 27.496636 125.501682 27.496636 L 8.499349 27.496636 C 4.082737 27.496636 0.499837 23.920627 0.499837 19.497125 Z M 133.501194 
19.497125 " transform="matrix(0.566929,0,0,0.566929,61.228346,187.653543)"/>\n+</g>\n+</svg>\n'
b
diff -r 000000000000 -r af9f72ddf7f9 test-data/cigar.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cigar.tabular Wed Dec 21 07:31:50 2016 -0500
b
b'@@ -0,0 +1,64 @@\n+ENSLACP00000008815\t477DM4D2M2795D18M6D16M4D2MD17M8D14MD68MD28M2D32M2D13M27DM3D13M6D10M6D5M3D36MD139MD43M5D96M2D83MD16M2D27M2D66M5D102M12D49M4D124M7D6M3D16MD23MD26M12D68M4D85M\n+ENSXETP00000060681\t4D22MD23M3D43MD3MD17M5D19M4D5MD3M3D42M2D3M2D28M2D6M6D17MD2M3D14M11D23M4D4M3D15M3DM6D37MD7MD11M3D3MD24M4D8M3D19M4D9MD10M4D16M8D18MD14M82D17M12DM9D23M15D70M5D4M2D22M4D28M4D35MD44M29D2MD10M115D5M8D7M4D3M5D23M4D24MD8M7D103MD2M2D5MD21M2D44M5D42MDM3D64M6D11M2D75M8D34M5D9MD6M4D8MD33M8D7M7D14M11D15M15D3M12D8MD25M14D6M15D3M21D25M81DM5D10M5D16M162DM4D11M5D12MD21MD25M6D5M6D18M23D8MD5MD24M2D25M9D9M5D4M2D30M4D23M10D4M26D31M2D14M28DM10D10M38D13M59D17MD3M2D3M5D6M20D28M2D3M2D18M4D10MD27M5D19M4D31MD28M7D38M13D2M66D33M27D74MD24M2D5M10D31M6D16MD5MD17M8D13M8D2MD35M22D8MD11MD10M2D32M2D13M27DM3D14MD14M4D7M3D36MD139MD43M5D7M3D86M2D83MD16M2D27M2D67M4D102M12D49M4D37MD20M4D17M16D15MD13M7D6MD8M3D7MD23MD13MD3MD8M12D4MD11MD33MD9M17D41M19D4M16D\n+ENSOANP00000032170\t3946D47M2D14M2D27M497D6M113D\n+ENSOANP00000024376\t4D46M3D47MD18MD22M4D8M4D19M11D12M2D3MD38MD14M2D25M11D11M3D9M4D4M3D6MD16MD8MD28M10D11M3D3MD19M10D12MD17MD11MD10M4D15M9D18MD14M82D21M3D6M9D23MD2MD62M3D17M2D6M2D24M2D16MD2MD36MD10MD17MD26M29D2MD10M115D12MD7M4D4M4D24M3D18M8D7M7D2MD21M4D34M3D27M3D6MDMD9MD21M2D29MD47M14D2M3D42M4D17M7D11M2D7M30DM6D31M8D34M5D9MD6MD32MD12M8D7M8D13M11D14M16D3M12D8MD25M5D15M2D10M3D4M4D21M3D21M53D12MD12M2D13MD20M162DM4D11MD64M6D16MD13M13D17MD5MD24M2D25MD26M2D14MD113M2D6M2D14M8D3M8D3M2D57M4D22M39D22MD3M2D24M10D6M2D16M3DM2D26MD10MD6M2D19M5D10MD25M2D43MDM7D37M82D33MD17M4D37MD21M8D23M6D8M2D14MD13MD11MD5M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17MD10M2D32M2D37M3DM3D14MD17M2D6M3D36MD139MD43M5D7M3D86M2D84M2D14M2D27M2D66M5D102M12D25M4D20M4D36MD40M10D3M2D45MD18MD23MD13MD3MD6M19D56M16D35M3D3M39D\n+ENSMEUP00000009812\tn+ENSSHAP00000012162\t4D4M19D23M3D47MD18MD22M4D9M3D19M11D12M2D3MD38MD14M2D5MD2M3D14M11D23M4D4M3D6MD12M6D7MD10M3D16MD7MD5MD5M3D3M2D23M4D13MD17MD11MD10M4D16MD6MD8M17D8M82D21M3D6M9D23MD2M3D29MD30M2D18M2D
6M2D42M4D6MDM4D32MD2MD26M7D11M29D2MD10M115D6M7D13M6D23M4D42M2D20M4D34M3D27M3D8MD8M2D21M7D17MD13MD37M14D5M3D42M5D33M3D7M30DM6D31M8D34M5D7M3D18MD17M4D12M8D7M8D13M16D10M15D3M12D8MD25M5D15M2D10M3D4M4D44M54D6M7D12M5D10MD20M162DM4D28MD47M6D30M11D9M4D6MD30M2D25M3D24M2D4M2D24M2D9MD15M5D13M2D51M2D6M2D6M28DM10D19M18D36M4D9M24D31M2D2M32D28M2D4MD21MD17M2D19M5D19M4D12M3D7M3D33MDM7D38M13D4M57D40MD17M4D55M2D9MD12MD18M2D4M2D14MD13MD17M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17M2D9M2D32M2D25M5D7M3DM3D14MD3MD13MD7M3D36MD139MD43M5D7M3D86M2D83MD16M2D95M5D102M12D25M4D20M4D37MD41M5D4M9D14MD12M7D6MD18MD23MD12MD13M12D4MD11MD27M113D\n+ENSMODP00000033276\t4D22MD23M3D47MD18MD22M4D9M3D19M11D12M2D3MD38MD14M2D5MD2M3D14M11D23M4D4M3D6MD11M7D7MD11M2D16MD7MD5MD5M3D3MD24M4D13MD17MD11MD10M4D23M2D7M17D8M82D21M3D6M9D23MD34MD30M2D18M2D6M2D24M2D16MDMD7MDM4D32MD2MD27MD16M29D2MD10M115D12MD13M6D23M4D42MD21M4D34M3D27M3D8MD9MD21M7D17MD13MD37M14D5M3D42M6D42M30DM6D31M8D34M5D9MD6M3D9MD20MD12M8D7M8D13M11D12M18D3M12D8MD25M5D15M2D10M3D5M3D11M4D31M52D12MD12M6D9MD5MD14M162DM4D28MD47M6D30M11D9M2D4M2D2MD5MD24M2D25M3D24M2D30M3D8MD15M10D61M2D6M2D6M46D12M18D35M5D9M24D31M2D2M32D28M2D4MD21MD8M3D28M4D19M4D13M2D12M3D28MDM7D8M3D26M14D4M57D40MD17M7D9MD42M2D9MD12MD18M2D4M2D14MD13MD17M6D16MD5MD17M8D14MD8MD37MD16M3D2MD17M2D9M2D59M5D7M3DM3D14MD3MD13MDMD5M3D36MD1'..b'3M9D35M3D3M19D20M\n+ENSLOCP00000009962\t3570DM9D3M3D36MD139MD42MDM3D97M2D83M3D14M2D19M2D6M2D68M4D101M12D49M4D36MD23M2D62M12DMD18MD18M3D2MD13MD3MD12MD6M21D18M119D\n+ENSDARP00000099674\t14D11M2D18M8D25MD17M5D17M22D2M4D9M6D20M7D15M3D53M2D8M3D13M7D28M4D4M3D6M19D18MD18MD13M17D20M4D8M3DM3D5M12D10M84D57M140D2M3D17M2D4M5D21M36D35MD11M5D28M29D13M115D4M28D23M4D24MD8M7D24M20D18M23D7M22D21M3D16M43DM3D13M14DM6D40M28D11M2D8M45D22M8D34M6D15M2D24M8D11M6D10M8D13M11D19M11D5M10D8MD17M2D94M53D61M162D16MD100M11D19MD5MD24M2D84M18D35M15D30M104D16M4D7MD9M24D25M42D27M3D32M6D27M5D19M4D8M7D43M8D16M108D29MD17M12D4M2D22M4D39MD24M2D14M2D12MD45MD17M8D14MD8MD37M4D7M3DM9D27M22D51M6D16MD16MD7M3D36MD139MD41M6D8M
3D86M2D84M2D14M2D18M3D6M2D68M4D101M12D48M5D36MD42M15D14M3D13M19D13MD23MD12M6D12MD4M20D28M112D\n+ENSAMXP00000013440\t10D12M5D23M3D89M4D9M35D15MD53M2D8M3D13M11D14MD9M4D6MD6M19D18MD18MD13M9D28M4D5M13D2M12D11MD5M65D14M4D47M145DM3D17M2D6M2D22M4D17MDMD6M6D10M4D21MD21M10D13M29D13M115D4M37D14M4D15MD8MD8M7D4M7D13M20D31MD16M105DM3D13M14D47M33D6M2D8M36D12M10D9M8D37M3D8MD6M3D58M7D14M11D15M15D3M12D8MD25M5D15M15D5M3D21M8D12M83D14MD20M162DM17D15M7D14M2D21M23D11M28D8MD2M4D24M2D25M5D21M73D13M17D36M96D37M24D6M4D17M13D8M19D27M13D30M6D19M5D19M4D5M10D43M8D37M83D26M2D5MDM34D29MD22MD10M3D20M19DM2D12MD16M6D17MD5MD39MD8M2D36M3D12M9D16MD44MD25M19D16MD12M9D3M3D36MD139MD44M3D8M3D86M2D83M3D14M2D19M2D6M2D68M4D101M12D48M5D36MD23M23D7M2D32M9D4MD18MD23MD13MDM3D6M7D11M12D24M117D\n+ENSGMOP00000010385\t3588D34MD8M10D31MD89MD43M5D6M11D29M2D48M2D84M16DM2D18M2D6M2D64M3DM4D101M12D49M4D37MD41M69D8MD37MD12M12D4MD6M9D17M2DM117D\n+ENSONIP00000006940\tn+ENSPFOP00000001575\tn+ENSXMAP00000006983\t4D7M3D11M2D23M3D6M2D39M7D35M23D12M14D9M2D3MD53M2D8M3D13M7D28MDM35D6MD3M10D16MD7MD11M3D3M3D22M4D8M9D5M10D11MD5M65DM82D21M3D3M2DM9D23MD2M12D7M48DM4D11M2D6M2D22M56D15MD23M5D16M29D2M3D8M115D4M37DM6D7M4D15M2D7MD8M7D2MD6M2D13M13D25M22D8M22D9M2D10M2D16M87D35M16D3M9D11M2D8M36D21M18D39MD8MD52M8D7M12D9M11D53MD25MD19M3D8M12D46M27D37MD48M166D10M3D16MD47M6D60MD5MD24M2D25MD30MD27M2D46M2D56M2D14M8D3M8D22M15D51MD9M24D31M2D3M5D14M12D29M2D23M3D8M3D178M57D39MDM34D16M7D28M2D23M3D11M2D7M6DM2D12MD19M6D14MD5MD16M8D15MD21MD38M13D13MD11M3D30M2D60MD23M3D36MD139MD55M3D86M2D84M2D14M2D19M2D6M2D35MD32M4D101M12D49M4D36MD27M6D9M5DM9D13M5D12M7D6MD5M4D9MD23MD13MD3M3D5M12D5M12D28M113D\n+ENSGACP00000015199\t3583D39MD8M7D124MD40M7D8M3D86M2D83M3D14M2D19M2D6M2D66M5D102M12D49M4D36MD53M7D10M5D12M9D11M3D9MD23MD13MDM5D10M5D7M12D22M119D\n+ENSTNIP00000002435\t272D13M2514D9M465D6MD17M8D14M4D2MD16M9D14MD46M10D4M13D13MD10M4D56M24D11MD16M6D2M3D36MD139MD55M3D6MD79M2D84M2D14M2D19M2D6M2D66MDM4D101M12D49M4D36MDM338D\n+ENSTRUP00000015030\t3587D35MD139MD55M3D6MD79M2D83M3D14
M2D19M2D6M2D66MDM4D101M12D49M4D36MD45M2D6M2D15M5D12M7D6MD3MDM4D9MD23MD12M6D7M12D5MD5M6D22M119D\n'
b
diff -r 000000000000 -r af9f72ddf7f9 test-data/gene.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene.json Wed Dec 21 07:31:50 2016 -0500
[
b'@@ -0,0 +1,1 @@\n+{"ENSTNIG00000016261":{"source":"ensembl","object_type":"Gene","logic_name":"ensembl","version":1,"species":"tetraodon_nigroviridis","description":"breast cancer 2, early onset [Source:ZFIN;Acc:ZDB-GENE-060510-3]","display_name":"brca2","assembly_name":"TETRAODON8","biotype":"protein_coding","end":4705074,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIG00000016261","Transcript":[{"source":"ensembl","object_type":"Transcript","logic_name":"ensembl","Exon":[{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4700679,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000057385","start":4700614},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701157,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000041338","start":4701103},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701424,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000031348","start":4701218},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701571,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000063263","start":4701502},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701608,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000054769","start":4701587},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4701940,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000041082","start":4701626},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4702349,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000031470","start":4702170},{"object_type":"Exon","version":1,"species":"tetraodon_nigrovi
ridis","assembly_name":"TETRAODON8","end":4702609,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191315","start":4702422},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4702859,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191316","start":4702689},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703307,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191317","start":4702938},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703539,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191318","start":4703384},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703769,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191319","start":4703622},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4703962,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191320","start":4703850},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704239,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191321","start":4704041},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704504,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191322","start":4704338},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4704734,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191323","start":4704596},{"object_type":"Exon","version":1,"species":"tetraodon_nigroviridis","assembly_name":"TETRAODON8","end":4705074,"seq_region_name":"16","db_type":"core","strand":1,"id":"ENSTNIE00000191324","start":4704818}],"
Pa'..b'object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33824323,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067322","start":33824254},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33825774,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067326","start":33825341},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33826614,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067327","start":33826430},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33827951,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067328","start":33827764},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33833454,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067329","start":33833284},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33834207,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067330","start":33833856},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33837163,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000295952","start":33837008},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33837407,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000267997","start":33837263},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33838915,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067333","start":33838794},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33840702,"seq_region_name":"scaffold_6","db_typ
e":"core","strand":1,"id":"ENSCPOE00000067340","start":33840504},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33841062,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067341","start":33840932},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33841095,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000227172","start":33841075},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33841317,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067342","start":33841179},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33850200,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067343","start":33849956},{"object_type":"Exon","version":1,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33851341,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067344","start":33851195},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33852801,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067345","start":33852516},{"object_type":"Exon","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","end":33853154,"seq_region_name":"scaffold_6","db_type":"core","strand":1,"id":"ENSCPOE00000067347","start":33852814}],"Parent":"ENSCPOG00000005153","seq_region_name":"scaffold_6","db_type":"core","is_canonical":"1","strand":1,"id":"ENSCPOT00000005208","version":2,"species":"cavia_porcellus","assembly_name":"cavPor3","display_name":"BRCA2-201","end":33853154,"biotype":"protein_coding","Translation":{"object_type":"Translation","species":"cavia_porcellus","Parent":"ENSCPOT00000005208","end":33853154,"length":3313,"db_type":"core","id":"ENSCPOP00000004635","start":33778275},"start":33778275}],"start":33778
275}}\n'
b
diff -r 000000000000 -r af9f72ddf7f9 test-data/test.gafa.sqlite
b
Binary file test-data/test.gafa.sqlite has changed
b
diff -r 000000000000 -r af9f72ddf7f9 test-data/tree.nhx
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tree.nhx Wed Dec 21 07:31:50 2016 -0500
b
@@ -0,0 +1,1 @@
+(((((((ENSTRUP00000015030:0.072273,ENSTNIP00000002435:0.113355):0.217419,ENSGACP00000015199:0.159219):0.015782,((ENSXMAP00000006983:0.045736,ENSPFOP00000001575:0.056524):0.320294,ENSONIP00000006940:0.283168):0.045551):0.201095,ENSGMOP00000010385:0.374304):0.133192,(ENSAMXP00000013440:0.462514,ENSDARP00000099674:0.551859):0.178712):0.155187,ENSLOCP00000009962:0.210044):0.153275,((((((((ENSMGAP00000015990:0.033173,ENSGALP00000027524:0.03556):0.083504,ENSAPLP00000007411:0.115288):0.028469,(ENSFALP00000008821:0.042194,ENSTGUP00000012130:0.064837):0.09653):0.300747,ENSPSIP00000012858:0.197004):0.039526,ENSACAP00000004459:0.35271):0.088637,((((((((((((((ENSBTAP00000001311:0.011479,ENSOARP00000011988:0.017264):0.045414,ENSSSCP00000022872:0.123054):0,ENSSSCP00000028073:0.069764):0.001255,ENSTTRP00000010004:0.034189):0.007047,ENSVPAP00000000821:0.052959):0.015307,ENSECAP00000013146:0.048224):0.000353,(((ENSAMEP00000009909:0.028288,ENSMPUP00000001928:0.031962):0.008616,ENSCAFP00000009557:0.043063):0.010565,ENSFCAP00000019777:0.056037):0.027617):0.000302,(ENSMLUP00000012516:0.064576,ENSPVAP00000000225:0.107453):0.018072):0.004506,(ENSEEUP00000008968:0.120512,ENSSARP00000002541:0.129791):0.016812):0.008809,((((((((((ENSP00000369497:0.001811,ENSPTRP00000009812:0.003163):0.000207,ENSGGOP00000015446:0.019894):0.004997,ENSPPYP00000005997:0.007654):0.002773,ENSNLEP00000001277:0.011887):0.003225,((ENSMMUP00000009432:0.001779,ENSPANP00000002726:0.00613):0.000661,ENSCSAP00000013938:0.004946):0.011844):0.008398,ENSCJAP00000034250:0.036115):0.032464,ENSTSYP00000000441:0.06932):0.004582,(ENSMICP00000010933:0.039893,ENSOGAP00000009477:0.07109):0.018724):0.005621,ENSTBEP00000013856:0.096277):0.00151,(((((ENSMUSP00000038576:0.034382,ENSRNOP00000001475:0.044347):0.156559,ENSDORP00000006609:0.117166):0.013586,ENSSTOP00000004979:0.081466):0.001026,ENSCPOP00000004635:0.137701):0.012286,(ENSOCUP00000014514:0.050845,ENSOPRP00000014082:0.126167):0.05329):0.004454):0.008989):0.011526,(ENSCHOP000000
07822:0.056068,ENSDNOP00000034947:0.060711):0.028397):0.005552,((ENSLAFP00000002234:0.07083,ENSPCAP00000000440:0.210145):0.017597,ENSETEP00000003277:0.202287):0.047366):0.125005,((ENSMODP00000033276:0.091526,ENSSHAP00000012162:0.098879):0.013978,ENSMEUP00000009812:0.08857):0.153103):0.044879,(ENSOANP00000024376:0.007695,ENSOANP00000032170:0.039107):0.30595):0.104539):0.110316,ENSXETP00000060681:0.774548):0.1098,ENSLACP00000008815:0.318609):0.18077):0;