Repository 'gff3_to_json'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/gff3_to_json

Changeset 0:be6cec883b02 (2016-12-21)
Next changeset 1:befe6021e476 (2017-02-28)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty
added:
gff3_to_json.py
gff3_to_json.xml
test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3
test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json
test-data/ENSCAFT00000026349.gff
test-data/ENSMUST00000005671.gff
test-data/ENSMUST00000091291.gff
test-data/ENSPTRT00000013802.gff
test-data/ENSRNOT00000019267.gff
test-data/test.json
b
diff -r 000000000000 -r be6cec883b02 gff3_to_json.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_to_json.py Wed Dec 21 10:02:59 2016 -0500
[
b'@@ -0,0 +1,255 @@\n+from __future__ import print_function\n+\n+import json\n+import optparse\n+import sys\n+\n+cds_parent_dict = dict()\n+exon_parent_dict = dict()\n+five_prime_utr_parent_dict = dict()\n+gene_count = 0\n+gene_dict = dict()\n+transcript_dict = dict()\n+three_prime_utr_parent_dict = dict()\n+\n+\n+def feature_to_json(cols):\n+    d = {\n+        \'end\': int(cols[4]),\n+        \'start\': int(cols[3]),\n+    }\n+    for attr in cols[8].split(\';\'):\n+        if \'=\' in attr:\n+            (tag, value) = attr.split(\'=\')\n+            if tag == \'ID\':\n+                d[\'id\'] = value\n+            else:\n+                d[tag] = value\n+    if cols[6] == \'+\':\n+        d[\'strand\'] = 1\n+    elif cols[6] == \'-\':\n+        d[\'strand\'] = -1\n+    else:\n+        raise Exception("Unrecognized strand \'%s\'" % cols[6])\n+    return d\n+\n+\n+def gene_to_json(cols, species):\n+    global gene_count\n+    gene = feature_to_json(cols)\n+    gene.update({\n+        \'member_id\': gene_count,\n+        \'object_type\': \'Gene\',\n+        \'seq_region_name\': cols[0],\n+        \'species\': species,\n+        \'Transcript\': [],\n+    })\n+    gene_dict[gene[\'id\']] = gene\n+    gene_count = gene_count + 1\n+\n+\n+def transcript_to_json(cols, species):\n+    transcript = feature_to_json(cols)\n+    transcript.update({\n+        \'object_type\': \'Transcript\',\n+        \'seq_region_name\': cols[0],\n+        \'species\': species,\n+    })\n+    transcript_dict[transcript[\'id\']] = transcript\n+\n+\n+def exon_to_json(cols, species):\n+    exon = feature_to_json(cols)\n+    exon.update({\n+        \'length\': int(cols[4]) - int(cols[3]) + 1,\n+        \'object_type\': \'Exon\',\n+        \'seq_region_name\': cols[0],\n+        \'species\': species,\n+    })\n+    if \'id\' not in exon and \'Name\' in exon:\n+        exon[\'id\'] = exon[\'Name\']\n+\n+    if \'Parent\' in exon:\n+        for parent in exon[\'Parent\'].split(\',\'):\n+          
  if parent not in exon_parent_dict:\n+                exon_parent_dict[parent] = [exon]\n+            else:\n+                exon_parent_dict[parent].append(exon)\n+\n+\n+def five_prime_utr_to_json(cols):\n+    five_prime_utr = feature_to_json(cols)\n+    if \'Parent\' in five_prime_utr:\n+        for parent in five_prime_utr[\'Parent\'].split(\',\'):\n+            # the 5\' UTR can be split among multiple exons\n+            if parent not in five_prime_utr_parent_dict:\n+                five_prime_utr_parent_dict[parent] = [five_prime_utr]\n+            else:\n+                five_prime_utr_parent_dict[parent].append(five_prime_utr)\n+\n+\n+def three_prime_utr_to_json(cols):\n+    three_prime_utr = feature_to_json(cols)\n+    if \'Parent\' in three_prime_utr:\n+        for parent in three_prime_utr[\'Parent\'].split(\',\'):\n+            # the 3\' UTR can be split among multiple exons\n+            if parent not in three_prime_utr_parent_dict:\n+                three_prime_utr_parent_dict[parent] = [three_prime_utr]\n+            else:\n+                three_prime_utr_parent_dict[parent].append(three_prime_utr)\n+\n+\n+def cds_to_json(cols):\n+    cds = feature_to_json(cols)\n+    if \'id\' not in cds:\n+        if \'Name\' in cds:\n+            cds[\'id\'] = cds[\'Name\']\n+        elif \'Parent\' in cds:\n+            cds[\'id\'] = cds[\'Parent\']\n+    if \'Parent\' in cds:\n+        # At this point we are sure than \'id\' is in cds\n+        for parent in cds[\'Parent\'].split(\',\'):\n+            if parent not in cds_parent_dict:\n+                cds_parent_dict[parent] = [cds]\n+            else:\n+                cds_parent_dict[parent].append(cds)\n+\n+\n+def join_dicts():\n+    for parent, exon_list in exon_parent_dict.items():\n+        exon_list.sort(key=lambda _: _[\'start\'])\n+        if parent in transcript_dict:\n+            transcript_dict[parent][\'Exon\'] = exon_list\n+\n+    for transcript_id, transcript in transcript_dict.items():\n+    
    translation = {\n+            \'CDS\': [],\n+            \'id\': None,\n+            \'end\': transcript[\'end\'],\n+            \'object_type\': \'Translation\',\n+            \'species\': transcript[\'species\'],\n+            \'start\': transcript[\'start\''..b'one:\n+            if found_cds:\n+                if derived_translation_end < translation[\'end\']:\n+                    raise Exception("UTR overlaps with CDS")\n+            else:\n+                translation[\'end\'] = derived_translation_end\n+        if found_cds or derived_translation_start is not None or derived_translation_end is not None:\n+            transcript[\'Translation\'] = translation\n+\n+    for transcript in transcript_dict.values():\n+        if \'Parent\' in transcript:\n+            # A polycistronic transcript can have multiple parents\n+            for parent in transcript[\'Parent\'].split(\',\'):\n+                if parent in gene_dict:\n+                    gene_dict[parent][\'Transcript\'].append(transcript)\n+\n+\n+def merge_dicts(json_arg):\n+    with open(json_arg) as f:\n+        dict_from_json = json.load(f)\n+    gene_intersection = set(gene_dict.keys()) & set(dict_from_json.keys())\n+    if gene_intersection:\n+        raise Exception("JSON file \'%s\' contains information for genes \'%s\', which are also present in other files" % (json_arg, \', \'.join(gene_intersection)))\n+    gene_dict.update(dict_from_json)\n+\n+\n+def write_json(outfile=None, sort_keys=False):\n+    if outfile:\n+        with open(outfile, \'w\') as f:\n+            json.dump(gene_dict, f, sort_keys=sort_keys)\n+    else:\n+        print(json.dumps(gene_dict, indent=3, sort_keys=sort_keys))\n+\n+\n+def __main__():\n+    parser = optparse.OptionParser()\n+    parser.add_option(\'--gff3\', action=\'append\', default=[], help=\'GFF3 file to convert, in SPECIES:FILENAME format. 
Use multiple times to add more files\')\n+    parser.add_option(\'--json\', action=\'append\', default=[], help=\'JSON file to merge. Use multiple times to add more files\')\n+    parser.add_option(\'-s\', \'--sort\', action=\'store_true\', help=\'Sort the keys in the JSON output\')\n+    parser.add_option(\'-o\', \'--output\', help=\'Path of the output file. If not specified, will print on the standard output\')\n+    options, args = parser.parse_args()\n+\n+    if args:\n+        raise Exception(\'Use options to provide inputs\')\n+    for gff3_arg in options.gff3:\n+        try:\n+            (species, filename) = gff3_arg.split(\':\')\n+        except ValueError:\n+            raise Exception("Argument for --gff3 \'%s\' is not in the SPECIES:FILENAME format" % gff3_arg)\n+        with open(filename) as f:\n+            for i, line in enumerate(f):\n+                line = line.strip()\n+                if not line:\n+                    # skip empty lines\n+                    continue\n+                if line[0] == \'#\':\n+                    # skip comment lines\n+                    continue\n+                cols = line.split(\'\\t\')\n+                if len(cols) != 9:\n+                    raise Exception("Line %i in file \'%s\': \'%s\' does not have 9 columns" % (i, filename, line))\n+                feature_type = cols[2]\n+                try:\n+                    if feature_type == \'gene\':\n+                        gene_to_json(cols, species)\n+                    elif feature_type in (\'mRNA\', \'transcript\'):\n+                        transcript_to_json(cols, species)\n+                    elif feature_type == \'exon\':\n+                        exon_to_json(cols, species)\n+                    elif feature_type == \'five_prime_UTR\':\n+                        five_prime_utr_to_json(cols)\n+                    elif feature_type == \'three_prime_UTR\':\n+                        three_prime_utr_to_json(cols)\n+                    elif 
feature_type == \'CDS\':\n+                        cds_to_json(cols)\n+                    else:\n+                        print("Line %i in file \'%s\': \'%s\' is not an implemented feature type" % (i, filename, feature_type), file=sys.stderr)\n+                except Exception as e:\n+                    raise Exception("Line %i in file \'%s\': %s" % (i, filename, e))\n+    join_dicts()\n+\n+    for json_arg in options.json:\n+        merge_dicts(json_arg)\n+\n+    write_json(options.output, options.sort)\n+\n+\n+if __name__ == \'__main__\':\n+    __main__()\n'
b
diff -r 000000000000 -r be6cec883b02 gff3_to_json.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_to_json.xml Wed Dec 21 10:02:59 2016 -0500
[
@@ -0,0 +1,92 @@
+<tool id="gff3_to_json" name="GFF3 to JSON" version="0.1.1">
+    <description>converter</description>
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+    <command>
+<![CDATA[
+python $__tool_directory__/gff3_to_json.py
+#for $q in $queries
+    --gff3 "${q.genome}:${q.gff3_input}"
+#end for
+#if str($json) != 'None'
+    #for $v in $json
+        --json "$v"
+    #end for
+#end if
+$sort
+> "$output"
+]]>
+    </command>
+
+    <inputs>
+        <repeat name="queries" title="GFF3 dataset">
+            <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" />
+            <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters">
+                <validator type="empty_field" />
+            </param>
+        </repeat>
+        <param name="json" type="data" format="json" multiple="true" optional="true" label="JSON datasets to merge" />
+        <param name="sort" type="boolean" truevalue="-s" falsevalue="" label="Sort the keys in the JSON output" help="Ensure reproducibility, but can slow down the JSON creation for big files" />
+    </inputs>
+
+    <outputs>
+         <data name="output" format="json" label="${tool.name} on ${on_string}" />
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="gff3_input" ftype="gff3" value="ENSCAFT00000026349.gff" />
+            <param name="genome" ftype="text" value="canisfamiliaris" />

+            <param name="gff3_input" ftype="gff3" value="ENSMUST00000005671.gff" />
+            <param name="genome" ftype="text" value="musmusculus" />
+
+            <param name="gff3_input" ftype="gff3" value="ENSMUST00000091291.gff" />
+            <param name="genome" ftype="text" value="musmusculus" />
+
+            <param name="gff3_input" ftype="gff3" value="ENSPTRT00000013802.gff" />
+            <param name="genome" ftype="text" value="pantroglodytes" />
+
+            <param name="gff3_input" ftype="gff3" value="ENSRNOT00000019267.gff" />
+            <param name="genome" value="rattusnorvegicus" />
+
+            <param name="sort" value="-s" />
+            <output name="output" file="test.json" ftype="json" />
+        </test>
+        <test>
+            <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3" />
+            <param name="genome" value="caenorhabditiselegans" />
+            <param name="sort" value="-s" />
+            <output name="output" file="Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json" ftype="json" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+Simple tool to convert a set of GFF3 datasets into JSON format and, optionally, to merge the result with other JSON datasets.
+
+Example GFF3 file::
+
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  gene            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  mRNA            44968   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  three_prime_UTR 44968   46637   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.3utr1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            44968   47432   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             46638   47432   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            53325   53539   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             53325   53539   .   -   2   ID=MYZPE13164_G006_v1.0_000000030.1.cds2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            54614   54719   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon3;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             54614   54719   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds3;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  CDS             54852   55106   .   -   0   ID=MYZPE13164_G006_v1.0_000000030.1.cds4;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            54852   55117   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon4;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  five_prime_UTR  55107   55117   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.5utr1;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  five_prime_UTR  68851   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.5utr2;Parent=MYZPE13164_G006_v1.0_000000030.1
+    scaffold_0  MYZPE13164_Clone_G006_v1.0  exon            68851   69413   .   -   .   ID=MYZPE13164_G006_v1.0_000000030.1.exon5;Parent=MYZPE13164_G006_v1.0_000000030.1
+
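+Warning: **gene**, **mRNA** (or **transcript**) and **exon** features are mandatory; **five_prime_UTR**, **three_prime_UTR** and **CDS** features are optional. Feature types are case-sensitive. Also, **ID** and **Parent** tags are needed to create relations between features.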
+]]>
+    </help>
+    <citations>
+    </citations>
+</tool>
b
diff -r 000000000000 -r be6cec883b02 test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3 Wed Dec 21 10:02:59 2016 -0500
[
@@ -0,0 +1,41 @@
+##gff-version 3
+#!genome-build WormBase WBcel235
+#!genome-version WBcel235
+#!genome-date 2012-12
+#!genome-build-accession NCBI:GCA_000002985.3
+#!genebuild-last-updated 2014-10
+I WormBase chromosome 1 15072434 . . . ID=chromosome:I;Alias=BX284601.5,NC_003279.8
+I WormBase gene 10413 16842 . + . ID=gene:WBGene00022276;Name=nlp-40;biotype=protein_coding;description=Neuropeptide-Like Protein  [Source:RefSeq peptide%3BAcc:NP_001293206];gene_id=WBGene00022276;logic_name=wormbase
+I WormBase gene 17483 26781 . - . ID=gene:WBGene00022278;Name=rcor-1;biotype=protein_coding;description=RCOR (REST CO-Repressor) homolog  [Source:RefSeq peptide%3BAcc:NP_001293207];gene_id=WBGene00022278;logic_name=wormbase
+I WormBase miRNA_gene 6054951 6055002 . - . ID=gene:WBGene00219261;Name=mir-4926;biotype=miRNA;gene_id=WBGene00219261;logic_name=wormbase_non_coding
+I WormBase mRNA 10413 16842 . + . ID=transcript:Y74C9A.2a.2;Name=Y74C9A.2a.2;Parent=gene:WBGene00022276;biotype=protein_coding;transcript_id=Y74C9A.2a.2
+I WormBase mRNA 17483 24796 . - . ID=transcript:Y74C9A.4d;Name=Y74C9A.4d;Parent=gene:WBGene00022278;biotype=protein_coding;transcript_id=Y74C9A.4d
+I WormBase transcript 6054951 6055002 . - . ID=transcript:C27A12.13;Name=C27A12.13;Parent=gene:WBGene00219261;biotype=pre_miRNA;transcript_id=C27A12.13
+I WormBase miRNA 6054982 6055002 . - . ID=transcript:C27A12.13a;Name=C27A12.13a;Parent=gene:WBGene00219261;biotype=miRNA;transcript_id=C27A12.13a
+I WormBase exon 10413 10585 . + . Name=Y74C9A.2a.2.e1;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Y74C9A.2a.2.e1;rank=1
+I WormBase exon 11618 11689 . + . Name=Y74C9A.2a.1.e1;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Y74C9A.2a.1.e1;rank=2
+I WormBase exon 14951 15160 . + . Name=Y74C9A.2a.1.e2;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=Y74C9A.2a.1.e2;rank=3
+I WormBase exon 16473 16842 . + . Name=Y74C9A.2a.1.e3;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Y74C9A.2a.1.e3;rank=4
+I WormBase exon 24651 24796 . - . Name=Y74C9A.4d.e1;Parent=transcript:Y74C9A.4d;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Y74C9A.4d.e1;rank=1
+I WormBase exon 21013 21136 . - . Name=Y74C9A.4d.e2;Parent=transcript:Y74C9A.4d;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Y74C9A.4d.e2;rank=2
+I WormBase exon 20848 20964 . - . Name=Y74C9A.4a.e8;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Y74C9A.4a.e8;rank=3
+I WormBase exon 20271 20478 . - . Name=Y74C9A.4a.e9;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Y74C9A.4a.e9;rank=4
+I WormBase exon 19015 19241 . - . Name=Y74C9A.4a.e10;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Y74C9A.4a.e10;rank=5
+I WormBase exon 18006 18115 . - . Name=Y74C9A.4a.e11;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Y74C9A.4a.e11;rank=6
+I WormBase exon 17483 17958 . - . Name=Y74C9A.4a.e12;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Y74C9A.4a.e12;rank=7
+I WormBase exon 6054982 6055002 . - . Name=C27A12.13a.e1;Parent=transcript:C27A12.13a;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=C27A12.13a.e1;rank=1
+I WormBase CDS 11641 11689 . + 0 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2
+I WormBase CDS 14951 15160 . + 2 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2
+I WormBase CDS 16473 16585 . + 2 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2
+I WormBase CDS 21013 21127 . - 0 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase CDS 20848 20964 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase CDS 20271 20478 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase CDS 19015 19241 . - 1 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase CDS 18006 18115 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase CDS 17911 17958 . - 0 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d
+I WormBase five_prime_UTR 10413 10585 . + . Parent=transcript:Y74C9A.2a.2
+I WormBase five_prime_UTR 11618 11640 . + . Parent=transcript:Y74C9A.2a.2
+I WormBase three_prime_UTR 16586 16842 . + . Parent=transcript:Y74C9A.2a.2
+I WormBase five_prime_UTR 24651 24796 . - . Parent=transcript:Y74C9A.4d
+I WormBase five_prime_UTR 21128 21136 . - . Parent=transcript:Y74C9A.4d
+I WormBase three_prime_UTR 17483 17910 . - . Parent=transcript:Y74C9A.4d
b
diff -r 000000000000 -r be6cec883b02 test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json Wed Dec 21 10:02:59 2016 -0500
[
b'@@ -0,0 +1,343 @@\n+{\n+   "gene:WBGene00022276": {\n+      "Name": "nlp-40", \n+      "Transcript": [\n+         {\n+            "Exon": [\n+               {\n+                  "Name": "Y74C9A.2a.2.e1", \n+                  "Parent": "transcript:Y74C9A.2a.2", \n+                  "constitutive": "0", \n+                  "end": 10585, \n+                  "ensembl_end_phase": "-1", \n+                  "ensembl_phase": "-1", \n+                  "exon_id": "Y74C9A.2a.2.e1", \n+                  "id": "Y74C9A.2a.2.e1", \n+                  "length": 173, \n+                  "object_type": "Exon", \n+                  "rank": "1", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 10413, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "Y74C9A.2a.1.e1", \n+                  "Parent": "transcript:Y74C9A.2a.2", \n+                  "constitutive": "0", \n+                  "end": 11689, \n+                  "ensembl_end_phase": "1", \n+                  "ensembl_phase": "-1", \n+                  "exon_id": "Y74C9A.2a.1.e1", \n+                  "id": "Y74C9A.2a.1.e1", \n+                  "length": 72, \n+                  "object_type": "Exon", \n+                  "rank": "2", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 11618, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "Y74C9A.2a.1.e2", \n+                  "Parent": "transcript:Y74C9A.2a.2", \n+                  "constitutive": "0", \n+                  "end": 15160, \n+                  "ensembl_end_phase": "1", \n+                  "ensembl_phase": "1", \n+                  "exon_id": "Y74C9A.2a.1.e2", \n+                  "id": "Y74C9A.2a.1.e2", \n+                  "length": 210, \n+                  "object_type": "Exon", \n+       
           "rank": "3", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 14951, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "Y74C9A.2a.1.e3", \n+                  "Parent": "transcript:Y74C9A.2a.2", \n+                  "constitutive": "0", \n+                  "end": 16842, \n+                  "ensembl_end_phase": "-1", \n+                  "ensembl_phase": "1", \n+                  "exon_id": "Y74C9A.2a.1.e3", \n+                  "id": "Y74C9A.2a.1.e3", \n+                  "length": 370, \n+                  "object_type": "Exon", \n+                  "rank": "4", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 16473, \n+                  "strand": 1\n+               }\n+            ], \n+            "Name": "Y74C9A.2a.2", \n+            "Parent": "gene:WBGene00022276", \n+            "Translation": {\n+               "CDS": [\n+                  {\n+                     "Parent": "transcript:Y74C9A.2a.2", \n+                     "end": 11689, \n+                     "id": "CDS:Y74C9A.2a.2", \n+                     "protein_id": "Y74C9A.2a.2", \n+                     "start": 11641, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.2a.2", \n+                     "end": 15160, \n+                     "id": "CDS:Y74C9A.2a.2", \n+                     "protein_id": "Y74C9A.2a.2", \n+                     "start": 14951, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.2a.2", \n+                     "end": 16585, \n+                     "id": "CDS:Y74C9A.2a.2", \n+                     "protein_id": "Y74C9A.2a.2", \n+                     "start": 16473, \n+                     
"strand": 1\n+                  }\n+               ], \n+               "end": 16585, \n+               "id": "C'..b'124, \n+                  "object_type": "Exon", \n+                  "rank": "2", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 21013, \n+                  "strand": -1\n+               }, \n+               {\n+                  "Name": "Y74C9A.4d.e1", \n+                  "Parent": "transcript:Y74C9A.4d", \n+                  "constitutive": "0", \n+                  "end": 24796, \n+                  "ensembl_end_phase": "-1", \n+                  "ensembl_phase": "-1", \n+                  "exon_id": "Y74C9A.4d.e1", \n+                  "id": "Y74C9A.4d.e1", \n+                  "length": 146, \n+                  "object_type": "Exon", \n+                  "rank": "1", \n+                  "seq_region_name": "I", \n+                  "species": "caenorhabditiselegans", \n+                  "start": 24651, \n+                  "strand": -1\n+               }\n+            ], \n+            "Name": "Y74C9A.4d", \n+            "Parent": "gene:WBGene00022278", \n+            "Translation": {\n+               "CDS": [\n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 17958, \n+                     "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 17911, \n+                     "strand": -1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 18115, \n+                     "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 18006, \n+                     "strand": -1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 19241, \n+           
          "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 19015, \n+                     "strand": -1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 20478, \n+                     "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 20271, \n+                     "strand": -1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 20964, \n+                     "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 20848, \n+                     "strand": -1\n+                  }, \n+                  {\n+                     "Parent": "transcript:Y74C9A.4d", \n+                     "end": 21127, \n+                     "id": "CDS:Y74C9A.4d", \n+                     "protein_id": "Y74C9A.4d", \n+                     "start": 21013, \n+                     "strand": -1\n+                  }\n+               ], \n+               "end": 21127, \n+               "id": "CDS:Y74C9A.4d", \n+               "object_type": "Translation", \n+               "species": "caenorhabditiselegans", \n+               "start": 17911\n+            }, \n+            "biotype": "protein_coding", \n+            "end": 24796, \n+            "id": "transcript:Y74C9A.4d", \n+            "object_type": "Transcript", \n+            "seq_region_name": "I", \n+            "species": "caenorhabditiselegans", \n+            "start": 17483, \n+            "strand": -1, \n+            "transcript_id": "Y74C9A.4d"\n+         }\n+      ], \n+      "biotype": "protein_coding", \n+      "description": "RCOR (REST CO-Repressor) homolog  [Source:RefSeq peptide%3BAcc:NP_001293207]", \n+      "end": 26781, \n+      "gene_id": "WBGene00022278", \n+      "id": "gene:WBGene00022278", 
\n+      "logic_name": "wormbase", \n+      "member_id": 1, \n+      "object_type": "Gene", \n+      "seq_region_name": "I", \n+      "species": "caenorhabditiselegans", \n+      "start": 17483, \n+      "strand": -1\n+   }\n+}\n'
b
diff -r 000000000000 -r be6cec883b02 test-data/ENSCAFT00000026349.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ENSCAFT00000026349.gff Wed Dec 21 10:02:59 2016 -0500
b
@@ -0,0 +1,46 @@
+7 Ensembl gene 41161397 41176758 . + . ID=ENSCAFG00000024151;Name=ENSCAFG00000024151;biotype=protein_coding
+7 Ensembl transcript 41161397 41176758 . + . ID=ENSCAFT00000026349;Name=ENSCAFT00000026349;Parent=ENSCAFG00000024151;biotype=protein_coding
+7 Ensembl exon 41161397 41161537 . + . Name=ENSCAFE00000180286;Parent=ENSCAFT00000026349
+7 Ensembl exon 41164527 41165078 . + 1 Name=ENSCAFE00000180288;Parent=ENSCAFT00000026349
+7 Ensembl exon 41166549 41166852 . + 1 Name=ENSCAFE00000180303;Parent=ENSCAFT00000026349
+7 Ensembl exon 41166945 41167087 . + 2 Name=ENSCAFE00000180319;Parent=ENSCAFT00000026349
+7 Ensembl exon 41167308 41167452 . + 1 Name=ENSCAFE00000180337;Parent=ENSCAFT00000026349
+7 Ensembl exon 41168280 41168494 . + 2 Name=ENSCAFE00000180353;Parent=ENSCAFT00000026349
+7 Ensembl exon 41168736 41168862 . + 1 Name=ENSCAFE00000180364;Parent=ENSCAFT00000026349
+7 Ensembl exon 41170224 41170462 . + 2 Name=ENSCAFE00000180374;Parent=ENSCAFT00000026349
+7 Ensembl exon 41170839 41171006 . + 1 Name=ENSCAFE00000180385;Parent=ENSCAFT00000026349
+7 Ensembl exon 41171100 41171295 . + 1 Name=ENSCAFE00000180394;Parent=ENSCAFT00000026349
+7 Ensembl exon 41171404 41171445 . + 2 Name=ENSCAFE00000180397;Parent=ENSCAFT00000026349
+7 Ensembl exon 41171521 41171741 . + 2 Name=ENSCAFE00000180399;Parent=ENSCAFT00000026349
+7 Ensembl exon 41171957 41172093 . + 1 Name=ENSCAFE00000180402;Parent=ENSCAFT00000026349
+7 Ensembl exon 41172179 41172341 . + 0 Name=ENSCAFE00000180416;Parent=ENSCAFT00000026349
+7 Ensembl exon 41172550 41172655 . + 1 Name=ENSCAFE00000180432;Parent=ENSCAFT00000026349
+7 Ensembl exon 41173257 41173309 . + 2 Name=ENSCAFE00000180442;Parent=ENSCAFT00000026349
+7 Ensembl exon 41173468 41173697 . + 1 Name=ENSCAFE00000180448;Parent=ENSCAFT00000026349
+7 Ensembl exon 41174196 41174306 . + 0 Name=ENSCAFE00000180452;Parent=ENSCAFT00000026349
+7 Ensembl exon 41174442 41174601 . + 0 Name=ENSCAFE00000180460;Parent=ENSCAFT00000026349
+7 Ensembl exon 41175094 41175223 . + 1 Name=ENSCAFE00000180475;Parent=ENSCAFT00000026349
+7 Ensembl exon 41175346 41175480 . + 2 Name=ENSCAFE00000180481;Parent=ENSCAFT00000026349
+7 Ensembl exon 41175700 41176758 . + 2 Name=ENSCAFE00000309098;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41161397 41161537 . + . Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41164527 41165078 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41166549 41166852 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41166945 41167087 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41167308 41167452 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41168280 41168494 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41168736 41168862 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41170224 41170462 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41170839 41171006 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41171100 41171295 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41171404 41171445 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41171521 41171741 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41171957 41172093 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41172179 41172341 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41172550 41172655 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41173257 41173309 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41173468 41173697 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41174196 41174306 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41174442 41174601 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41175094 41175223 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41175346 41175480 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
+7 Ensembl CDS 41175700 41176758 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349
b
diff -r 000000000000 -r be6cec883b02 test-data/ENSMUST00000005671.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ENSMUST00000005671.gff Wed Dec 21 10:02:59 2016 -0500
b
@@ -0,0 +1,44 @@
+7 Ensembl gene 67952859 68226780 . + . ID=ENSMUSG00000005533;Name=ENSMUSG00000005533;biotype=protein_coding
+7 Ensembl transcript 67952859 68226780 . + . ID=ENSMUST00000005671;Name=ENSMUST00000005671;Parent=ENSMUSG00000005533;biotype=protein_coding
+7 Ensembl exon 67952859 67952952 . + 0 Name=ENSMUSE00000261783;Parent=ENSMUST00000005671
+7 Ensembl exon 68003810 68004355 . + 1 Name=ENSMUSE00000261773;Parent=ENSMUST00000005671
+7 Ensembl exon 68164993 68165305 . + 1 Name=ENSMUSE00000261767;Parent=ENSMUST00000005671
+7 Ensembl exon 68169897 68170048 . + 2 Name=ENSMUSE00000261754;Parent=ENSMUST00000005671
+7 Ensembl exon 68173231 68173375 . + 1 Name=ENSMUSE00000261748;Parent=ENSMUST00000005671
+7 Ensembl exon 68183344 68183558 . + 2 Name=ENSMUSE00000261743;Parent=ENSMUST00000005671
+7 Ensembl exon 68184731 68184857 . + 1 Name=ENSMUSE00000261736;Parent=ENSMUST00000005671
+7 Ensembl exon 68186990 68187228 . + 2 Name=ENSMUSE00000261728;Parent=ENSMUST00000005671
+7 Ensembl exon 68189573 68189740 . + 1 Name=ENSMUSE00000261725;Parent=ENSMUST00000005671
+7 Ensembl exon 68189946 68190150 . + 1 Name=ENSMUSE00000261719;Parent=ENSMUST00000005671
+7 Ensembl exon 68193347 68193630 . + 2 Name=ENSMUSE00000261714;Parent=ENSMUST00000005671
+7 Ensembl exon 68194969 68195105 . + 1 Name=ENSMUSE00000200356;Parent=ENSMUST00000005671
+7 Ensembl exon 68195590 68195749 . + 0 Name=ENSMUSE00000200354;Parent=ENSMUST00000005671
+7 Ensembl exon 68201244 68201346 . + 1 Name=ENSMUSE00000200353;Parent=ENSMUST00000005671
+7 Ensembl exon 68201902 68201972 . + 2 Name=ENSMUSE00000200349;Parent=ENSMUST00000005671
+7 Ensembl exon 68207251 68207480 . + 1 Name=ENSMUSE00000200360;Parent=ENSMUST00000005671
+7 Ensembl exon 68207763 68207873 . + 0 Name=ENSMUSE00000530700;Parent=ENSMUST00000005671
+7 Ensembl exon 68211994 68212156 . + 0 Name=ENSMUSE00000261691;Parent=ENSMUST00000005671
+7 Ensembl exon 68214919 68215048 . + 1 Name=ENSMUSE00000530699;Parent=ENSMUST00000005671
+7 Ensembl exon 68218402 68218536 . + 2 Name=ENSMUSE00000200350;Parent=ENSMUST00000005671
+7 Ensembl exon 68226020 68226780 . + 2 Name=ENSMUSE00000331286;Parent=ENSMUST00000005671
+7 Ensembl CDS 67952859 67952952 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68003810 68004355 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68164993 68165305 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68169897 68170048 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68173231 68173375 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68183344 68183558 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68184731 68184857 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68186990 68187228 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68189573 68189740 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68189946 68190150 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68193347 68193630 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68194969 68195105 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68195590 68195749 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68201244 68201346 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68201902 68201972 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68207251 68207480 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68207763 68207873 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68211994 68212156 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68214919 68215048 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68218402 68218536 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
+7 Ensembl CDS 68226020 68226780 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671
b
diff -r 000000000000 -r be6cec883b02 test-data/ENSMUST00000091291.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ENSMUST00000091291.gff Wed Dec 21 10:02:59 2016 -0500
b
@@ -0,0 +1,44 @@
+8 Ensembl gene 3150922 3279617 . - . ID=ENSMUSG00000005534;Name=ENSMUSG00000005534;biotype=protein_coding
+8 Ensembl transcript 3150922 3279617 . - . ID=ENSMUST00000091291;Name=ENSMUST00000091291;Parent=ENSMUSG00000005534;biotype=protein_coding
+8 Ensembl exon 3279029 3279617 . - 1 Name=ENSMUSE00000771349;Parent=ENSMUST00000091291
+8 Ensembl exon 3258383 3258934 . - 1 Name=ENSMUSE00001230539;Parent=ENSMUST00000091291
+8 Ensembl exon 3211379 3211700 . - 2 Name=ENSMUSE00000611294;Parent=ENSMUST00000091291
+8 Ensembl exon 3204630 3204778 . - 1 Name=ENSMUSE00000611293;Parent=ENSMUST00000091291
+8 Ensembl exon 3202890 3203034 . - 2 Name=ENSMUSE00000611267;Parent=ENSMUST00000091291
+8 Ensembl exon 3198061 3198275 . - 1 Name=ENSMUSE00000638453;Parent=ENSMUST00000091291
+8 Ensembl exon 3194795 3194921 . - 2 Name=ENSMUSE00000611287;Parent=ENSMUST00000091291
+8 Ensembl exon 3192546 3192802 . - 1 Name=ENSMUSE00000611286;Parent=ENSMUST00000091291
+8 Ensembl exon 3189125 3189292 . - 1 Name=ENSMUSE00000611285;Parent=ENSMUST00000091291
+8 Ensembl exon 3184951 3185152 . - 2 Name=ENSMUSE00000611282;Parent=ENSMUST00000091291
+8 Ensembl exon 3174614 3174888 . - 1 Name=ENSMUSE00000233977;Parent=ENSMUST00000091291
+8 Ensembl exon 3173480 3173619 . - 0 Name=ENSMUSE00000233970;Parent=ENSMUST00000091291
+8 Ensembl exon 3169709 3169868 . - 1 Name=ENSMUSE00000611280;Parent=ENSMUST00000091291
+8 Ensembl exon 3167502 3167604 . - 2 Name=ENSMUSE00000611279;Parent=ENSMUST00000091291
+8 Ensembl exon 3165518 3165585 . - 1 Name=ENSMUSE00000611278;Parent=ENSMUST00000091291
+8 Ensembl exon 3163237 3163481 . - 0 Name=ENSMUSE00000611277;Parent=ENSMUST00000091291
+8 Ensembl exon 3161681 3161791 . - 0 Name=ENSMUSE00000611276;Parent=ENSMUST00000091291
+8 Ensembl exon 3161339 3161498 . - 1 Name=ENSMUSE00000611274;Parent=ENSMUST00000091291
+8 Ensembl exon 3159453 3159582 . - 2 Name=ENSMUSE00000611273;Parent=ENSMUST00000091291
+8 Ensembl exon 3158696 3158830 . - 2 Name=ENSMUSE00000611272;Parent=ENSMUST00000091291
+8 Ensembl exon 3150922 3156023 . - . Name=ENSMUSE00000569243;Parent=ENSMUST00000091291
+8 Ensembl CDS 3279029 3279617 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3258383 3258934 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3211379 3211700 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3204630 3204778 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3202890 3203034 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3198061 3198275 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3194795 3194921 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3192546 3192802 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3189125 3189292 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3184951 3185152 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3174614 3174888 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3173480 3173619 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3169709 3169868 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3167502 3167604 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3165518 3165585 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3163237 3163481 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3161681 3161791 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3161339 3161498 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3159453 3159582 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3158696 3158830 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
+8 Ensembl CDS 3150922 3156023 . - . Name=ENSMUSP00000088837;Parent=ENSMUST00000091291
b
diff -r 000000000000 -r be6cec883b02 test-data/ENSPTRT00000013802.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ENSPTRT00000013802.gff Wed Dec 21 10:02:59 2016 -0500
b
@@ -0,0 +1,44 @@
+15 Ensembl gene 96156951 96470984 . + . ID=ENSPTRG00000007489;Name=ENSPTRG00000007489;biotype=protein_coding
+15 Ensembl transcript 96156951 96470984 . + . ID=ENSPTRT00000013802;Name=ENSPTRT00000013802;Parent=ENSPTRG00000007489;biotype=protein_coding
+15 Ensembl exon 96156951 96157076 . + . Name=ENSPTRE00000075393;Parent=ENSPTRT00000013802
+15 Ensembl exon 96216402 96216947 . + 1 Name=ENSPTRE00000075406;Parent=ENSPTRT00000013802
+15 Ensembl exon 96403452 96403764 . + 1 Name=ENSPTRE00000075404;Parent=ENSPTRT00000013802
+15 Ensembl exon 96408876 96409024 . + 2 Name=ENSPTRE00000075400;Parent=ENSPTRT00000013802
+15 Ensembl exon 96411580 96411724 . + 1 Name=ENSPTRE00000075399;Parent=ENSPTRT00000013802
+15 Ensembl exon 96420797 96421011 . + 2 Name=ENSPTRE00000075389;Parent=ENSPTRT00000013802
+15 Ensembl exon 96423413 96423539 . + 1 Name=ENSPTRE00000075398;Parent=ENSPTRT00000013802
+15 Ensembl exon 96425147 96425385 . + 2 Name=ENSPTRE00000340876;Parent=ENSPTRT00000013802
+15 Ensembl exon 96428065 96428232 . + 1 Name=ENSPTRE00000075396;Parent=ENSPTRT00000013802
+15 Ensembl exon 96428770 96428974 . + 1 Name=ENSPTRE00000075392;Parent=ENSPTRT00000013802
+15 Ensembl exon 96434239 96434522 . + 2 Name=ENSPTRE00000075387;Parent=ENSPTRT00000013802
+15 Ensembl exon 96435965 96436101 . + 1 Name=ENSPTRE00000075390;Parent=ENSPTRT00000013802
+15 Ensembl exon 96436614 96436773 . + 0 Name=ENSPTRE00000075386;Parent=ENSPTRT00000013802
+15 Ensembl exon 96441658 96441760 . + 1 Name=ENSPTRE00000075407;Parent=ENSPTRT00000013802
+15 Ensembl exon 96442337 96442407 . + 2 Name=ENSPTRE00000075405;Parent=ENSPTRT00000013802
+15 Ensembl exon 96446912 96447141 . + 1 Name=ENSPTRE00000075403;Parent=ENSPTRT00000013802
+15 Ensembl exon 96447405 96447515 . + 0 Name=ENSPTRE00000075397;Parent=ENSPTRT00000013802
+15 Ensembl exon 96451284 96451443 . + 0 Name=ENSPTRE00000075394;Parent=ENSPTRT00000013802
+15 Ensembl exon 96455007 96455136 . + 1 Name=ENSPTRE00000423172;Parent=ENSPTRT00000013802
+15 Ensembl exon 96460655 96460789 . + 2 Name=ENSPTRE00000075402;Parent=ENSPTRT00000013802
+15 Ensembl exon 96469783 96470984 . + 2 Name=ENSPTRE00000075401;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96156951 96157076 . + . Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96216402 96216947 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96403452 96403764 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96408876 96409024 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96411580 96411724 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96420797 96421011 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96423413 96423539 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96425147 96425385 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96428065 96428232 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96428770 96428974 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96434239 96434522 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96435965 96436101 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96436614 96436773 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96441658 96441760 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96442337 96442407 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96446912 96447141 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96447405 96447515 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96451284 96451443 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96455007 96455136 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96460655 96460789 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
+15 Ensembl CDS 96469783 96470984 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802
b
diff -r 000000000000 -r be6cec883b02 test-data/ENSRNOT00000019267.gff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ENSRNOT00000019267.gff Wed Dec 21 10:02:59 2016 -0500
b
@@ -0,0 +1,44 @@
+1 Ensembl gene 128924966 129206516 . + . ID=ENSRNOG00000014187;Name=ENSRNOG00000014187;biotype=protein_coding
+1 Ensembl transcript 128924966 129206516 . + . ID=ENSRNOT00000019267;Name=ENSRNOT00000019267;Parent=ENSRNOG00000014187;biotype=protein_coding
+1 Ensembl exon 128924966 128925059 . + 0 Name=ENSRNOE00000137027;Parent=ENSRNOT00000019267
+1 Ensembl exon 128978104 128978649 . + 1 Name=ENSRNOE00000135455;Parent=ENSRNOT00000019267
+1 Ensembl exon 129142521 129142833 . + 1 Name=ENSRNOE00000135507;Parent=ENSRNOT00000019267
+1 Ensembl exon 129147060 129147211 . + 2 Name=ENSRNOE00000135552;Parent=ENSRNOT00000019267
+1 Ensembl exon 129149693 129149837 . + 1 Name=ENSRNOE00000135606;Parent=ENSRNOT00000019267
+1 Ensembl exon 129159112 129159326 . + 2 Name=ENSRNOE00000135651;Parent=ENSRNOT00000019267
+1 Ensembl exon 129161168 129161294 . + 1 Name=ENSRNOE00000135775;Parent=ENSRNOT00000019267
+1 Ensembl exon 129162727 129162965 . + 2 Name=ENSRNOE00000137123;Parent=ENSRNOT00000019267
+1 Ensembl exon 129166863 129167030 . + 1 Name=ENSRNOE00000135864;Parent=ENSRNOT00000019267
+1 Ensembl exon 129167227 129167431 . + 1 Name=ENSRNOE00000137165;Parent=ENSRNOT00000019267
+1 Ensembl exon 129172248 129172531 . + 2 Name=ENSRNOE00000137211;Parent=ENSRNOT00000019267
+1 Ensembl exon 129174080 129174216 . + 1 Name=ENSRNOE00000136003;Parent=ENSRNOT00000019267
+1 Ensembl exon 129174682 129174841 . + 0 Name=ENSRNOE00000136044;Parent=ENSRNOT00000019267
+1 Ensembl exon 129180666 129180769 . + 1 Name=ENSRNOE00000136080;Parent=ENSRNOT00000019267
+1 Ensembl exon 129180773 129180803 . + 0 Name=ENSRNOE00000520956;Parent=ENSRNOT00000019267
+1 Ensembl exon 129186970 129187229 . + 1 Name=ENSRNOE00000136171;Parent=ENSRNOT00000019267
+1 Ensembl exon 129187512 129187622 . + 0 Name=ENSRNOE00000362520;Parent=ENSRNOT00000019267
+1 Ensembl exon 129191992 129192151 . + 0 Name=ENSRNOE00000136274;Parent=ENSRNOT00000019267
+1 Ensembl exon 129195281 129195410 . + 1 Name=ENSRNOE00000329647;Parent=ENSRNOT00000019267
+1 Ensembl exon 129198768 129198902 . + 2 Name=ENSRNOE00000136353;Parent=ENSRNOT00000019267
+1 Ensembl exon 129206132 129206516 . + 2 Name=ENSRNOE00000137303;Parent=ENSRNOT00000019267
+1 Ensembl CDS 128924966 128925059 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 128978104 128978649 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129142521 129142833 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129147060 129147211 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129149693 129149837 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129159112 129159326 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129161168 129161294 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129162727 129162965 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129166863 129167030 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129167227 129167431 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129172248 129172531 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129174080 129174216 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129174682 129174841 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129180666 129180769 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129180773 129180803 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129186970 129187229 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129187512 129187622 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129191992 129192151 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129195281 129195410 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129198768 129198902 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
+1 Ensembl CDS 129206132 129206516 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267
b
diff -r 000000000000 -r be6cec883b02 test-data/test.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test.json Wed Dec 21 10:02:59 2016 -0500
[
b'@@ -0,0 +1,2307 @@\n+{\n+   "ENSCAFG00000024151": {\n+      "Name": "ENSCAFG00000024151", \n+      "Transcript": [\n+         {\n+            "Exon": [\n+               {\n+                  "Name": "ENSCAFE00000180286", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41161537, \n+                  "id": "ENSCAFE00000180286", \n+                  "length": 141, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41161397, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180288", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41165078, \n+                  "id": "ENSCAFE00000180288", \n+                  "length": 552, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41164527, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180303", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41166852, \n+                  "id": "ENSCAFE00000180303", \n+                  "length": 304, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41166549, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180319", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41167087, \n+                  "id": "ENSCAFE00000180319", \n+                  "length": 143, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  
"start": 41166945, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180337", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41167452, \n+                  "id": "ENSCAFE00000180337", \n+                  "length": 145, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41167308, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180353", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41168494, \n+                  "id": "ENSCAFE00000180353", \n+                  "length": 215, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41168280, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180364", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41168862, \n+                  "id": "ENSCAFE00000180364", \n+                  "length": 127, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41168736, \n+                  "strand": 1\n+               }, \n+               {\n+                  "Name": "ENSCAFE00000180374", \n+                  "Parent": "ENSCAFT00000026349", \n+                  "end": 41170462, \n+                  "id": "ENSCAFE00000180374", \n+                  "length": 239, \n+                  "object_type": "Exon", \n+                  "seq_region_name": "7", \n+                  "species": "canisfamiliaris", \n+                  "start": 41170224, \n+                  "strand": 1\n+               }, \n+         
      {\n+                  "Name": "ENSCAFE00000180385", \n+                  "Parent": "ENSCAFT00000026349"'..b'00019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129174216, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129174080, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129174841, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129174682, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129180769, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129180666, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129180803, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129180773, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129187229, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129186970, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129187622, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129187512, \n+                     "strand": 
1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129192151, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129191992, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129195410, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129195281, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129198902, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129198768, \n+                     "strand": 1\n+                  }, \n+                  {\n+                     "Name": "ENSRNOP00000019267", \n+                     "Parent": "ENSRNOT00000019267", \n+                     "end": 129206516, \n+                     "id": "ENSRNOP00000019267", \n+                     "start": 129206132, \n+                     "strand": 1\n+                  }\n+               ], \n+               "end": 129206516, \n+               "id": "ENSRNOP00000019267", \n+               "object_type": "Translation", \n+               "species": "rattusnorvegicus", \n+               "start": 128924966\n+            }, \n+            "biotype": "protein_coding", \n+            "end": 129206516, \n+            "id": "ENSRNOT00000019267", \n+            "object_type": "Transcript", \n+            "seq_region_name": "1", \n+            "species": "rattusnorvegicus", \n+            "start": 128924966, \n+            "strand": 1\n+         }\n+      ], \n+      "biotype": "protein_coding", \n+      "end": 129206516, \n+    
  "id": "ENSRNOG00000014187", \n+      "member_id": 0, \n+      "object_type": "Gene", \n+      "seq_region_name": "1", \n+      "species": "rattusnorvegicus", \n+      "start": 128924966, \n+      "strand": 1\n+   }\n+}\n'