Next changeset 1:befe6021e476 (2017-02-28) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gff3_to_json commit 822c798d43a72724eeab174043fdaafcfdac845f-dirty |
added:
gff3_to_json.py gff3_to_json.xml test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3 test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json test-data/ENSCAFT00000026349.gff test-data/ENSMUST00000005671.gff test-data/ENSMUST00000091291.gff test-data/ENSPTRT00000013802.gff test-data/ENSRNOT00000019267.gff test-data/test.json |
b |
diff -r 000000000000 -r be6cec883b02 gff3_to_json.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3_to_json.py Wed Dec 21 10:02:59 2016 -0500 |
[ |
b'@@ -0,0 +1,255 @@\n+from __future__ import print_function\n+\n+import json\n+import optparse\n+import sys\n+\n+cds_parent_dict = dict()\n+exon_parent_dict = dict()\n+five_prime_utr_parent_dict = dict()\n+gene_count = 0\n+gene_dict = dict()\n+transcript_dict = dict()\n+three_prime_utr_parent_dict = dict()\n+\n+\n+def feature_to_json(cols):\n+ d = {\n+ \'end\': int(cols[4]),\n+ \'start\': int(cols[3]),\n+ }\n+ for attr in cols[8].split(\';\'):\n+ if \'=\' in attr:\n+ (tag, value) = attr.split(\'=\')\n+ if tag == \'ID\':\n+ d[\'id\'] = value\n+ else:\n+ d[tag] = value\n+ if cols[6] == \'+\':\n+ d[\'strand\'] = 1\n+ elif cols[6] == \'-\':\n+ d[\'strand\'] = -1\n+ else:\n+ raise Exception("Unrecognized strand \'%s\'" % cols[6])\n+ return d\n+\n+\n+def gene_to_json(cols, species):\n+ global gene_count\n+ gene = feature_to_json(cols)\n+ gene.update({\n+ \'member_id\': gene_count,\n+ \'object_type\': \'Gene\',\n+ \'seq_region_name\': cols[0],\n+ \'species\': species,\n+ \'Transcript\': [],\n+ })\n+ gene_dict[gene[\'id\']] = gene\n+ gene_count = gene_count + 1\n+\n+\n+def transcript_to_json(cols, species):\n+ transcript = feature_to_json(cols)\n+ transcript.update({\n+ \'object_type\': \'Transcript\',\n+ \'seq_region_name\': cols[0],\n+ \'species\': species,\n+ })\n+ transcript_dict[transcript[\'id\']] = transcript\n+\n+\n+def exon_to_json(cols, species):\n+ exon = feature_to_json(cols)\n+ exon.update({\n+ \'length\': int(cols[4]) - int(cols[3]) + 1,\n+ \'object_type\': \'Exon\',\n+ \'seq_region_name\': cols[0],\n+ \'species\': species,\n+ })\n+ if \'id\' not in exon and \'Name\' in exon:\n+ exon[\'id\'] = exon[\'Name\']\n+\n+ if \'Parent\' in exon:\n+ for parent in exon[\'Parent\'].split(\',\'):\n+ if parent not in exon_parent_dict:\n+ exon_parent_dict[parent] = [exon]\n+ else:\n+ exon_parent_dict[parent].append(exon)\n+\n+\n+def five_prime_utr_to_json(cols):\n+ five_prime_utr = feature_to_json(cols)\n+ if \'Parent\' in five_prime_utr:\n+ for parent in five_prime_utr[\'Parent\'].split(\',\'):\n+ # the 5\' UTR can be split among multiple exons\n+ if parent not in five_prime_utr_parent_dict:\n+ five_prime_utr_parent_dict[parent] = [five_prime_utr]\n+ else:\n+ five_prime_utr_parent_dict[parent].append(five_prime_utr)\n+\n+\n+def three_prime_utr_to_json(cols):\n+ three_prime_utr = feature_to_json(cols)\n+ if \'Parent\' in three_prime_utr:\n+ for parent in three_prime_utr[\'Parent\'].split(\',\'):\n+ # the 3\' UTR can be split among multiple exons\n+ if parent not in three_prime_utr_parent_dict:\n+ three_prime_utr_parent_dict[parent] = [three_prime_utr]\n+ else:\n+ three_prime_utr_parent_dict[parent].append(three_prime_utr)\n+\n+\n+def cds_to_json(cols):\n+ cds = feature_to_json(cols)\n+ if \'id\' not in cds:\n+ if \'Name\' in cds:\n+ cds[\'id\'] = cds[\'Name\']\n+ elif \'Parent\' in cds:\n+ cds[\'id\'] = cds[\'Parent\']\n+ if \'Parent\' in cds:\n+ # At this point we are sure than \'id\' is in cds\n+ for parent in cds[\'Parent\'].split(\',\'):\n+ if parent not in cds_parent_dict:\n+ cds_parent_dict[parent] = [cds]\n+ else:\n+ cds_parent_dict[parent].append(cds)\n+\n+\n+def join_dicts():\n+ for parent, exon_list in exon_parent_dict.items():\n+ exon_list.sort(key=lambda _: _[\'start\'])\n+ if parent in transcript_dict:\n+ transcript_dict[parent][\'Exon\'] = exon_list\n+\n+ for transcript_id, transcript in transcript_dict.items():\n+ translation = {\n+ \'CDS\': [],\n+ \'id\': None,\n+ \'end\': transcript[\'end\'],\n+ \'object_type\': \'Translation\',\n+ \'species\': transcript[\'species\'],\n+ \'start\': transcript[\'start\''..b'one:\n+ if found_cds:\n+ if derived_translation_end < translation[\'end\']:\n+ raise Exception("UTR overlaps with CDS")\n+ else:\n+ translation[\'end\'] = derived_translation_end\n+ if found_cds or derived_translation_start is not None or derived_translation_end is not None:\n+ transcript[\'Translation\'] = translation\n+\n+ for transcript in transcript_dict.values():\n+ if \'Parent\' in transcript:\n+ # A polycistronic transcript can have multiple parents\n+ for parent in transcript[\'Parent\'].split(\',\'):\n+ if parent in gene_dict:\n+ gene_dict[parent][\'Transcript\'].append(transcript)\n+\n+\n+def merge_dicts(json_arg):\n+ with open(json_arg) as f:\n+ dict_from_json = json.load(f)\n+ gene_intersection = set(gene_dict.keys()) & set(dict_from_json.keys())\n+ if gene_intersection:\n+ raise Exception("JSON file \'%s\' contains information for genes \'%s\', which are also present in other files" % (json_arg, \', \'.join(gene_intersection)))\n+ gene_dict.update(dict_from_json)\n+\n+\n+def write_json(outfile=None, sort_keys=False):\n+ if outfile:\n+ with open(outfile, \'w\') as f:\n+ json.dump(gene_dict, f, sort_keys=sort_keys)\n+ else:\n+ print(json.dumps(gene_dict, indent=3, sort_keys=sort_keys))\n+\n+\n+def __main__():\n+ parser = optparse.OptionParser()\n+ parser.add_option(\'--gff3\', action=\'append\', default=[], help=\'GFF3 file to convert, in SPECIES:FILENAME format. Use multiple times to add more files\')\n+ parser.add_option(\'--json\', action=\'append\', default=[], help=\'JSON file to merge. Use multiple times to add more files\')\n+ parser.add_option(\'-s\', \'--sort\', action=\'store_true\', help=\'Sort the keys in the JSON output\')\n+ parser.add_option(\'-o\', \'--output\', help=\'Path of the output file. If not specified, will print on the standard output\')\n+ options, args = parser.parse_args()\n+\n+ if args:\n+ raise Exception(\'Use options to provide inputs\')\n+ for gff3_arg in options.gff3:\n+ try:\n+ (species, filename) = gff3_arg.split(\':\')\n+ except ValueError:\n+ raise Exception("Argument for --gff3 \'%s\' is not in the SPECIES:FILENAME format" % gff3_arg)\n+ with open(filename) as f:\n+ for i, line in enumerate(f):\n+ line = line.strip()\n+ if not line:\n+ # skip empty lines\n+ continue\n+ if line[0] == \'#\':\n+ # skip comment lines\n+ continue\n+ cols = line.split(\'\\t\')\n+ if len(cols) != 9:\n+ raise Exception("Line %i in file \'%s\': \'%s\' does not have 9 columns" % (i, filename, line))\n+ feature_type = cols[2]\n+ try:\n+ if feature_type == \'gene\':\n+ gene_to_json(cols, species)\n+ elif feature_type in (\'mRNA\', \'transcript\'):\n+ transcript_to_json(cols, species)\n+ elif feature_type == \'exon\':\n+ exon_to_json(cols, species)\n+ elif feature_type == \'five_prime_UTR\':\n+ five_prime_utr_to_json(cols)\n+ elif feature_type == \'three_prime_UTR\':\n+ three_prime_utr_to_json(cols)\n+ elif feature_type == \'CDS\':\n+ cds_to_json(cols)\n+ else:\n+ print("Line %i in file \'%s\': \'%s\' is not an implemented feature type" % (i, filename, feature_type), file=sys.stderr)\n+ except Exception as e:\n+ raise Exception("Line %i in file \'%s\': %s" % (i, filename, e))\n+ join_dicts()\n+\n+ for json_arg in options.json:\n+ merge_dicts(json_arg)\n+\n+ write_json(options.output, options.sort)\n+\n+\n+if __name__ == \'__main__\':\n+ __main__()\n' |
b |
diff -r 000000000000 -r be6cec883b02 gff3_to_json.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gff3_to_json.xml Wed Dec 21 10:02:59 2016 -0500 |
[ |
@@ -0,0 +1,92 @@ +<tool id="gff3_to_json" name="GFF3 to JSON" version="0.1.1"> + <description>converter</description> + <stdio> + <exit_code range="1:" level="fatal" /> + </stdio> + <command> +<![CDATA[ +python $__tool_directory__/gff3_to_json.py +#for $q in $queries + --gff3 "${q.genome}:${q.gff3_input}" +#end for +#if str($json) != 'None' + #for $v in $json + --json "$v" + #end for +#end if +$sort +> "$output" +]]> + </command> + + <inputs> + <repeat name="queries" title="GFF3 dataset"> + <param name="gff3_input" type="data" format="gff3" label="GFF3 dataset" /> + <param name="genome" type="text" label="Genome name" help="Genome name without whitespaces or special characters"> + <validator type="empty_field" /> + </param> + </repeat> + <param name="json" type="data" format="json" multiple="true" optional="true" label="JSON datasets to merge" /> + <param name="sort" type="boolean" truevalue="-s" falsevalue="" label="Sort the keys in the JSON output" help="Ensure reproducibility, but can slow down the JSON creation for big files" /> + </inputs> + + <outputs> + <data name="output" format="json" label="${tool.name} on ${on_string}" /> + </outputs> + + <tests> + <test> + <param name="gff3_input" ftype="gff3" value="ENSCAFT00000026349.gff" /> + <param name="genome" ftype="text" value="canisfamiliaris" /> + + <param name="gff3_input" ftype="gff3" value="ENSMUST00000005671.gff" /> + <param name="genome" ftype="text" value="musmusculus" /> + + <param name="gff3_input" ftype="gff3" value="ENSMUST00000091291.gff" /> + <param name="genome" ftype="text" value="musmusculus" /> + + <param name="gff3_input" ftype="gff3" value="ENSPTRT00000013802.gff" /> + <param name="genome" ftype="text" value="pantroglodytes" /> + + <param name="gff3_input" ftype="gff3" value="ENSRNOT00000019267.gff" /> + <param name="genome" value="rattusnorvegicus" /> + + <param name="sort" value="-s" /> + <output name="output" file="test.json" ftype="json" /> + </test> + <test> + <param name="gff3_input" ftype="gff3" value="Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3" /> + <param name="genome" value="caenorhabditiselegans" /> + <param name="sort" value="-s" /> + <output name="output" file="Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json" ftype="json" /> + </test> + </tests> + <help> +<![CDATA[ +**What it does** + +Simple tool to convert a set of GFF3 datasets into JSON format and to merge with other JSON files. + +Example GFF3 file:: + + scaffold_0 MYZPE13164_Clone_G006_v1.0 gene 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030;biotype=protein_coding + scaffold_0 MYZPE13164_Clone_G006_v1.0 mRNA 44968 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1;Parent=MYZPE13164_G006_v1.0_000000030;Name=MYZPE13164_G006_v1.0_000000030.1;biotype=protein_coding;_AED=0.31 + scaffold_0 MYZPE13164_Clone_G006_v1.0 three_prime_utr 44968 46637 . - . ID=MYZPE13164_G006_v1.0_000000030.1.3utr1;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 exon 44968 47432 . - . ID=MYZPE13164_G006_v1.0_000000030.1.exon1;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 CDS 46638 47432 . - 0 ID=MYZPE13164_G006_v1.0_000000030.1.cds1;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 exon 53325 53539 . - . ID=MYZPE13164_G006_v1.0_000000030.1.exon2;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 CDS 53325 53539 . - 2 ID=MYZPE13164_G006_v1.0_000000030.1.cds2;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 exon 54614 54719 . - . ID=MYZPE13164_G006_v1.0_000000030.1.exon3;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 CDS 54614 54719 . - 0 ID=MYZPE13164_G006_v1.0_000000030.1.cds3;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 CDS 54852 55106 . - 0 ID=MYZPE13164_G006_v1.0_000000030.1.cds4;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 exon 54852 55117 . - . ID=MYZPE13164_G006_v1.0_000000030.1.exon4;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 five_prime_utr 55107 55117 . - . ID=MYZPE13164_G006_v1.0_000000030.1.5utr1;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 five_prime_utr 68851 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1.5utr2;Parent=MYZPE13164_G006_v1.0_000000030.1 + scaffold_0 MYZPE13164_Clone_G006_v1.0 exon 68851 69413 . - . ID=MYZPE13164_G006_v1.0_000000030.1.exon5;Parent=MYZPE13164_G006_v1.0_000000030.1 + +Warning: **Gene**, **mRNA** and **exon** features are mandatory, UTR and CDS are optional. Also, **ID** and **Parent** tags are needed to create relations. +]]> + </help> + <citations> + </citations> +</tool> |
b |
diff -r 000000000000 -r be6cec883b02 test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.gff3 Wed Dec 21 10:02:59 2016 -0500 |
[ |
@@ -0,0 +1,41 @@ +##gff-version 3 +#!genome-build WormBase WBcel235 +#!genome-version WBcel235 +#!genome-date 2012-12 +#!genome-build-accession NCBI:GCA_000002985.3 +#!genebuild-last-updated 2014-10 +I WormBase chromosome 1 15072434 . . . ID=chromosome:I;Alias=BX284601.5,NC_003279.8 +I WormBase gene 10413 16842 . + . ID=gene:WBGene00022276;Name=nlp-40;biotype=protein_coding;description=Neuropeptide-Like Protein [Source:RefSeq peptide%3BAcc:NP_001293206];gene_id=WBGene00022276;logic_name=wormbase +I WormBase gene 17483 26781 . - . ID=gene:WBGene00022278;Name=rcor-1;biotype=protein_coding;description=RCOR (REST CO-Repressor) homolog [Source:RefSeq peptide%3BAcc:NP_001293207];gene_id=WBGene00022278;logic_name=wormbase +I WormBase miRNA_gene 6054951 6055002 . - . ID=gene:WBGene00219261;Name=mir-4926;biotype=miRNA;gene_id=WBGene00219261;logic_name=wormbase_non_coding +I WormBase mRNA 10413 16842 . + . ID=transcript:Y74C9A.2a.2;Name=Y74C9A.2a.2;Parent=gene:WBGene00022276;biotype=protein_coding;transcript_id=Y74C9A.2a.2 +I WormBase mRNA 17483 24796 . - . ID=transcript:Y74C9A.4d;Name=Y74C9A.4d;Parent=gene:WBGene00022278;biotype=protein_coding;transcript_id=Y74C9A.4d +I WormBase transcript 6054951 6055002 . - . ID=transcript:C27A12.13;Name=C27A12.13;Parent=gene:WBGene00219261;biotype=pre_miRNA;transcript_id=C27A12.13 +I WormBase miRNA 6054982 6055002 . - . ID=transcript:C27A12.13a;Name=C27A12.13a;Parent=gene:WBGene00219261;biotype=miRNA;transcript_id=C27A12.13a +I WormBase exon 10413 10585 . + . Name=Y74C9A.2a.2.e1;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Y74C9A.2a.2.e1;rank=1 +I WormBase exon 11618 11689 . + . Name=Y74C9A.2a.1.e1;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Y74C9A.2a.1.e1;rank=2 +I WormBase exon 14951 15160 . + . Name=Y74C9A.2a.1.e2;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=Y74C9A.2a.1.e2;rank=3 +I WormBase exon 16473 16842 . + . Name=Y74C9A.2a.1.e3;Parent=transcript:Y74C9A.2a.2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Y74C9A.2a.1.e3;rank=4 +I WormBase exon 24651 24796 . - . Name=Y74C9A.4d.e1;Parent=transcript:Y74C9A.4d;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Y74C9A.4d.e1;rank=1 +I WormBase exon 21013 21136 . - . Name=Y74C9A.4d.e2;Parent=transcript:Y74C9A.4d;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Y74C9A.4d.e2;rank=2 +I WormBase exon 20848 20964 . - . Name=Y74C9A.4a.e8;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Y74C9A.4a.e8;rank=3 +I WormBase exon 20271 20478 . - . Name=Y74C9A.4a.e9;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Y74C9A.4a.e9;rank=4 +I WormBase exon 19015 19241 . - . Name=Y74C9A.4a.e10;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Y74C9A.4a.e10;rank=5 +I WormBase exon 18006 18115 . - . Name=Y74C9A.4a.e11;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Y74C9A.4a.e11;rank=6 +I WormBase exon 17483 17958 . - . Name=Y74C9A.4a.e12;Parent=transcript:Y74C9A.4d;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Y74C9A.4a.e12;rank=7 +I WormBase exon 6054982 6055002 . - . Name=C27A12.13a.e1;Parent=transcript:C27A12.13a;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=C27A12.13a.e1;rank=1 +I WormBase CDS 11641 11689 . + 0 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2 +I WormBase CDS 14951 15160 . + 2 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2 +I WormBase CDS 16473 16585 . + 2 ID=CDS:Y74C9A.2a.2;Parent=transcript:Y74C9A.2a.2;protein_id=Y74C9A.2a.2 +I WormBase CDS 21013 21127 . - 0 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase CDS 20848 20964 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase CDS 20271 20478 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase CDS 19015 19241 . - 1 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase CDS 18006 18115 . - 2 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase CDS 17911 17958 . - 0 ID=CDS:Y74C9A.4d;Parent=transcript:Y74C9A.4d;protein_id=Y74C9A.4d +I WormBase five_prime_UTR 10413 10585 . + . Parent=transcript:Y74C9A.2a.2 +I WormBase five_prime_UTR 11618 11640 . + . Parent=transcript:Y74C9A.2a.2 +I WormBase three_prime_UTR 16586 16842 . + . Parent=transcript:Y74C9A.2a.2 +I WormBase five_prime_UTR 24651 24796 . - . Parent=transcript:Y74C9A.4d +I WormBase five_prime_UTR 21128 21136 . - . Parent=transcript:Y74C9A.4d +I WormBase three_prime_UTR 17483 17910 . - . Parent=transcript:Y74C9A.4d |
b |
diff -r 000000000000 -r be6cec883b02 test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Caenorhabditis_elegans.WBcel235.33.chromosome.I_shortened.json Wed Dec 21 10:02:59 2016 -0500 |
[ |
b'@@ -0,0 +1,343 @@\n+{\n+ "gene:WBGene00022276": {\n+ "Name": "nlp-40", \n+ "Transcript": [\n+ {\n+ "Exon": [\n+ {\n+ "Name": "Y74C9A.2a.2.e1", \n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "constitutive": "0", \n+ "end": 10585, \n+ "ensembl_end_phase": "-1", \n+ "ensembl_phase": "-1", \n+ "exon_id": "Y74C9A.2a.2.e1", \n+ "id": "Y74C9A.2a.2.e1", \n+ "length": 173, \n+ "object_type": "Exon", \n+ "rank": "1", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 10413, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "Y74C9A.2a.1.e1", \n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "constitutive": "0", \n+ "end": 11689, \n+ "ensembl_end_phase": "1", \n+ "ensembl_phase": "-1", \n+ "exon_id": "Y74C9A.2a.1.e1", \n+ "id": "Y74C9A.2a.1.e1", \n+ "length": 72, \n+ "object_type": "Exon", \n+ "rank": "2", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 11618, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "Y74C9A.2a.1.e2", \n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "constitutive": "0", \n+ "end": 15160, \n+ "ensembl_end_phase": "1", \n+ "ensembl_phase": "1", \n+ "exon_id": "Y74C9A.2a.1.e2", \n+ "id": "Y74C9A.2a.1.e2", \n+ "length": 210, \n+ "object_type": "Exon", \n+ "rank": "3", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 14951, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "Y74C9A.2a.1.e3", \n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "constitutive": "0", \n+ "end": 16842, \n+ "ensembl_end_phase": "-1", \n+ "ensembl_phase": "1", \n+ "exon_id": "Y74C9A.2a.1.e3", \n+ "id": "Y74C9A.2a.1.e3", \n+ "length": 370, \n+ "object_type": "Exon", \n+ "rank": "4", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 16473, \n+ "strand": 1\n+ }\n+ ], \n+ "Name": "Y74C9A.2a.2", \n+ "Parent": "gene:WBGene00022276", \n+ "Translation": {\n+ "CDS": [\n+ {\n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "end": 11689, \n+ "id": "CDS:Y74C9A.2a.2", \n+ "protein_id": "Y74C9A.2a.2", \n+ "start": 11641, \n+ "strand": 1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "end": 15160, \n+ "id": "CDS:Y74C9A.2a.2", \n+ "protein_id": "Y74C9A.2a.2", \n+ "start": 14951, \n+ "strand": 1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.2a.2", \n+ "end": 16585, \n+ "id": "CDS:Y74C9A.2a.2", \n+ "protein_id": "Y74C9A.2a.2", \n+ "start": 16473, \n+ "strand": 1\n+ }\n+ ], \n+ "end": 16585, \n+ "id": "C'..b'124, \n+ "object_type": "Exon", \n+ "rank": "2", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 21013, \n+ "strand": -1\n+ }, \n+ {\n+ "Name": "Y74C9A.4d.e1", \n+ "Parent": "transcript:Y74C9A.4d", \n+ "constitutive": "0", \n+ "end": 24796, \n+ "ensembl_end_phase": "-1", \n+ "ensembl_phase": "-1", \n+ "exon_id": "Y74C9A.4d.e1", \n+ "id": "Y74C9A.4d.e1", \n+ "length": 146, \n+ "object_type": "Exon", \n+ "rank": "1", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 24651, \n+ "strand": -1\n+ }\n+ ], \n+ "Name": "Y74C9A.4d", \n+ "Parent": "gene:WBGene00022278", \n+ "Translation": {\n+ "CDS": [\n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 17958, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 17911, \n+ "strand": -1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 18115, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 18006, \n+ "strand": -1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 19241, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 19015, \n+ "strand": -1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 20478, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 20271, \n+ "strand": -1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 20964, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 20848, \n+ "strand": -1\n+ }, \n+ {\n+ "Parent": "transcript:Y74C9A.4d", \n+ "end": 21127, \n+ "id": "CDS:Y74C9A.4d", \n+ "protein_id": "Y74C9A.4d", \n+ "start": 21013, \n+ "strand": -1\n+ }\n+ ], \n+ "end": 21127, \n+ "id": "CDS:Y74C9A.4d", \n+ "object_type": "Translation", \n+ "species": "caenorhabditiselegans", \n+ "start": 17911\n+ }, \n+ "biotype": "protein_coding", \n+ "end": 24796, \n+ "id": "transcript:Y74C9A.4d", \n+ "object_type": "Transcript", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 17483, \n+ "strand": -1, \n+ "transcript_id": "Y74C9A.4d"\n+ }\n+ ], \n+ "biotype": "protein_coding", \n+ "description": "RCOR (REST CO-Repressor) homolog [Source:RefSeq peptide%3BAcc:NP_001293207]", \n+ "end": 26781, \n+ "gene_id": "WBGene00022278", \n+ "id": "gene:WBGene00022278", \n+ "logic_name": "wormbase", \n+ "member_id": 1, \n+ "object_type": "Gene", \n+ "seq_region_name": "I", \n+ "species": "caenorhabditiselegans", \n+ "start": 17483, \n+ "strand": -1\n+ }\n+}\n' |
b |
diff -r 000000000000 -r be6cec883b02 test-data/ENSCAFT00000026349.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ENSCAFT00000026349.gff Wed Dec 21 10:02:59 2016 -0500 |
b |
@@ -0,0 +1,46 @@ +7 Ensembl gene 41161397 41176758 . + . ID=ENSCAFG00000024151;Name=ENSCAFG00000024151;biotype=protein_coding +7 Ensembl transcript 41161397 41176758 . + . ID=ENSCAFT00000026349;Name=ENSCAFT00000026349;Parent=ENSCAFG00000024151;biotype=protein_coding +7 Ensembl exon 41161397 41161537 . + . Name=ENSCAFE00000180286;Parent=ENSCAFT00000026349 +7 Ensembl exon 41164527 41165078 . + 1 Name=ENSCAFE00000180288;Parent=ENSCAFT00000026349 +7 Ensembl exon 41166549 41166852 . + 1 Name=ENSCAFE00000180303;Parent=ENSCAFT00000026349 +7 Ensembl exon 41166945 41167087 . + 2 Name=ENSCAFE00000180319;Parent=ENSCAFT00000026349 +7 Ensembl exon 41167308 41167452 . + 1 Name=ENSCAFE00000180337;Parent=ENSCAFT00000026349 +7 Ensembl exon 41168280 41168494 . + 2 Name=ENSCAFE00000180353;Parent=ENSCAFT00000026349 +7 Ensembl exon 41168736 41168862 . + 1 Name=ENSCAFE00000180364;Parent=ENSCAFT00000026349 +7 Ensembl exon 41170224 41170462 . + 2 Name=ENSCAFE00000180374;Parent=ENSCAFT00000026349 +7 Ensembl exon 41170839 41171006 . + 1 Name=ENSCAFE00000180385;Parent=ENSCAFT00000026349 +7 Ensembl exon 41171100 41171295 . + 1 Name=ENSCAFE00000180394;Parent=ENSCAFT00000026349 +7 Ensembl exon 41171404 41171445 . + 2 Name=ENSCAFE00000180397;Parent=ENSCAFT00000026349 +7 Ensembl exon 41171521 41171741 . + 2 Name=ENSCAFE00000180399;Parent=ENSCAFT00000026349 +7 Ensembl exon 41171957 41172093 . + 1 Name=ENSCAFE00000180402;Parent=ENSCAFT00000026349 +7 Ensembl exon 41172179 41172341 . + 0 Name=ENSCAFE00000180416;Parent=ENSCAFT00000026349 +7 Ensembl exon 41172550 41172655 . + 1 Name=ENSCAFE00000180432;Parent=ENSCAFT00000026349 +7 Ensembl exon 41173257 41173309 . + 2 Name=ENSCAFE00000180442;Parent=ENSCAFT00000026349 +7 Ensembl exon 41173468 41173697 . + 1 Name=ENSCAFE00000180448;Parent=ENSCAFT00000026349 +7 Ensembl exon 41174196 41174306 . + 0 Name=ENSCAFE00000180452;Parent=ENSCAFT00000026349 +7 Ensembl exon 41174442 41174601 . + 0 Name=ENSCAFE00000180460;Parent=ENSCAFT00000026349 +7 Ensembl exon 41175094 41175223 . + 1 Name=ENSCAFE00000180475;Parent=ENSCAFT00000026349 +7 Ensembl exon 41175346 41175480 . + 2 Name=ENSCAFE00000180481;Parent=ENSCAFT00000026349 +7 Ensembl exon 41175700 41176758 . + 2 Name=ENSCAFE00000309098;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41161397 41161537 . + . Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41164527 41165078 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41166549 41166852 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41166945 41167087 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41167308 41167452 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41168280 41168494 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41168736 41168862 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41170224 41170462 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41170839 41171006 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41171100 41171295 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41171404 41171445 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41171521 41171741 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41171957 41172093 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41172179 41172341 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41172550 41172655 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41173257 41173309 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41173468 41173697 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41174196 41174306 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41174442 41174601 . + 0 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41175094 41175223 . + 1 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41175346 41175480 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 +7 Ensembl CDS 41175700 41176758 . + 2 Name=ENSCAFP00000024471;Parent=ENSCAFT00000026349 |
b |
diff -r 000000000000 -r be6cec883b02 test-data/ENSMUST00000005671.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ENSMUST00000005671.gff Wed Dec 21 10:02:59 2016 -0500 |
b |
@@ -0,0 +1,44 @@ +7 Ensembl gene 67952859 68226780 . + . ID=ENSMUSG00000005533;Name=ENSMUSG00000005533;biotype=protein_coding +7 Ensembl transcript 67952859 68226780 . + . ID=ENSMUST00000005671;Name=ENSMUST00000005671;Parent=ENSMUSG00000005533;biotype=protein_coding +7 Ensembl exon 67952859 67952952 . + 0 Name=ENSMUSE00000261783;Parent=ENSMUST00000005671 +7 Ensembl exon 68003810 68004355 . + 1 Name=ENSMUSE00000261773;Parent=ENSMUST00000005671 +7 Ensembl exon 68164993 68165305 . + 1 Name=ENSMUSE00000261767;Parent=ENSMUST00000005671 +7 Ensembl exon 68169897 68170048 . + 2 Name=ENSMUSE00000261754;Parent=ENSMUST00000005671 +7 Ensembl exon 68173231 68173375 . + 1 Name=ENSMUSE00000261748;Parent=ENSMUST00000005671 +7 Ensembl exon 68183344 68183558 . + 2 Name=ENSMUSE00000261743;Parent=ENSMUST00000005671 +7 Ensembl exon 68184731 68184857 . + 1 Name=ENSMUSE00000261736;Parent=ENSMUST00000005671 +7 Ensembl exon 68186990 68187228 . + 2 Name=ENSMUSE00000261728;Parent=ENSMUST00000005671 +7 Ensembl exon 68189573 68189740 . + 1 Name=ENSMUSE00000261725;Parent=ENSMUST00000005671 +7 Ensembl exon 68189946 68190150 . + 1 Name=ENSMUSE00000261719;Parent=ENSMUST00000005671 +7 Ensembl exon 68193347 68193630 . + 2 Name=ENSMUSE00000261714;Parent=ENSMUST00000005671 +7 Ensembl exon 68194969 68195105 . + 1 Name=ENSMUSE00000200356;Parent=ENSMUST00000005671 +7 Ensembl exon 68195590 68195749 . + 0 Name=ENSMUSE00000200354;Parent=ENSMUST00000005671 +7 Ensembl exon 68201244 68201346 . + 1 Name=ENSMUSE00000200353;Parent=ENSMUST00000005671 +7 Ensembl exon 68201902 68201972 . + 2 Name=ENSMUSE00000200349;Parent=ENSMUST00000005671 +7 Ensembl exon 68207251 68207480 . + 1 Name=ENSMUSE00000200360;Parent=ENSMUST00000005671 +7 Ensembl exon 68207763 68207873 . + 0 Name=ENSMUSE00000530700;Parent=ENSMUST00000005671 +7 Ensembl exon 68211994 68212156 . + 0 Name=ENSMUSE00000261691;Parent=ENSMUST00000005671 +7 Ensembl exon 68214919 68215048 . + 1 Name=ENSMUSE00000530699;Parent=ENSMUST00000005671 +7 Ensembl exon 68218402 68218536 . + 2 Name=ENSMUSE00000200350;Parent=ENSMUST00000005671 +7 Ensembl exon 68226020 68226780 . + 2 Name=ENSMUSE00000331286;Parent=ENSMUST00000005671 +7 Ensembl CDS 67952859 67952952 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68003810 68004355 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68164993 68165305 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68169897 68170048 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68173231 68173375 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68183344 68183558 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68184731 68184857 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68186990 68187228 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68189573 68189740 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68189946 68190150 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68193347 68193630 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68194969 68195105 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68195590 68195749 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68201244 68201346 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68201902 68201972 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68207251 68207480 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68207763 68207873 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68211994 68212156 . + 0 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68214919 68215048 . + 1 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68218402 68218536 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 +7 Ensembl CDS 68226020 68226780 . + 2 Name=ENSMUSP00000005671;Parent=ENSMUST00000005671 |
b |
diff -r 000000000000 -r be6cec883b02 test-data/ENSMUST00000091291.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ENSMUST00000091291.gff Wed Dec 21 10:02:59 2016 -0500 |
b |
@@ -0,0 +1,44 @@ +8 Ensembl gene 3150922 3279617 . - . ID=ENSMUSG00000005534;Name=ENSMUSG00000005534;biotype=protein_coding +8 Ensembl transcript 3150922 3279617 . - . ID=ENSMUST00000091291;Name=ENSMUST00000091291;Parent=ENSMUSG00000005534;biotype=protein_coding +8 Ensembl exon 3279029 3279617 . - 1 Name=ENSMUSE00000771349;Parent=ENSMUST00000091291 +8 Ensembl exon 3258383 3258934 . - 1 Name=ENSMUSE00001230539;Parent=ENSMUST00000091291 +8 Ensembl exon 3211379 3211700 . - 2 Name=ENSMUSE00000611294;Parent=ENSMUST00000091291 +8 Ensembl exon 3204630 3204778 . - 1 Name=ENSMUSE00000611293;Parent=ENSMUST00000091291 +8 Ensembl exon 3202890 3203034 . - 2 Name=ENSMUSE00000611267;Parent=ENSMUST00000091291 +8 Ensembl exon 3198061 3198275 . - 1 Name=ENSMUSE00000638453;Parent=ENSMUST00000091291 +8 Ensembl exon 3194795 3194921 . - 2 Name=ENSMUSE00000611287;Parent=ENSMUST00000091291 +8 Ensembl exon 3192546 3192802 . - 1 Name=ENSMUSE00000611286;Parent=ENSMUST00000091291 +8 Ensembl exon 3189125 3189292 . - 1 Name=ENSMUSE00000611285;Parent=ENSMUST00000091291 +8 Ensembl exon 3184951 3185152 . - 2 Name=ENSMUSE00000611282;Parent=ENSMUST00000091291 +8 Ensembl exon 3174614 3174888 . - 1 Name=ENSMUSE00000233977;Parent=ENSMUST00000091291 +8 Ensembl exon 3173480 3173619 . - 0 Name=ENSMUSE00000233970;Parent=ENSMUST00000091291 +8 Ensembl exon 3169709 3169868 . - 1 Name=ENSMUSE00000611280;Parent=ENSMUST00000091291 +8 Ensembl exon 3167502 3167604 . - 2 Name=ENSMUSE00000611279;Parent=ENSMUST00000091291 +8 Ensembl exon 3165518 3165585 . - 1 Name=ENSMUSE00000611278;Parent=ENSMUST00000091291 +8 Ensembl exon 3163237 3163481 . - 0 Name=ENSMUSE00000611277;Parent=ENSMUST00000091291 +8 Ensembl exon 3161681 3161791 . - 0 Name=ENSMUSE00000611276;Parent=ENSMUST00000091291 +8 Ensembl exon 3161339 3161498 . - 1 Name=ENSMUSE00000611274;Parent=ENSMUST00000091291 +8 Ensembl exon 3159453 3159582 . - 2 Name=ENSMUSE00000611273;Parent=ENSMUST00000091291 +8 Ensembl exon 3158696 3158830 . - 2 Name=ENSMUSE00000611272;Parent=ENSMUST00000091291 +8 Ensembl exon 3150922 3156023 . - . Name=ENSMUSE00000569243;Parent=ENSMUST00000091291 +8 Ensembl CDS 3279029 3279617 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3258383 3258934 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3211379 3211700 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3204630 3204778 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3202890 3203034 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3198061 3198275 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3194795 3194921 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3192546 3192802 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3189125 3189292 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3184951 3185152 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3174614 3174888 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3173480 3173619 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3169709 3169868 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3167502 3167604 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3165518 3165585 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3163237 3163481 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3161681 3161791 . - 0 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3161339 3161498 . - 1 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3159453 3159582 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3158696 3158830 . - 2 Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 +8 Ensembl CDS 3150922 3156023 . - . Name=ENSMUSP00000088837;Parent=ENSMUST00000091291 |
b |
diff -r 000000000000 -r be6cec883b02 test-data/ENSPTRT00000013802.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ENSPTRT00000013802.gff Wed Dec 21 10:02:59 2016 -0500 |
b |
@@ -0,0 +1,44 @@ +15 Ensembl gene 96156951 96470984 . + . ID=ENSPTRG00000007489;Name=ENSPTRG00000007489;biotype=protein_coding +15 Ensembl transcript 96156951 96470984 . + . ID=ENSPTRT00000013802;Name=ENSPTRT00000013802;Parent=ENSPTRG00000007489;biotype=protein_coding +15 Ensembl exon 96156951 96157076 . + . Name=ENSPTRE00000075393;Parent=ENSPTRT00000013802 +15 Ensembl exon 96216402 96216947 . + 1 Name=ENSPTRE00000075406;Parent=ENSPTRT00000013802 +15 Ensembl exon 96403452 96403764 . + 1 Name=ENSPTRE00000075404;Parent=ENSPTRT00000013802 +15 Ensembl exon 96408876 96409024 . + 2 Name=ENSPTRE00000075400;Parent=ENSPTRT00000013802 +15 Ensembl exon 96411580 96411724 . + 1 Name=ENSPTRE00000075399;Parent=ENSPTRT00000013802 +15 Ensembl exon 96420797 96421011 . + 2 Name=ENSPTRE00000075389;Parent=ENSPTRT00000013802 +15 Ensembl exon 96423413 96423539 . + 1 Name=ENSPTRE00000075398;Parent=ENSPTRT00000013802 +15 Ensembl exon 96425147 96425385 . + 2 Name=ENSPTRE00000340876;Parent=ENSPTRT00000013802 +15 Ensembl exon 96428065 96428232 . + 1 Name=ENSPTRE00000075396;Parent=ENSPTRT00000013802 +15 Ensembl exon 96428770 96428974 . + 1 Name=ENSPTRE00000075392;Parent=ENSPTRT00000013802 +15 Ensembl exon 96434239 96434522 . + 2 Name=ENSPTRE00000075387;Parent=ENSPTRT00000013802 +15 Ensembl exon 96435965 96436101 . + 1 Name=ENSPTRE00000075390;Parent=ENSPTRT00000013802 +15 Ensembl exon 96436614 96436773 . + 0 Name=ENSPTRE00000075386;Parent=ENSPTRT00000013802 +15 Ensembl exon 96441658 96441760 . + 1 Name=ENSPTRE00000075407;Parent=ENSPTRT00000013802 +15 Ensembl exon 96442337 96442407 . + 2 Name=ENSPTRE00000075405;Parent=ENSPTRT00000013802 +15 Ensembl exon 96446912 96447141 . + 1 Name=ENSPTRE00000075403;Parent=ENSPTRT00000013802 +15 Ensembl exon 96447405 96447515 . + 0 Name=ENSPTRE00000075397;Parent=ENSPTRT00000013802 +15 Ensembl exon 96451284 96451443 . + 0 Name=ENSPTRE00000075394;Parent=ENSPTRT00000013802 +15 Ensembl exon 96455007 96455136 . + 1 Name=ENSPTRE00000423172;Parent=ENSPTRT00000013802 +15 Ensembl exon 96460655 96460789 . + 2 Name=ENSPTRE00000075402;Parent=ENSPTRT00000013802 +15 Ensembl exon 96469783 96470984 . + 2 Name=ENSPTRE00000075401;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96156951 96157076 . + . Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96216402 96216947 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96403452 96403764 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96408876 96409024 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96411580 96411724 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96420797 96421011 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96423413 96423539 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96425147 96425385 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96428065 96428232 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96428770 96428974 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96434239 96434522 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96435965 96436101 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96436614 96436773 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96441658 96441760 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96442337 96442407 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96446912 96447141 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96447405 96447515 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96451284 96451443 . + 0 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96455007 96455136 . + 1 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96460655 96460789 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 +15 Ensembl CDS 96469783 96470984 . + 2 Name=ENSPTRP00000012792;Parent=ENSPTRT00000013802 |
b |
diff -r 000000000000 -r be6cec883b02 test-data/ENSRNOT00000019267.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ENSRNOT00000019267.gff Wed Dec 21 10:02:59 2016 -0500 |
b |
@@ -0,0 +1,44 @@ +1 Ensembl gene 128924966 129206516 . + . ID=ENSRNOG00000014187;Name=ENSRNOG00000014187;biotype=protein_coding +1 Ensembl transcript 128924966 129206516 . + . ID=ENSRNOT00000019267;Name=ENSRNOT00000019267;Parent=ENSRNOG00000014187;biotype=protein_coding +1 Ensembl exon 128924966 128925059 . + 0 Name=ENSRNOE00000137027;Parent=ENSRNOT00000019267 +1 Ensembl exon 128978104 128978649 . + 1 Name=ENSRNOE00000135455;Parent=ENSRNOT00000019267 +1 Ensembl exon 129142521 129142833 . + 1 Name=ENSRNOE00000135507;Parent=ENSRNOT00000019267 +1 Ensembl exon 129147060 129147211 . + 2 Name=ENSRNOE00000135552;Parent=ENSRNOT00000019267 +1 Ensembl exon 129149693 129149837 . + 1 Name=ENSRNOE00000135606;Parent=ENSRNOT00000019267 +1 Ensembl exon 129159112 129159326 . + 2 Name=ENSRNOE00000135651;Parent=ENSRNOT00000019267 +1 Ensembl exon 129161168 129161294 . + 1 Name=ENSRNOE00000135775;Parent=ENSRNOT00000019267 +1 Ensembl exon 129162727 129162965 . + 2 Name=ENSRNOE00000137123;Parent=ENSRNOT00000019267 +1 Ensembl exon 129166863 129167030 . + 1 Name=ENSRNOE00000135864;Parent=ENSRNOT00000019267 +1 Ensembl exon 129167227 129167431 . + 1 Name=ENSRNOE00000137165;Parent=ENSRNOT00000019267 +1 Ensembl exon 129172248 129172531 . + 2 Name=ENSRNOE00000137211;Parent=ENSRNOT00000019267 +1 Ensembl exon 129174080 129174216 . + 1 Name=ENSRNOE00000136003;Parent=ENSRNOT00000019267 +1 Ensembl exon 129174682 129174841 . + 0 Name=ENSRNOE00000136044;Parent=ENSRNOT00000019267 +1 Ensembl exon 129180666 129180769 . + 1 Name=ENSRNOE00000136080;Parent=ENSRNOT00000019267 +1 Ensembl exon 129180773 129180803 . + 0 Name=ENSRNOE00000520956;Parent=ENSRNOT00000019267 +1 Ensembl exon 129186970 129187229 . + 1 Name=ENSRNOE00000136171;Parent=ENSRNOT00000019267 +1 Ensembl exon 129187512 129187622 . + 0 Name=ENSRNOE00000362520;Parent=ENSRNOT00000019267 +1 Ensembl exon 129191992 129192151 . + 0 Name=ENSRNOE00000136274;Parent=ENSRNOT00000019267 +1 Ensembl exon 129195281 129195410 . + 1 Name=ENSRNOE00000329647;Parent=ENSRNOT00000019267 +1 Ensembl exon 129198768 129198902 . + 2 Name=ENSRNOE00000136353;Parent=ENSRNOT00000019267 +1 Ensembl exon 129206132 129206516 . + 2 Name=ENSRNOE00000137303;Parent=ENSRNOT00000019267 +1 Ensembl CDS 128924966 128925059 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 128978104 128978649 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129142521 129142833 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129147060 129147211 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129149693 129149837 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129159112 129159326 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129161168 129161294 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129162727 129162965 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129166863 129167030 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129167227 129167431 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129172248 129172531 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129174080 129174216 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129174682 129174841 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129180666 129180769 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129180773 129180803 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129186970 129187229 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129187512 129187622 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129191992 129192151 . + 0 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129195281 129195410 . + 1 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129198768 129198902 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 +1 Ensembl CDS 129206132 129206516 . + 2 Name=ENSRNOP00000019267;Parent=ENSRNOT00000019267 |
b |
diff -r 000000000000 -r be6cec883b02 test-data/test.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test.json Wed Dec 21 10:02:59 2016 -0500 |
[ |
b'@@ -0,0 +1,2307 @@\n+{\n+ "ENSCAFG00000024151": {\n+ "Name": "ENSCAFG00000024151", \n+ "Transcript": [\n+ {\n+ "Exon": [\n+ {\n+ "Name": "ENSCAFE00000180286", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41161537, \n+ "id": "ENSCAFE00000180286", \n+ "length": 141, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41161397, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180288", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41165078, \n+ "id": "ENSCAFE00000180288", \n+ "length": 552, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41164527, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180303", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41166852, \n+ "id": "ENSCAFE00000180303", \n+ "length": 304, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41166549, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180319", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41167087, \n+ "id": "ENSCAFE00000180319", \n+ "length": 143, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41166945, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180337", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41167452, \n+ "id": "ENSCAFE00000180337", \n+ "length": 145, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41167308, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180353", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41168494, \n+ "id": "ENSCAFE00000180353", \n+ "length": 215, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41168280, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180364", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41168862, \n+ "id": "ENSCAFE00000180364", \n+ "length": 127, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41168736, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180374", \n+ "Parent": "ENSCAFT00000026349", \n+ "end": 41170462, \n+ "id": "ENSCAFE00000180374", \n+ "length": 239, \n+ "object_type": "Exon", \n+ "seq_region_name": "7", \n+ "species": "canisfamiliaris", \n+ "start": 41170224, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSCAFE00000180385", \n+ "Parent": "ENSCAFT00000026349"'..b'00019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129174216, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129174080, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129174841, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129174682, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129180769, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129180666, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129180803, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129180773, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129187229, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129186970, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129187622, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129187512, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129192151, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129191992, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129195410, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129195281, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129198902, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129198768, \n+ "strand": 1\n+ }, \n+ {\n+ "Name": "ENSRNOP00000019267", \n+ "Parent": "ENSRNOT00000019267", \n+ "end": 129206516, \n+ "id": "ENSRNOP00000019267", \n+ "start": 129206132, \n+ "strand": 1\n+ }\n+ ], \n+ "end": 129206516, \n+ "id": "ENSRNOP00000019267", \n+ "object_type": "Translation", \n+ "species": "rattusnorvegicus", \n+ "start": 128924966\n+ }, \n+ "biotype": "protein_coding", \n+ "end": 129206516, \n+ "id": "ENSRNOT00000019267", \n+ "object_type": "Transcript", \n+ "seq_region_name": "1", \n+ "species": "rattusnorvegicus", \n+ "start": 128924966, \n+ "strand": 1\n+ }\n+ ], \n+ "biotype": "protein_coding", \n+ "end": 129206516, \n+ "id": "ENSRNOG00000014187", \n+ "member_id": 0, \n+ "object_type": "Gene", \n+ "seq_region_name": "1", \n+ "species": "rattusnorvegicus", \n+ "start": 128924966, \n+ "strand": 1\n+ }\n+}\n' |