Repository 'treebest_best'
hg clone https://toolshed.g2.bx.psu.edu/repos/earlhaminst/treebest_best

Changeset 4:66170848da6c (2017-03-15)
Previous changeset 3:dd268de3a107 (2017-03-03) Next changeset 5:b78b529f66db (2018-10-14)
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 651fae48371f845578753052c6fe173e3bb35670
removed:
fasta_header_converter.py
fasta_header_converter.xml
test-data/out.fasta
test-data/test.fasta
test-data/test.json
b
diff -r dd268de3a107 -r 66170848da6c fasta_header_converter.py
--- a/fasta_header_converter.py Fri Mar 03 07:22:53 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,62 +0,0 @@
-from __future__ import print_function
-
-import collections
-import json
-import optparse
-import sys
-
-Sequence = collections.namedtuple('Sequence', ['header', 'sequence'])
-
-
-def FASTAReader_gen(fasta_filename):
-    with open(fasta_filename) as fasta_file:
-        line = fasta_file.readline()
-        while True:
-            if not line:
-                return
-            assert line.startswith('>'), "FASTA headers must start with >"
-            header = line.rstrip()
-            sequence_parts = []
-            line = fasta_file.readline()
-            while line and line[0] != '>':
-                sequence_parts.append(line.rstrip())
-                line = fasta_file.readline()
-            sequence = "\n".join(sequence_parts)
-            yield Sequence(header, sequence)
-
-
-def read_gene_info(gene_info):
-    transcript_species_dict = dict()
-    for gene_dict in gene_info.values():
-        for transcript in gene_dict['Transcript']:
-            transcript_species_dict[transcript['id']] = transcript['species'].replace("_", "")
-    return transcript_species_dict
-
-
-parser = optparse.OptionParser()
-parser.add_option('-j', '--json', dest="input_gene_filename",
-                  help='Gene feature information in JSON format')
-parser.add_option('-f', '--fasta', dest="input_fasta_filename",
-                  help='Sequences in FASTA format')
-parser.add_option('-o', '--output', dest="output_fasta_filename",
-                  help='Output FASTA file name')
-options, args = parser.parse_args()
-
-if options.input_gene_filename is None:
-    raise Exception('-j option must be specified')
-if options.input_fasta_filename is None:
-    raise Exception('-f option must be specified')
-if options.output_fasta_filename is None:
-    raise Exception('-o option must be specified')
-
-with open(options.input_gene_filename) as json_fh:
-    gene_info = json.load(json_fh)
-transcript_species_dict = read_gene_info(gene_info)
-
-with open(options.output_fasta_filename, 'w') as output_fasta_file:
-    for entry in FASTAReader_gen(options.input_fasta_filename):
-        name = entry.header[1:].lstrip()
-        if name not in transcript_species_dict:
-            print("Transcript '%s' not found in the gene feature information" % name, file=sys.stderr)
-            continue
-        output_fasta_file.write(">%s_%s\n%s\n" % (name, transcript_species_dict[name], entry.sequence))
b
diff -r dd268de3a107 -r 66170848da6c fasta_header_converter.xml
--- a/fasta_header_converter.xml Fri Mar 03 07:22:53 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,32 +0,0 @@
-<tool id="fasta_header_converter" name="FASTA header converter" version="0.1.1">
-    <description>to append species information</description>
-    <command detect_errors="exit_code">
-<![CDATA[
-python '$__tool_directory__/fasta_header_converter.py'
--f '$fastaFile'
--j '$genesFile'
--o '$outputFile'
-]]>
-    </command>
-    <inputs>
-        <param name="fastaFile" type="data" format="fasta" label="FASTA file" help="FASTA file with transcript ID as FASTA ID" />
-        <param name="genesFile" type="data" format="json" label="Gene feature information" help="In JSON format" />
-    </inputs>
-    <outputs>
-        <data format="fasta" name="outputFile" label="${tool.name} on ${on_string}" />
-    </outputs>
-    <tests>
-        <test>
-            <param name="fastaFile" ftype="nhx" value="test.fasta" />
-            <param name="genesFile" ftype="json" value="test.json" />
-            <output name="outputFile" file="out.fasta" />
-        </test>
-    </tests>
-    <help>
-    <![CDATA[
-Simple converter for FASTA files, which appends the species name to the FASTA header for usage in TreeBeST. It uses gene feature information in JSON format (similar to the result of Ensembl REST API - lookup/id).
-    ]]>
-    </help>
-    <citations>
-    </citations>
-</tool>
b
diff -r dd268de3a107 -r 66170848da6c test-data/out.fasta
--- a/test-data/out.fasta Fri Mar 03 07:22:53 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,11171 +0,0 @@\n->ENSLACT00000008884_latimeriachalumnae\n-ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n-GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n-AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n-AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n-AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n-GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n-GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n-CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n-CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n-AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n-GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n-TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n-CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n-CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n-AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n-GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n-TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n-GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n-CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n-TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n-AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n-GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n-AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n-AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n-GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n-GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n-GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n-GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n-CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n-GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n-GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAAGTTGAGAATCAGCAA\n-AGAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n-AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n-CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n-GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n-AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n-GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n-AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n-TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n-GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n-TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n-AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n-ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n-CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n-GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n-ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n-CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n-TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n-ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n-CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n-CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n-GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n-AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n-CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n-CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n-CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n-GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n-AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n-GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n-TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n-AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n-GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n-AGACGGAAGTATTACTAA\n->ENSXETT00000064180_xenopustropicalis\n-ATGGCTGCACCGCAACTTGGAAAATCTGTCTTCTA'..b'ATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n-GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n-CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n-GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n-GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n-AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n-GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n-GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n-CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n-CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n-ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n-AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n-CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n-ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n-ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n-GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n-GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n-CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n-GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n-GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n-AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n->ENSTRUT00000015099_takifugurubripes\n-CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n-CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n-GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n-GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n-CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n-GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n-TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n-CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n-CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n-GACCACAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n-ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n-TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n-TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n-CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n-GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n-GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n-CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n-ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n-GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n-AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n-AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n-GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n-GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n-GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n-CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n-GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n-GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n-AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n-GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n-GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n-ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n-AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n-GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n-GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n-TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n-GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n-ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n-ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n-TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n-ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n-TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCCGCCTCGCCGA\n-ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n-CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n'
b
diff -r dd268de3a107 -r 66170848da6c test-data/test.fasta
--- a/test-data/test.fasta Fri Mar 03 07:22:53 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,11171 +0,0 @@\n->ENSLACT00000008884\n-ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n-GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n-AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n-AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n-AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n-GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n-GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n-CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n-CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n-AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n-GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n-TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n-CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n-CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n-AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n-GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n-TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n-GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n-CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n-TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n-AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n-GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n-AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n-AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n-GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n-GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n-GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n-GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n-CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n-GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n-GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAAGTTGAGAATCAGCAA\n-AGAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n-AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n-CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n-GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n-AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n-GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n-AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n-TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n-GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n-TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n-AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n-ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n-CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n-GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n-ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n-CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n-TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n-ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n-CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n-CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n-GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n-AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n-CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n-CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n-CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n-GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n-AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n-GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n-TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n-AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n-GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n-AGACGGAAGTATTACTAA\n->ENSXETT00000064180\n-ATGGCTGCACCGCAACTTGGAAAATCTGTCTTCTATGATCTGTTTAGCACGCATTGCTCT\n-CACTCAGATT'..b'GGCGGGAAGCTG\n-ATTATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n-GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n-CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n-GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n-GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n-AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n-GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n-GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n-CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n-CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n-ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n-AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n-CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n-ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n-ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n-GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n-GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n-CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n-GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n-GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n-AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n->ENSTRUT00000015099\n-CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n-CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n-GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n-GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n-CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n-GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n-TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n-CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n-CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n-GACCACAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n-ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n-TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n-TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n-CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n-GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n-GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n-CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n-ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n-GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n-AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n-AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n-GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n-GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n-GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n-CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n-GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n-GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n-AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n-GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n-GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n-ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n-AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n-GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n-GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n-TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n-GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n-ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n-ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n-TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n-ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n-TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCCGCCTCGCCGA\n-ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n-CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n'
b
diff -r dd268de3a107 -r 66170848da6c test-data/test.json
--- a/test-data/test.json Fri Mar 03 07:22:53 2017 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,31795 +0,0 @@\n-{\n-    "ENSTNIG00000016261": {\n-        "source": "ensembl",\n-        "object_type": "Gene",\n-        "logic_name": "ensembl",\n-        "version": 1,\n-        "species": "tetraodon_nigroviridis",\n-        "description": "breast cancer 2, early onset [Source:ZFIN;Acc:ZDB-GENE-060510-3]",\n-        "display_name": "brca2",\n-        "assembly_name": "TETRAODON8",\n-        "biotype": "protein_coding",\n-        "end": 4705074,\n-        "seq_region_name": "16",\n-        "db_type": "core",\n-        "strand": 1,\n-        "id": "ENSTNIG00000016261",\n-        "Transcript": [\n-            {\n-                "source": "ensembl",\n-                "object_type": "Transcript",\n-                "logic_name": "ensembl",\n-                "Exon": [\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4700679,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000057385",\n-                        "start": 4700614\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4701157,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000041338",\n-                        "start": 4701103\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4701424,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000031348",\n-                        "start": 4701218\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4701571,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000063263",\n-                        "start": 4701502\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4701608,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000054769",\n-                        "start": 4701587\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "tetraodon_nigroviridis",\n-                        "assembly_name": "TETRAODON8",\n-                        "end": 4701940,\n-                        "seq_region_name": "16",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSTNIE00000041082",\n-                        "start": 4701626\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-        '..b'                "assembly_name": "cavPor3",\n-                        "end": 33841095,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000227172",\n-                        "start": 33841075\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "cavia_porcellus",\n-                        "assembly_name": "cavPor3",\n-                        "end": 33841317,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000067342",\n-                        "start": 33841179\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "cavia_porcellus",\n-                        "assembly_name": "cavPor3",\n-                        "end": 33850200,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000067343",\n-                        "start": 33849956\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 1,\n-                        "species": "cavia_porcellus",\n-                        "assembly_name": "cavPor3",\n-                        "end": 33851341,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000067344",\n-                        "start": 33851195\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 2,\n-                        "species": "cavia_porcellus",\n-                        "assembly_name": "cavPor3",\n-                        "end": 33852801,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000067345",\n-                        "start": 33852516\n-                    },\n-                    {\n-                        "object_type": "Exon",\n-                        "version": 2,\n-                        "species": "cavia_porcellus",\n-                        "assembly_name": "cavPor3",\n-                        "end": 33853154,\n-                        "seq_region_name": "scaffold_6",\n-                        "db_type": "core",\n-                        "strand": 1,\n-                        "id": "ENSCPOE00000067347",\n-                        "start": 33852814\n-                    }\n-                ],\n-                "Parent": "ENSCPOG00000005153",\n-                "seq_region_name": "scaffold_6",\n-                "db_type": "core",\n-                "is_canonical": 1,\n-                "strand": 1,\n-                "id": "ENSCPOT00000005208",\n-                "version": 2,\n-                "species": "cavia_porcellus",\n-                "assembly_name": "cavPor3",\n-                "display_name": "BRCA2-201",\n-                "end": 33853154,\n-                "biotype": "protein_coding",\n-                "Translation": {\n-                    "object_type": "Translation",\n-                    "species": "cavia_porcellus",\n-                    "Parent": "ENSCPOT00000005208",\n-                    "end": 33853154,\n-                    "length": 3313,\n-                    "db_type": "core",\n-                    "id": "ENSCPOP00000004635",\n-                    "start": 33778275\n-                },\n-                "start": 33778275\n-            }\n-        ],\n-        "start": 33778275\n-    }\n-}\n\\ No newline at end of file\n'