Previous changeset 3:dd268de3a107 (2017-03-03) Next changeset 5:b78b529f66db (2018-10-14) |
Commit message:
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/TreeBest commit 651fae48371f845578753052c6fe173e3bb35670 |
removed:
fasta_header_converter.py fasta_header_converter.xml test-data/out.fasta test-data/test.fasta test-data/test.json |
b |
diff -r dd268de3a107 -r 66170848da6c fasta_header_converter.py --- a/fasta_header_converter.py Fri Mar 03 07:22:53 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,62 +0,0 @@ -from __future__ import print_function - -import collections -import json -import optparse -import sys - -Sequence = collections.namedtuple('Sequence', ['header', 'sequence']) - - -def FASTAReader_gen(fasta_filename): - with open(fasta_filename) as fasta_file: - line = fasta_file.readline() - while True: - if not line: - return - assert line.startswith('>'), "FASTA headers must start with >" - header = line.rstrip() - sequence_parts = [] - line = fasta_file.readline() - while line and line[0] != '>': - sequence_parts.append(line.rstrip()) - line = fasta_file.readline() - sequence = "\n".join(sequence_parts) - yield Sequence(header, sequence) - - -def read_gene_info(gene_info): - transcript_species_dict = dict() - for gene_dict in gene_info.values(): - for transcript in gene_dict['Transcript']: - transcript_species_dict[transcript['id']] = transcript['species'].replace("_", "") - return transcript_species_dict - - -parser = optparse.OptionParser() -parser.add_option('-j', '--json', dest="input_gene_filename", - help='Gene feature information in JSON format') -parser.add_option('-f', '--fasta', dest="input_fasta_filename", - help='Sequences in FASTA format') -parser.add_option('-o', '--output', dest="output_fasta_filename", - help='Output FASTA file name') -options, args = parser.parse_args() - -if options.input_gene_filename is None: - raise Exception('-j option must be specified') -if options.input_fasta_filename is None: - raise Exception('-f option must be specified') -if options.output_fasta_filename is None: - raise Exception('-o option must be specified') - -with open(options.input_gene_filename) as json_fh: - gene_info = json.load(json_fh) -transcript_species_dict = read_gene_info(gene_info) - -with open(options.output_fasta_filename, 'w') as output_fasta_file: - for entry in FASTAReader_gen(options.input_fasta_filename): - name = entry.header[1:].lstrip() - if name not in transcript_species_dict: - print("Transcript '%s' not found in the gene feature information" % name, file=sys.stderr) - continue - output_fasta_file.write(">%s_%s\n%s\n" % (name, transcript_species_dict[name], entry.sequence)) |
b |
diff -r dd268de3a107 -r 66170848da6c fasta_header_converter.xml --- a/fasta_header_converter.xml Fri Mar 03 07:22:53 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,32 +0,0 @@ -<tool id="fasta_header_converter" name="FASTA header converter" version="0.1.1"> - <description>to append species information</description> - <command detect_errors="exit_code"> -<![CDATA[ -python '$__tool_directory__/fasta_header_converter.py' --f '$fastaFile' --j '$genesFile' --o '$outputFile' -]]> - </command> - <inputs> - <param name="fastaFile" type="data" format="fasta" label="FASTA file" help="FASTA file with transcript ID as FASTA ID" /> - <param name="genesFile" type="data" format="json" label="Gene feature information" help="In JSON format" /> - </inputs> - <outputs> - <data format="fasta" name="outputFile" label="${tool.name} on ${on_string}" /> - </outputs> - <tests> - <test> - <param name="fastaFile" ftype="nhx" value="test.fasta" /> - <param name="genesFile" ftype="json" value="test.json" /> - <output name="outputFile" file="out.fasta" /> - </test> - </tests> - <help> - <![CDATA[ -Simple converter for FASTA files, which appends the species name to the FASTA header for usage in TreeBeST. It uses gene feature information in JSON format (similar to the result of Ensembl REST API - lookup/id). - ]]> - </help> - <citations> - </citations> -</tool> |
b |
diff -r dd268de3a107 -r 66170848da6c test-data/out.fasta --- a/test-data/out.fasta Fri Mar 03 07:22:53 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,11171 +0,0 @@\n->ENSLACT00000008884_latimeriachalumnae\n-ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n-GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n-AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n-AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n-AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n-GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n-GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n-CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n-CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n-AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n-GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n-TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n-CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n-CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n-AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n-GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n-TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n-GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n-CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n-TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n-AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n-GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n-AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n-AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n-GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n-GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n-GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n-GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n-CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n-GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n-GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAAGTTGAGAATCAGCAA\n-AGAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n-AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n-CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n-GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n-AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n-GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n-AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n-TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n-GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n-TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n-AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n-ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n-CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n-GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n-ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n-CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n-TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n-ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n-CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n-CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n-GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n-AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n-CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n-CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n-CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n-GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n-AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n-GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n-TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n-AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n-GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n-AGACGGAAGTATTACTAA\n->ENSXETT00000064180_xenopustropicalis\n-ATGGCTGCACCGCAACTTGGAAAATCTGTCTTCTA'..b'ATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n-GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n-CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n-GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n-GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n-AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n-GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n-GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n-CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n-CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n-ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n-AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n-CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n-ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n-ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n-GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n-GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n-CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n-GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n-GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n-AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n->ENSTRUT00000015099_takifugurubripes\n-CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n-CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n-GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n-GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n-CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n-GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n-TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n-CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n-CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n-GACCACAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n-ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n-TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n-TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n-CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n-GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n-GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n-CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n-ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n-GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n-AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n-AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n-GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n-GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n-GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n-CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n-GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n-GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n-AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n-GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n-GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n-ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n-AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n-GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n-GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n-TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n-GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n-ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n-ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n-TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n-ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n-TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCCGCCTCGCCGA\n-ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n-CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n' |
b |
diff -r dd268de3a107 -r 66170848da6c test-data/test.fasta --- a/test-data/test.fasta Fri Mar 03 07:22:53 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,11171 +0,0 @@\n->ENSLACT00000008884\n-ATGGAATGGGAAGCAGTTGAAAGTGTCAAAGCTTTAATGCGAGATGATGAGCTTACAGAT\n-GCAGGACTAGATGCAAGCAAAGACAGTTTGAACCGTGCATGCAGGAGACAGTCAGGTGGA\n-AATTTCCGAGCTAGGAAGCGGATGAGACTTGAACAAGTGTCGGCTGATGAGCCACCAGTT\n-AAGAGGCAACTCTTGGCTGAATTTGACAGGACGGTTGAAAATGGCCATAAGTCACTTCAG\n-AAGCCTTTAATATGCACTCCAAATGGTACGCTGAAAGACAGAAGGAAGTTTATGTATAGT\n-GTTCCCCTTAAGCCTGTAGTGTGTGGTCCATGGAGCAACAACTCCAAAACTGGGCAACAG\n-GTCACAAAACCCAGCATTACTTTGCCTGGCAGAGGAGTAGAAACATTTCAACCCAAGAAC\n-CACATTGCTCCCAGTCCAGTTTATGATCCACCTTCAAATAGACGAGGCCCAGTCTTTGCT\n-CCACCATTTCATGGGGCCACGTTTCGGGGACTGCAGAAACCAAGTGCTTCACACACATCA\n-AGCAAAACTGCTAAAACTTTTGTTCCTCCATTTAAAATGAAAGCCAGTGCCTCCCACACA\n-GTACATTTCAGTAGCAAAGTTATCAACACTTGTGAAAAGATTTTAGAAAATCTAGTCTAC\n-TTGAAGCCTAGTTTAGCCTCTTGTAATATTTTTCAAAGTTTGGAAGAGATGACTGCTAAC\n-CTTCAGTGTGCCAGAGACCTACAGGAAATGAGGCTGAGAAAGAAGCAAAGGCAAAATATC\n-CGGCCGCAGCCAGGCAGCCTTTACCTTGCTAAAACATCTGGTGTTGCTAGGGTCTCCCTG\n-AAGGCAGCTACAGGAAATCAGTGCCCATCCTCCTACTCCACAGAGCAGTTATATGTTCAT\n-GGTGTAGGAAAAAGCACCTTGAAAGTACGCAGTGAGAATGCAGAATCTTTTCAGTTCAGT\n-TGCAGTGATTATTTTGGTAAAGATGTTCTCCTAGCTGGAAATGGCTTGAAGCTGGCAGAT\n-GGAGGGTGGCTTATACCCAGTGATAAAGGAATGGTAGGAAAAGAAGAATTTTACAGGGCA\n-CTGTGCGATACACCTGGTGTGGCCCCAAAGCTTATTAGTGAATCTTGGGTCTACAATCAT\n-TACAGATGGATTGTATGGAAGCTTGCAGCGATGGAGGCAGCTTTTCCAAAAGAATTTGGC\n-AACCGGTGTCTAACACCAGAGAGGGTGCTGCTACAGCTTAAATACAGGTATGACATTGAA\n-GTTGACAAGTGTCGAAGATCTACTGTAAAGAAGATAATGGAAAGAGATGACACTGCCGCC\n-AAGACACTTGTGCTGTGCATTTCGAAACTTATATCCGTGGAGGACCGTTTTAAACAAACC\n-AAAAATAAGAATGAAAAGGGTGCTGAAGAAGCTAGGAAAGAAGCAGTAGCTGGAGTCATT\n-GAAACTACGGATGGGTGGTATGGGATTAAGGTACTCTTGGATCCTCCCCTTACAGTGCTG\n-GTACAGAGAGGAAGGCTGTCAGTTGGCTGTAAAATTATAACACATGGAGCTGAAATAATT\n-GGCTCCCAGGATGCCTGTACACCACTGGAAGCTCCGGAGTGTCTCATGCTAAAGATTTCT\n-GCGAACAGTACTCGACCTGCTTGCTGGAGTGCTAAACTTGGGTTTCATCGAGATCCCCGA\n-CCTTTTCCTCTCCCATTAGCATCGCTTTTTAATGATGGTGGATTAGTTGGCTGTGTTGAT\n-GTTGTTGTAGTGCGACTGTACCCCATACAGTGGATGGAAAAGAAATCAGATGGGATTTTT\n-GTGTTTCGTAATGATCGAGCAGAAGAAAGAGAGGCTCAAAGGCAAGTTGAGAATCAGCAA\n-AGAAAAATGGAAAGTTTGTTTGCAAAGATTCAAACTGAATTTGAACAGAAATATGAAGCC\n-AAAAGCAAAAGGAGAGGCCAAAAGGCACAGAAATTCAGCAAGCAGGAAATCCAGGCTCTT\n-CAAGATGGTGCAGAACTGAATGAAGCAATTGAGAATTCAATGGATCCGGGTTACTTTGAG\n-GCTTGCTTAAGAGAGGAGCAGTTAAAAGTCCTGCATGGCCACAGACAAATGTTAAATGAA\n-AAGAAACAAGCAGAGTTCCAGGCAGAGTTCAAGAAGGCACTGGAGTCTGCTGAACAGGAG\n-GGGAAGAGCTGCTGCAAGCGAGGTGTAACCCCTGTTTGGAAACTACGCATTGTGGACTAT\n-AGAAAACCAAGTGCTGCAGAATATATATTAAATATTTGGCGACCATTGGCTGATCTGCAT\n-TCCCTGTTAAAGGAAGGTAACCGGTACAGAATTTACCAGTTACTTGCATCACAATCCAAA\n-GGAAGAACCACTACTGCTGACATACAGCTAACAGCTACAAAGAAAACCCAGTATCAGCAG\n-TTCCAGTCTTTCCCAGAATTGATATCAGAGCTGTACAGTCCAAGAAAAGCTGTTAAATTC\n-AACATGTTGATGGATCCAACTTTCCGACCAGCCTATGCAGAAGTAGACCTGGTAGGATAT\n-ACCATCTCTATAGAAGGAAAGCCAGGTGTTGCTCCAGTGGTGTACCTATCTGATGAAAGT\n-CATAACTTTGTGGCAATAAAAGTTTGGACTGCCCTAAATCAGCTTGCTGTTGAAGACATT\n-GTGAAGCCATTTTCACTGATTGCTGCAAGTAACTTACAGTGGAGATCAGACAGCAGATCA\n-ATAATTCCTATGTTGTATGCTGGGGACCTTTCAATATTTTCATCAAATCCAAAAGAAGGG\n-CATCTTCAAGAGGCTTTTAATCAAAGAAGGACTGCTATACAAGAGAACATTTCTGGTACA\n-TACCTCCCCCCAGAAAAAAAAAATCTGCATCAGGAGTCTTATAAATCATGTCAATACAAC\n-ACATTGAATGTTTTGATGAATGGAAACATACACACACAGAGCCCAGTGCTGTCCAGGGTT\n-CATATGGGTACATCCTGTGCTTTTCTCTTTCTACTGCCTTCACCCTACCCTGAAAGTAAA\n-CACACTAGTCCTTTGATAACTATGAAGGCAGGAGTCAAATCTATGACTTTCCCAGGCTCT\n-GCAAAACTAATGCCACAAGCAAGTGAAAATCAAGAACTGGATACTCCCAAGAATCGTAAA\n-AAGAAGGCAGCTTTAGACTATCTTTGCCGCATTCCTTCCCCACCTGCACTTACTCCTATT\n-CGCAGTTTTGTGTCTTCCTCCTTGCAAAAGGCTTTTCACCCACCAAGGAGCTGCGTCAAA\n-CTACAAAGCGGCGAAAACCCAGTCGTCCCCACAGTTGGCAATAACGCTGTCCTAGGGATT\n-CAGTCAAAAAAAGATGAAGGGCCTGCTGCTTTTAATGAAGAGGATTCAGTAGCAGACGAG\n-GAACTAGCAATGATCAATACACAGGCATTCTTAGTCGGCTTAAGAAGGGACAAAAGACCA\n-AGTTTACTGGACAAAACTGCCAGTTTAAAGGGGCATGTTCCCTCAGAAAGATTTCTTGAA\n-GAGAAGCTCTTGTCAGTACTAAAAGAGCAGGCAAGCTCTAATTCTGAGAGAAACGCCACA\n-TCATTGGAAAATAAGAGCTGTGATAAAAGCAGGACATGTGTGAAACCATGTGAACATTCT\n-AATGACAGCATTGCAGAGGAAACTTCAGAAATCATCCCAGGCTGTCATGGTGGAGAATCT\n-GCTGTGGAAAACCAAAGTAAAAATTCCTCATTGTGCCACAAAAAACTGCAACAGAAGAAA\n-AGACGGAAGTATTACTAA\n->ENSXETT00000064180\n-ATGGCTGCACCGCAACTTGGAAAATCTGTCTTCTATGATCTGTTTAGCACGCATTGCTCT\n-CACTCAGATT'..b'GGCGGGAAGCTG\n-ATTATTCATGGTGCTCAGCTAGTCGGTTCACAGGATGCTTGTTCTCCTTTGGAGGCCCCT\n-GAGTCTATCATGCTAAAGATTTTTGCCAACAGCAGCAGGCGAGCACGATGGGATGCTAAA\n-CTGGGATTTTATAGGGACCCACGGCCATTCCTGCTCCCTGTCTCTTCTTTGTACAACAGT\n-GGGGGACCTGTAGGATGTGTGGATATTATTATATTAAGAAGCTATCCCACATTATGGATG\n-GAGAGAAAACCAGAAGGAGGCACTGTGTTCCGGTCAGGCCGAGCAGAAGAAAAGGAGGCT\n-AGACGGTACAACGTCCACAAGGAAAAAGCTATGGAGATTCTGTTTGACAAGATTCAAGCG\n-GAATTTGAAAAGGAAGAGAGGGATAACAGGAAACCTCGGAGCAGAAGACGGACAATCGGT\n-GATCAAGATATCAAAAGTCTTCAAGATGGAGAGGAGCTGTACGAAGCAGTGGGCGATGAC\n-CCAGCTTACCTTGAGGCACATTTGACTGAGCAGCAGGCAGAGACTCTACAGAACTACAAA\n-CGTCTGCTGATAGAAAAGAAGCAAGCAGAGCTGCAGGATCGCTACCGGCGAGCTGTAGAA\n-ACTGCAGAGGATGGCACAGGCAGCTGTCCCAAGCGAGATGTAGCACCTGTATGGAGACTC\n-AGCATTGCTGACTTCATGGAAAAGCCAGGCAGTGTTTACCAGCTGAACATTTGGCGGCCT\n-CCCTCAGAGCTCCAGTCTTTACTAAAAGAAGGCTGTCGATATAAGGTGTATAATCTCACC\n-ACAACAGATTCAAAGAAACAAGGTGGAAACACAACCGTTCAGCTAAGTGGAACAAAAAAA\n-ACACAATTTGAGGACCTTCAGGCATCCGAGGAATTGTTGTCAACATATTTTCAGCCAAGG\n-GTCTCGGCCACATTCATCGATCTCCAAGATCCAGAATTCCATTCGTTGTGTGGTGAGGTT\n-GATCTCACAGGATACGTCATCAGTATAATAGATGGACAAGGTTTCTCACCTGCTTTTTAC\n-CTAACTGATGGGAAACAAAATTTTGTAAAAGTGCGTTGTTTCAGCAGCTTCGCTCAGTCA\n-GGCTTGGAAGATGTAATAAAGCCAAGTGTCCTTTTAGCTTTAAGCAACCTCCAACTGAGA\n-GGTCAGGCAACATCACCCACTCCAGTCTTGTACGCTGGAGATCTAACCGTCTTCTCCACA\n-AACCCCAAAGAAGTTCATCTGCAGGAATCCTTCAGCCAGCTCAAAACCCTGGTTCAG\n->ENSTRUT00000015099\n-CAGCTGGCACGGGATATGCAGGATATGCGAATCAGAAAAAAGAAACGCCAGACCATTCGT\n-CCATTACCGGGAAGTTTGTTTCAGAAGAAGTCCTCTGGAGTCGCCAGGATTCCATTTAAA\n-GCTGCAGTAAACGGAAAGCCACCTGCACGCTACACTGCCAAACCGCTGTGTGGCCTCGGG\n-GTTCCTCTGAATGTGTTGGAGATCACCAGTGAGACTGCAGAATCTTTTCGCTTCAGCTTG\n-CAGCACTTTGTTAAGCTGGAGTCTCTCATAGATAAAGGTGGCATACAGCTCGCTGATGGA\n-GGATGGCTGATTCCCACGAATGACGGGACAGCGGGAAAAGAAGAGTTTTATCGAGCATTG\n-TGTGATACCCCGGGGGTTGATCCTAAACTAATGAGTGAGGAGTGGGTGTATAATCACTAC\n-CGATGGATTGTATGGAAACAAGCTTCCATGGAAAGGTCATTTCCAGAAGAGATGGGCAGC\n-CTCTGTCTCACCCCAGAGCAGGTTCTCCTACAACTTAAGTACAGATATGACATAGAGGTT\n-GACCACAGTCGCAGACCAGCTCTCAGAAAAATTATGGAAAAGGATGACACGGCAGCTAAA\n-ACCCTGGTCCTCTGTGTTTGTGGGGTTGTCTTCAGAGGCAGCTCCCCAAAAAACAAGAGT\n-TTTGGGGACATCAGTACTCCAGGAGCTGACCCAAAGGTTGAAAACCCCTGTGCTGTCGTT\n-TGGCTGACCGATGGATGGTATTCAATTAAAGCGCAACTGGATGGACCGTTGACCTCAATG\n-CTTCACAGAGGTCGACTACCAGTCGGCGGGAAGCTGATTATCCATGGTGCTCAGCTAGTC\n-GGATCAGAGAATGCTTGTTCCCCCCTGGAGGCCCCTGTGTCTTTAATGCTAAAGATTTGC\n-GCCAACAGCAGCAGACCAGCTCGATGGGATTCTAAACTAGGATTTCACAGGGACCCGCGG\n-CCATTCCTGCTTCCTGTCTCTTCTTTGTACAGCAGTGGAGGACCAGTAGGATGTGTGGAT\n-ATTATTATACTGAGAAGCTATCCCATATTGTGGATGGAGAGGAAACCAGAAGGAGGCACT\n-GTGTTCCGTTCAGGCAGAGCAGAAGAGAAGGAGGCGAGACGATACAACATTCACAAAGAA\n-AAAGCTATGGAAATCCTGTTTGACAAGATTAAAGCAGAATTTGAAAAGGAAGAAAAAGGT\n-AACAGGAAACCGCAGTGCAGAAGGACAATCAATGGTCAAAATATTACAAGTCTTCAAGAT\n-GGAGAGGAGCTGTACGAAGCAGTGGGCGATGACCCAGCTTTCCTTGAGGCGCATCTGACT\n-GAGAAGCAGGTGGAGGTTCTTCAGAACTACAAACGTCTGGTGATGGAGAAGCAGCAGGCA\n-GAGCTGCAGGATCGCTACCGGCGAGCTGTAGAAAGTGCAGAGGACGGCGTGGGGGGCTGC\n-CCCAAGCGAGATGTCGCACCTGTGTGGAGACTGTGCATTGCTGACTCCATGGGCCATTCT\n-GGCCGTGTTTACCAGCTGAGTCTTTGGCGGCCCCCCTCAGAGCTCCAGGCATTACTGAAG\n-GAAGGCTGTCGTTATAAAGTGTATAATCTCACCACTTTAGATTCAAAGAAACAGGGTGGA\n-AATGCAACGGTTCAGCTAACTGCAACAAAAAAAACACAGTTTGAGCACCTACAGGGATCT\n-GAGGAGTGGTTATCAAAACATTTTCAGCCGAGGGTTGCAACCAATTTTGTGAGACTCCAA\n-GATCCAGAATTCAACCCATTGTGTAGCGAGGTTGATCTCACAGGATATGTCATTACTATA\n-ATAGATGGGCAAGGTTTCTCTCCTGCATTTTACCTGGCTGATGGGAAACAGAATTTTGTA\n-AAAGTTCGGTGTTTCAGCAGCTTCGCCCAATCTGGCTTGGAAGATGTAATAAAGCCACGT\n-GTCCTTTTGGCCCTAAGCAACCTGCAGCTGAGGGGTCAGTCGACATCACCTACTCCAGTC\n-GTGTATGCTGGAGATTTAACCGTCTTCTCCACAAACCCCAAAGAGGTTCATCTGCAGGAA\n-TCCTTCAGCCAGCTCAAAACTCTGGTTCAGGGCCAGGAGAACTTTTTTGTGCACGCTGAA\n-GAGAAGCTTTCTCAGTTGATGTCTGATGGCCTGAGCGCTATCGCTTCTCCAGCTGGGCAA\n-ATACAAACCCCAGCTTCCACAGTAAAGAGAAGAGGAGACATGACGGATGTGAGCTCAAAT\n-ATAATGGTTATTAACAAGACTTCTAAGGTCACATGTCAGCAGCCAGGCAGAAGCCACAGA\n-TTCTCAACGCCTATAAACAGGAACTCTACTGCTCACAGTTCAGCAGAGAGAAACCCAAGC\n-ACTATTAAGAAGAGGAAAGCTCTCGACTATCTGTCCCACATCCCGTCTCCACCGCCTCTG\n-TCCTGTCTGAGTACACTATCTTCTCCCAGCGTAAAAAAGATATTTATTCCGCCTCGCCGA\n-ACTGAAATACCTGGTACTTTAAAAACTGTAAAGACTCCAAATCAAAAACCTTCCAATACA\n-CCTGTGGATGATCAGTGGGTGAATGATGAGGAACTGGCTATGATCGACACTCAGGCATTA\n' |
b |
diff -r dd268de3a107 -r 66170848da6c test-data/test.json --- a/test-data/test.json Fri Mar 03 07:22:53 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,31795 +0,0 @@\n-{\n- "ENSTNIG00000016261": {\n- "source": "ensembl",\n- "object_type": "Gene",\n- "logic_name": "ensembl",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "description": "breast cancer 2, early onset [Source:ZFIN;Acc:ZDB-GENE-060510-3]",\n- "display_name": "brca2",\n- "assembly_name": "TETRAODON8",\n- "biotype": "protein_coding",\n- "end": 4705074,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIG00000016261",\n- "Transcript": [\n- {\n- "source": "ensembl",\n- "object_type": "Transcript",\n- "logic_name": "ensembl",\n- "Exon": [\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4700679,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000057385",\n- "start": 4700614\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4701157,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000041338",\n- "start": 4701103\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4701424,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000031348",\n- "start": 4701218\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4701571,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000063263",\n- "start": 4701502\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4701608,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000054769",\n- "start": 4701587\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "tetraodon_nigroviridis",\n- "assembly_name": "TETRAODON8",\n- "end": 4701940,\n- "seq_region_name": "16",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSTNIE00000041082",\n- "start": 4701626\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- '..b' "assembly_name": "cavPor3",\n- "end": 33841095,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000227172",\n- "start": 33841075\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "end": 33841317,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000067342",\n- "start": 33841179\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "end": 33850200,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000067343",\n- "start": 33849956\n- },\n- {\n- "object_type": "Exon",\n- "version": 1,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "end": 33851341,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000067344",\n- "start": 33851195\n- },\n- {\n- "object_type": "Exon",\n- "version": 2,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "end": 33852801,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000067345",\n- "start": 33852516\n- },\n- {\n- "object_type": "Exon",\n- "version": 2,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "end": 33853154,\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "strand": 1,\n- "id": "ENSCPOE00000067347",\n- "start": 33852814\n- }\n- ],\n- "Parent": "ENSCPOG00000005153",\n- "seq_region_name": "scaffold_6",\n- "db_type": "core",\n- "is_canonical": 1,\n- "strand": 1,\n- "id": "ENSCPOT00000005208",\n- "version": 2,\n- "species": "cavia_porcellus",\n- "assembly_name": "cavPor3",\n- "display_name": "BRCA2-201",\n- "end": 33853154,\n- "biotype": "protein_coding",\n- "Translation": {\n- "object_type": "Translation",\n- "species": "cavia_porcellus",\n- "Parent": "ENSCPOT00000005208",\n- "end": 33853154,\n- "length": 3313,\n- "db_type": "core",\n- "id": "ENSCPOP00000004635",\n- "start": 33778275\n- },\n- "start": 33778275\n- }\n- ],\n- "start": 33778275\n- }\n-}\n\\ No newline at end of file\n' |