# HG changeset patch # User iuc # Date 1464816340 14400 # Node ID 3f1a0d47ea8daeda6f9a40235b8feff27d9a22a2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1 diff -r 000000000000 -r 3f1a0d47ea8d kraken_databases.loc.sample diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kraken_taxonomy_report.py Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,278 @@ +#!/usr/bin/env python + +# Reports a summary of Kraken's results +# and optionally creates a newick Tree +# Copyright (c) 2016 Daniel Blankenberg +# Licensed under the Academic Free License version 3.0 +# https://github.com/blankenberg/Kraken-Taxonomy-Report + +import sys +import os +import optparse +import re + +__VERSION__ = '0.0.1' + +__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report" + +# Rank names were pulled from ncbi nodes.dmp on 02/02/2016 +# cat nodes.dmp | cut -f 5 | sort | uniq +# "root" is added manually +NO_RANK_NAME = "no rank" +RANK_NAMES = [ NO_RANK_NAME, + "root", + "superkingdom", + "kingdom", + "subkingdom", + "superphylum", + "phylum", + "subphylum", + "superclass", + "class", + "subclass", + "infraclass", + "superorder", + "order", + "suborder", + "infraorder", + "parvorder", + "superfamily", + "family", + "subfamily", + "tribe", + "subtribe", + "genus", + "subgenus", + "species group", + "species subgroup", + "species", + "subspecies", + "varietas", + "forma" ] +# NB: We put 'no rank' at top of list for generating trees, due to e.g. +# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom) + +RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] ) +RANK_NAMES_INTS = range( len( RANK_NAMES ) ) + +NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME ) +NO_RANK_CODE = 'n' + +PRIMARY_RANK_NAMES = [ 'species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom' ] +RANK_INT_TO_CODE = {} +for name in PRIMARY_RANK_NAMES: + RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0] +RANK_INT_TO_CODE[ RANK_NAMES.index( 'superkingdom' ) ] = 'd' +PRIMARY_RANK_NAMES.append( 'superkingdom' ) + +NAME_STUB = "%s__%s" +NAME_RE = re.compile( "(\t| |\||\.;)" ) +NAME_REPL = "_" + + +def get_kraken_db_path( db ): + assert db, ValueError( "You must provide a kraken database" ) + k_db_path = os.getenv('KRAKEN_DB_PATH', None ) + if k_db_path: + db = os.path.join( k_db_path, db ) + return db + + +def load_taxonomy( db_path, sanitize_names=False ): + child_lists = {} + name_map = {} + rank_map = {} + with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh: + for line in fh: + line = line.rstrip( "\n\r" ) + if line.endswith( "\t|" ): + line = line[:-2] + fields = line.split( "\t|\t" ) + node_id = fields[0] + name = fields[1] + if sanitize_names: + name = NAME_RE.sub( NAME_REPL, name ) + name_type = fields[3] + if name_type == "scientific name": + name_map[ node_id ] = name + + with open( os.path.join( db_path, "taxonomy/nodes.dmp" ) ) as fh: + for line in fh: + line = line.rstrip( "\n\r" ) + fields = line.split( "\t|\t" ) + node_id = fields[0] + parent_id = fields[1] + rank = RANK_NAME_TO_INTS.get( fields[2].lower(), None ) + if rank is None: + # This should never happen, unless new taxonomy ranks are created + print >> sys.stderr, 'Unrecognized rank: Node "%s" is "%s", setting to "%s"' % ( node_id, fields[2], NO_RANK_NAME ) + rank = NO_RANK_INT + if node_id == '1': + parent_id = '0' + if parent_id not in child_lists: + child_lists[ parent_id ] = [] + child_lists[ parent_id ].append( node_id ) + rank_map[node_id] = rank + return ( child_lists, name_map, rank_map ) + + +def dfs_summation( node, counts, child_lists ): + children = child_lists.get( node, None ) + if children: + for child in children: + dfs_summation( child, counts, child_lists ) + counts[ node ] = counts.get( node, 0 ) + counts.get( child, 0 ) + + +def dfs_report( node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ): + if not options.summation and ( not options.show_zeros and node not in hit_taxa ): + return + rank_int = rank_map[node] + code = RANK_INT_TO_CODE.get( rank_int, NO_RANK_CODE ) + if ( code != NO_RANK_CODE or options.intermediate ) and ( options.show_zeros or node in hit_taxa): + if name is None: + name = "" + else: + name = "%s|" % name + if tax is None: + tax = '' + else: + tax = "%s;" % tax + sanitized_name = name_map[ node ] + name_stub = NAME_STUB % ( code, sanitized_name ) + name = name + name_stub + tax = tax + name_stub + if options.name_id: + output = node + elif options.name_long: + output = name + else: + output = sanitized_name + for val in file_data: + output = "%s\t%i" % ( output, val.get( node, 0 ) ) + if options.show_rank: + output = "%s\t%s" % ( output, RANK_NAMES[ rank_int ] ) + if options.taxonomy: + output = "%s\t%s" % ( output, tax ) + output_lines[ rank_int ].append( output ) + children = child_lists.get( node ) + if children: + for child in children: + dfs_report( child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax ) + + +def write_tree( child_lists, name_map, rank_map, options, branch_length=1 ): + # Uses Biopython, only load if making tree + import Bio.Phylo + from Bio.Phylo import BaseTree + + def _get_name( node_id ): + if options.name_id: + return node_id + return name_map[node_id] + nodes = {} + root_node_id = child_lists["0"][0] + nodes[root_node_id] = BaseTree.Clade( name=_get_name( root_node_id), branch_length=branch_length ) + + def recurse_children( parent_id ): + if options.cluster is not None and rank_map[parent_id] == options.cluster: + # Short circuit if we found our rank, prevents 'hanging' no ranks from being output + # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db + return + if parent_id not in nodes: + nodes[parent_id] = BaseTree.Clade( name=_get_name( parent_id ), branch_length=branch_length ) + for child_id in child_lists.get( parent_id, [] ): + if options.cluster is None or ( rank_map[child_id] <= options.cluster ): + if child_id not in nodes: + nodes[child_id] = BaseTree.Clade(name=_get_name( child_id ), branch_length=branch_length) + nodes[parent_id].clades.append(nodes[child_id]) + recurse_children( child_id ) + recurse_children( root_node_id ) + tree = BaseTree.Tree(root=nodes[root_node_id]) + Bio.Phylo.write( [tree], options.output_tree, 'newick' ) + + +def __main__(): + parser = optparse.OptionParser( usage="%prog [options] file1 file...fileN" ) + parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='print version and exit' ) + parser.add_option( '', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes' ) + parser.add_option( '', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output' ) + parser.add_option( '', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks' ) + parser.add_option( '', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name' ) + parser.add_option( '', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name' ) + parser.add_option( '', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column' ) + parser.add_option( '', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank' ) + parser.add_option( '', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa' ) + parser.add_option( '', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help='Replace special chars (\t| |\||\.;) with underscore (_)' ) + parser.add_option( '', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name' ) + parser.add_option( '', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database' ) + parser.add_option( '', '--output', dest='output', action='store', type="string", default=None, help='Name of output file' ) + parser.add_option( '', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree' ) + (options, args) = parser.parse_args() + if options.version: + print >> sys.stderr, "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ) + sys.exit() + if not args: + print >> sys.stderr, parser.get_usage() + sys.exit() + + if options.cluster: + cluster_name = options.cluster.lower() + cluster = RANK_NAME_TO_INTS.get( cluster_name, None ) + assert cluster is not None, ValueError( '"%s" is not a valid rank for clustering.' % options.cluster ) + if cluster_name not in PRIMARY_RANK_NAMES: + assert options.intermediate, ValueError( 'You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster ) + ranks_to_report = [ cluster ] + options.cluster = cluster + # When clustering we need to do summatation + options.summation = True + else: + options.cluster = None # make empty string into None + ranks_to_report = RANK_NAMES_INTS + + if options.output: + output_fh = open( options.output, 'wb+' ) + else: + output_fh = sys.stdout + + db_path = get_kraken_db_path( options.db ) + ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names ) + file_data = [] + hit_taxa = [] + for input_filename in args: + taxo_counts = {} + with open( input_filename ) as fh: + for line in fh: + fields = line.split( "\t" ) + taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1 + clade_counts = taxo_counts.copy() # fixme remove copying? + if options.summation: + dfs_summation( '1', clade_counts, child_lists ) + for key, value in clade_counts.items(): + if value and key not in hit_taxa: + hit_taxa.append( key ) + file_data.append( clade_counts ) + + if options.header_line: + output_fh.write( "#ID\t" ) + output_fh.write( "\t".join( args ) ) + if options.show_rank: + output_fh.write( "\trank" ) + if options.taxonomy: + output_fh.write( "\ttaxonomy" ) + output_fh.write( '\n' ) + + output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] ) + dfs_report( '1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ) + + for rank_int in ranks_to_report: + for line in output_lines.get( rank_int, [] ): + output_fh.write( line ) + output_fh.write( '\n' ) + fh.close() + if options.output_tree: + write_tree( child_lists, name_map, rank_map, options ) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kraken_taxonomy_report.xml Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,242 @@ + + + view report of classification for multiple samples + + biopython + + + + + + python ${__tool_directory__}/kraken_taxonomy_report.py --version + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + tree + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @unpublished{Kraken-Taxonomy-Report:2016, + title = "Kraken Taxonomy Report", + author = "Daniel Blankenberg", + url = "https://github.com/blankenberg/Kraken-Taxonomy-Report", + year = "2016 (accessed June 1, 2016)" + } + + diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_kraken_1.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,4 @@ +C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162 +C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174 +C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88 +C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841 diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_kraken_2.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,5 @@ +C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162 +C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174 +C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88 +C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841 +C gi|145234|gb|M33727.1|ECOALPHOE2 562 97 562:88 diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_1.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,11 @@ +#ID input_kraken_1.tabular rank taxonomy +root 4 no rank n__root +cellular_organisms 4 no rank n__root;n__cellular_organisms +Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12 +Bacteria 4 superkingdom n__root;n__cellular_organisms;d__Bacteria +Proteobacteria 4 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria +Gammaproteobacteria 4 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria +Enterobacteriales 4 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales +Enterobacteriaceae 4 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae +Escherichia 4 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia +Escherichia_coli 4 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_2.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,11 @@ +#ID input_kraken_1.tabular rank taxonomy +root 0 no rank n__root +cellular_organisms 0 no rank n__root;n__cellular_organisms +Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12 +Bacteria 0 superkingdom n__root;n__cellular_organisms;d__Bacteria +Proteobacteria 0 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria +Gammaproteobacteria 0 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria +Enterobacteriales 0 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales +Enterobacteriaceae 0 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae +Escherichia 0 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia +Escherichia_coli 1 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_3.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,2 @@ +#ID input_kraken_1.tabular rank taxonomy +Escherichia_coli 4 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_4.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_4.tabular Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,2 @@ +#ID input_kraken_1.tabular input_kraken_2.tabular rank taxonomy +Escherichia_coli 4 5 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_1.newick --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_tree_1.newick Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,1 @@ +(((((((((Escherichia_coli_K-12:1.00000)Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000; diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_3.newick --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_tree_3.newick Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,1 @@ +((((((((Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000; diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_database.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_database.loc Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,1 @@ +test_db test_db ${__HERE__} \ No newline at end of file diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.idx Binary file test-data/test_db/database.idx has changed diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.kdb Binary file test-data/test_db/database.kdb has changed diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/names.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/names.dmp Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,74 @@ +83333 | Escherichia coli K-12 | | scientific name | +83333 | Escherichia coli K12 | | equivalent name | +562 | "Bacillus coli" Migula 1895 | | authority | +562 | "Bacterium coli commune" Escherich 1885 | | authority | +562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority | +562 | ATCC 11775 | | type material | +562 | Bacillus coli | | synonym | +562 | Bacterium coli | | synonym | +562 | Bacterium coli commune | | synonym | +562 | CCUG 24 | | type material | +562 | CCUG 29300 | | type material | +562 | CIP 54.8 | | type material | +562 | DSM 30083 | | type material | +562 | Enterococcus coli | | synonym | +562 | Escherchia coli | | misspelling | +562 | Escherichia coli | | scientific name | +562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority | +562 | Escherichia sp. MAR | | includes | +562 | Escherichia/Shigella coli | | equivalent name | +562 | Eschericia coli | | misspelling | +562 | JCM 1649 | | type material | +562 | LMG 2092 | | type material | +562 | NBRC 102203 | | type material | +562 | NCCB 54008 | | type material | +562 | NCTC 9001 | | type material | +562 | bacterium 10a | | includes | +562 | bacterium E3 | | includes | +561 | Escherchia | | misspelling | +561 | Escherichia | | scientific name | +561 | Escherichia Castellani and Chalmers 1919 | | authority | +543 | Enterobacteraceae | | synonym | +543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym | +543 | Enterobacteriaceae | | scientific name | +543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym | +543 | Enterobacteriaceae Rahn 1937 | | synonym | +543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part | +91347 | 'Enterobacteriales' | | synonym | +91347 | Enterobacteriaceae and related endosymbionts | | synonym | +91347 | Enterobacteriaceae group | | synonym | +91347 | Enterobacteriales | | scientific name | +91347 | enterobacteria | enterobacteria | blast name | +91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part | +1236 | Gammaproteobacteria | | scientific name | +1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym | +1236 | Proteobacteria gamma subdivision | | synonym | +1236 | Purple bacteria, gamma subdivision | | synonym | +1236 | g-proteobacteria | gamma proteos | blast name | +1236 | gamma proteobacteria | | synonym | +1236 | gamma subdivision | | synonym | +1236 | gamma subgroup | | synonym | +1224 | Proteobacteria | | scientific name | +1224 | Proteobacteria Garrity et al. 2005 | | authority | +1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority | +1224 | not Proteobacteria Cavalier-Smith 2002 | | authority | +1224 | proteobacteria | proteobacteria | blast name | +1224 | purple bacteria | | common name | +1224 | purple bacteria and relatives | | common name | +1224 | purple non-sulfur bacteria | | common name | +1224 | purple photosynthetic bacteria | | common name | +1224 | purple photosynthetic bacteria and relatives | | common name | +2 | Bacteria | Bacteria | scientific name | +2 | Monera | Monera | in-part | +2 | Procaryotae | Procaryotae | in-part | +2 | Prokaryota | Prokaryota | in-part | +2 | Prokaryotae | Prokaryotae | in-part | +2 | bacteria | bacteria | blast name | +2 | eubacteria | | genbank common name | +2 | not Bacteria Haeckel 1894 | | synonym | +2 | prokaryote | prokaryote | in-part | +2 | prokaryotes | prokaryotes | in-part | +1 | all | | synonym | +1 | root | | scientific name | +131567 | biota | | synonym | +131567 | cellular organisms | | scientific name | diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/nodes.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/nodes.dmp Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,10 @@ +83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,8 @@ + + + + + value, name, path + +
+
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Jun 01 17:25:40 2016 -0400 @@ -0,0 +1,8 @@ + + + + + value, name, path + +
+