# HG changeset patch
# User iuc
# Date 1464816340 14400
# Node ID 3f1a0d47ea8daeda6f9a40235b8feff27d9a22a2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
diff -r 000000000000 -r 3f1a0d47ea8d kraken_databases.loc.sample
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_taxonomy_report.py Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,278 @@
+#!/usr/bin/env python
+
+# Reports a summary of Kraken's results
+# and optionally creates a newick Tree
+# Copyright (c) 2016 Daniel Blankenberg
+# Licensed under the Academic Free License version 3.0
+# https://github.com/blankenberg/Kraken-Taxonomy-Report
+
+import sys
+import os
+import optparse
+import re
+
+__VERSION__ = '0.0.1'
+
+__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"
+
+# Rank names were pulled from ncbi nodes.dmp on 02/02/2016
+# cat nodes.dmp | cut -f 5 | sort | uniq
+# "root" is added manually
+NO_RANK_NAME = "no rank"
+RANK_NAMES = [
+    NO_RANK_NAME,
+    "root",
+    "superkingdom",
+    "kingdom",
+    "subkingdom",
+    "superphylum",
+    "phylum",
+    "subphylum",
+    "superclass",
+    "class",
+    "subclass",
+    "infraclass",
+    "superorder",
+    "order",
+    "suborder",
+    "infraorder",
+    "parvorder",
+    "superfamily",
+    "family",
+    "subfamily",
+    "tribe",
+    "subtribe",
+    "genus",
+    "subgenus",
+    "species group",
+    "species subgroup",
+    "species",
+    "subspecies",
+    "varietas",
+    "forma",
+]
+# NB: We put 'no rank' at top of list for generating trees, due to e.g.
+# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)
+
+# A rank's position in RANK_NAMES is the integer used for it everywhere else
+# (rank_map values, output_lines keys, the clustering threshold).
+RANK_NAME_TO_INTS = {rank_name: rank_int for rank_int, rank_name in enumerate(RANK_NAMES)}
+RANK_NAMES_INTS = range(len(RANK_NAMES))
+
+NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME)
+NO_RANK_CODE = 'n'
+
+PRIMARY_RANK_NAMES = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom']
+# Primary ranks are abbreviated to the first letter of their name.
+RANK_INT_TO_CODE = {RANK_NAME_TO_INTS[rank_name]: rank_name[0] for rank_name in PRIMARY_RANK_NAMES}
+# superkingdom cannot use its first letter ('s' is species), so it gets 'd'.
+RANK_INT_TO_CODE[RANK_NAME_TO_INTS['superkingdom']] = 'd'
+PRIMARY_RANK_NAMES.append('superkingdom')
+
+# "<rank code>__<taxon name>" label used in long names and taxonomy strings.
+NAME_STUB = "%s__%s"
+# Raw string: the pattern is unchanged, but "\|" and "\." are no longer
+# invalid string escapes. Matches a tab, a space, a pipe, or the sequence ".;".
+NAME_RE = re.compile(r"(\t| |\||\.;)")
+NAME_REPL = "_"
+
+
+def get_kraken_db_path(db):
+    """Resolve a Kraken database name to the path used to open it.
+
+    When the KRAKEN_DB_PATH environment variable is set to a non-empty
+    value, ``db`` is joined onto it with os.path.join (an absolute ``db``
+    therefore wins); otherwise ``db`` is returned unchanged.
+
+    Raises ValueError when ``db`` is empty or None.
+    """
+    # Raise explicitly: an assert is stripped under -O and would surface as
+    # AssertionError rather than the ValueError that was clearly intended.
+    if not db:
+        raise ValueError("You must provide a kraken database")
+    k_db_path = os.getenv('KRAKEN_DB_PATH')
+    if k_db_path:
+        return os.path.join(k_db_path, db)
+    return db
+
+
+def load_taxonomy(db_path, sanitize_names=False):
+    """Load the taxonomy dumps found under ``<db_path>/taxonomy``.
+
+    names.dmp supplies the scientific name of each node; nodes.dmp supplies
+    the parent and rank of each node. Node ids are kept as strings exactly
+    as they appear in the files. Blank lines in either file are ignored.
+
+    Returns a tuple ``(child_lists, name_map, rank_map)``:
+      child_lists -- parent id -> list of child ids, in file order. Node '1'
+                     is filed under the pseudo-parent '0' regardless of the
+                     parent recorded in the file.
+      name_map    -- node id -> scientific name, with NAME_RE matches
+                     replaced by NAME_REPL when ``sanitize_names`` is true.
+      rank_map    -- node id -> index into RANK_NAMES. An unrecognised rank
+                     is reported on stderr and stored as NO_RANK_INT.
+    """
+    child_lists = {}
+    name_map = {}
+    rank_map = {}
+    with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh:
+        for line in fh:
+            line = line.rstrip("\n\r")
+            if not line.strip():
+                continue
+            # Rows end with a bare "\t|" terminator; drop it before splitting.
+            if line.endswith("\t|"):
+                line = line[:-2]
+            fields = line.split("\t|\t")
+            # Only the scientific name is kept, so skip every other name
+            # class before paying for the sanitising regex.
+            if fields[3] != "scientific name":
+                continue
+            name = fields[1]
+            if sanitize_names:
+                name = NAME_RE.sub(NAME_REPL, name)
+            name_map[fields[0]] = name
+
+    with open(os.path.join(db_path, "taxonomy/nodes.dmp")) as fh:
+        for line in fh:
+            line = line.rstrip("\n\r")
+            if not line.strip():
+                continue
+            fields = line.split("\t|\t")
+            node_id = fields[0]
+            parent_id = fields[1]
+            rank_name = fields[2]
+            rank = RANK_NAME_TO_INTS.get(rank_name.lower())
+            if rank is None:
+                # This should never happen, unless new taxonomy ranks are created
+                sys.stderr.write('Unrecognized rank: Node "%s" is "%s", setting to "%s"\n' % (node_id, rank_name, NO_RANK_NAME))
+                rank = NO_RANK_INT
+            if node_id == '1':
+                # Hang node '1' under a pseudo-parent so callers can find the
+                # tree root as child_lists['0'][0].
+                parent_id = '0'
+            child_lists.setdefault(parent_id, []).append(node_id)
+            rank_map[node_id] = rank
+    return (child_lists, name_map, rank_map)
+
+
+def dfs_summation(node, counts, child_lists):
+    """Add every descendant's count into its ancestors, in place.
+
+    Walks the subtree rooted at ``node`` depth-first. After the call each
+    visited node that has children holds its own count plus the totals of
+    all its children in ``counts`` (an entry is created even when that
+    total is 0). Nodes without children are left untouched.
+    """
+    descendants = child_lists.get(node)
+    if not descendants:
+        return
+    subtotal = counts.get(node, 0)
+    for kid in descendants:
+        # Finish the child's own subtree first so its entry is already a
+        # clade total by the time it is added here.
+        dfs_summation(kid, counts, child_lists)
+        subtotal = subtotal + counts.get(kid, 0)
+    counts[node] = subtotal
+
+
+def dfs_report(node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None):
+    """Walk the taxonomy depth-first from ``node`` and queue report rows.
+
+    A row is appended to ``output_lines[rank]`` for each node that is either
+    hit (present in ``hit_taxa``) or forced by ``options.show_zeros``, and
+    whose rank has a one-letter code (or any rank when
+    ``options.intermediate`` is set). A row is tab-separated:
+    the label (node id, long name or plain name depending on
+    ``options.name_id`` / ``options.name_long``), one count per entry of
+    ``file_data``, then optionally the rank name and the taxonomy string.
+
+    ``name`` and ``tax`` carry the "|"-joined and ";"-joined chains of
+    "<code>__<name>" stubs of the ancestors that were themselves reported;
+    skipped ancestors contribute nothing to either chain.
+    """
+    reportable = options.show_zeros or node in hit_taxa
+    if not (options.summation or reportable):
+        # Without summation, a node that is not reportable ends the walk of
+        # its whole subtree.
+        return
+    rank_int = rank_map[node]
+    code = RANK_INT_TO_CODE.get(rank_int, NO_RANK_CODE)
+    if reportable and (options.intermediate or code != NO_RANK_CODE):
+        name = "" if name is None else "%s|" % name
+        tax = '' if tax is None else "%s;" % tax
+        sanitized_name = name_map[node]
+        name_stub = NAME_STUB % (code, sanitized_name)
+        name = name + name_stub
+        tax = tax + name_stub
+        # First column: the identifier style requested on the command line.
+        if options.name_id:
+            row = node
+        elif options.name_long:
+            row = name
+        else:
+            row = sanitized_name
+        # One count column per input file, 0 when the file never hit the node.
+        for sample_counts in file_data:
+            row = "%s\t%i" % (row, sample_counts.get(node, 0))
+        if options.show_rank:
+            row = "%s\t%s" % (row, RANK_NAMES[rank_int])
+        if options.taxonomy:
+            row = "%s\t%s" % (row, tax)
+        output_lines[rank_int].append(row)
+    # Children are visited even when this node itself produced no row.
+    for child in child_lists.get(node) or ():
+        dfs_report(child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax)
+
+
+def write_tree(child_lists, name_map, rank_map, options, branch_length=1):
+    """Write the taxonomy as a newick tree to ``options.output_tree``.
+
+    The tree is rooted at the first child of the pseudo-parent "0". Every
+    clade gets ``branch_length`` and is named by node id when
+    ``options.name_id`` is set, otherwise by ``name_map``. When
+    ``options.cluster`` is not None, children whose rank integer is greater
+    than it are left out, and nothing below a node of exactly that rank is
+    added.
+    """
+    # Uses Biopython, only load if making tree
+    import Bio.Phylo
+    from Bio.Phylo import BaseTree
+
+    cluster = options.cluster
+    nodes = {}
+
+    def _get_name(node_id):
+        return node_id if options.name_id else name_map[node_id]
+
+    def _clade_for(node_id):
+        # Create each clade once and hand back the same object afterwards.
+        if node_id not in nodes:
+            nodes[node_id] = BaseTree.Clade(name=_get_name(node_id), branch_length=branch_length)
+        return nodes[node_id]
+
+    root_node_id = child_lists["0"][0]
+    _clade_for(root_node_id)
+
+    def recurse_children(parent_id):
+        if cluster is not None and rank_map[parent_id] == cluster:
+            # Short circuit if we found our rank, prevents 'hanging' no ranks from being output
+            # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db
+            return
+        parent_clade = _clade_for(parent_id)
+        for child_id in child_lists.get(parent_id, []):
+            if cluster is not None and rank_map[child_id] > cluster:
+                continue
+            parent_clade.clades.append(_clade_for(child_id))
+            recurse_children(child_id)
+
+    recurse_children(root_node_id)
+    tree = BaseTree.Tree(root=nodes[root_node_id])
+    Bio.Phylo.write([tree], options.output_tree, 'newick')
+
+
+def _build_option_parser():
+    """Create the optparse parser that declares every command-line switch."""
+    parser = optparse.OptionParser(usage="%prog [options] file1 file...fileN")
+    parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit')
+    parser.add_option('', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes')
+    parser.add_option('', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output')
+    parser.add_option('', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks')
+    parser.add_option('', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name')
+    parser.add_option('', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name')
+    parser.add_option('', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column')
+    parser.add_option('', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank')
+    parser.add_option('', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa')
+    parser.add_option('', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help='Replace special chars (\t| |\||\.;) with underscore (_)')
+    parser.add_option('', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name')
+    parser.add_option('', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database')
+    parser.add_option('', '--output', dest='output', action='store', type="string", default=None, help='Name of output file')
+    parser.add_option('', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree')
+    return parser
+
+
+def _count_taxa(input_filename):
+    """Count the lines of one input file per value of its third tab-separated column."""
+    taxo_counts = {}
+    with open(input_filename) as fh:
+        for line in fh:
+            if not line.strip("\r\n"):
+                # A blank line has no third column; skip it instead of crashing.
+                continue
+            taxon_id = line.split("\t")[2]
+            taxo_counts[taxon_id] = taxo_counts.get(taxon_id, 0) + 1
+    return taxo_counts
+
+
+def __main__():
+    """Command-line entry point: tabulate per-taxon counts for each input file.
+
+    Parses the options, loads the taxonomy of the selected database, counts
+    the taxon ids of every input file (rolled up the tree when summation or
+    clustering is requested), writes one row per reported taxon grouped by
+    rank to --output or stdout, and finally writes a newick tree when
+    --output-tree is given.
+
+    An unknown --cluster rank, or a non-primary rank without --intermediate,
+    ends the run through parser.error (usage message on stderr, exit status 2).
+    """
+    parser = _build_option_parser()
+    (options, args) = parser.parse_args()
+    if options.version:
+        sys.stderr.write("Kraken Taxonomy Report (%s) version %s\n" % (__URL__, __VERSION__))
+        sys.exit()
+    if not args:
+        sys.stderr.write(parser.get_usage() + "\n")
+        sys.exit()
+
+    if options.cluster:
+        cluster_name = options.cluster.lower()
+        cluster = RANK_NAME_TO_INTS.get(cluster_name)
+        if cluster is None:
+            parser.error('"%s" is not a valid rank for clustering.' % options.cluster)
+        if cluster_name not in PRIMARY_RANK_NAMES and not options.intermediate:
+            parser.error('You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster)
+        ranks_to_report = [cluster]
+        # From here on options.cluster is the rank integer, not the name.
+        options.cluster = cluster
+        # When clustering we need to do summation
+        options.summation = True
+    else:
+        options.cluster = None  # make empty string into None
+        ranks_to_report = RANK_NAMES_INTS
+
+    db_path = get_kraken_db_path(options.db)
+    (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names)
+
+    file_data = []
+    # A set keeps the per-node membership tests in dfs_report constant-time.
+    hit_taxa = set()
+    for input_filename in args:
+        clade_counts = _count_taxa(input_filename)
+        if options.summation:
+            dfs_summation('1', clade_counts, child_lists)
+        hit_taxa.update(taxon for taxon, count in clade_counts.items() if count)
+        file_data.append(clade_counts)
+
+    output_lines = dict((rank_int, []) for rank_int in RANK_NAMES_INTS)
+    dfs_report('1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None)
+
+    # Open the report only once there is something to write, so a failure
+    # while loading the taxonomy or inputs does not truncate an existing file.
+    if options.output:
+        output_fh = open(options.output, 'wb+')
+    else:
+        output_fh = sys.stdout
+    try:
+        if options.header_line:
+            output_fh.write("#ID\t")
+            output_fh.write("\t".join(args))
+            if options.show_rank:
+                output_fh.write("\trank")
+            if options.taxonomy:
+                output_fh.write("\ttaxonomy")
+            output_fh.write('\n')
+
+        for rank_int in ranks_to_report:
+            for line in output_lines.get(rank_int, []):
+                output_fh.write(line)
+                output_fh.write('\n')
+    finally:
+        # Close the report file we opened; never close the shared stdout.
+        if output_fh is not sys.stdout:
+            output_fh.close()
+
+    if options.output_tree:
+        write_tree(child_lists, name_map, rank_map, options)
+
+
+if __name__ == "__main__":
+    __main__()
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_taxonomy_report.xml Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,242 @@
+
+
+ view report of classification for multiple samples
+
+ biopython
+
+
+
+
+
+ python ${__tool_directory__}/kraken_taxonomy_report.py --version
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ tree
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @unpublished{Kraken-Taxonomy-Report:2016,
+ title = "Kraken Taxonomy Report",
+ author = "Daniel Blankenberg",
+ url = "https://github.com/blankenberg/Kraken-Taxonomy-Report",
+ year = "2016 (accessed June 1, 2016)"
+ }
+
+
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_kraken_1.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,4 @@
+C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162
+C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174
+C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88
+C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_kraken_2.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,5 @@
+C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162
+C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174
+C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88
+C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841
+C gi|145234|gb|M33727.1|ECOALPHOE2 562 97 562:88
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_1.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,11 @@
+#ID input_kraken_1.tabular rank taxonomy
+root 4 no rank n__root
+cellular_organisms 4 no rank n__root;n__cellular_organisms
+Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12
+Bacteria 4 superkingdom n__root;n__cellular_organisms;d__Bacteria
+Proteobacteria 4 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria
+Gammaproteobacteria 4 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria
+Enterobacteriales 4 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales
+Enterobacteriaceae 4 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae
+Escherichia 4 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia
+Escherichia_coli 4 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_2.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,11 @@
+#ID input_kraken_1.tabular rank taxonomy
+root 0 no rank n__root
+cellular_organisms 0 no rank n__root;n__cellular_organisms
+Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12
+Bacteria 0 superkingdom n__root;n__cellular_organisms;d__Bacteria
+Proteobacteria 0 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria
+Gammaproteobacteria 0 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria
+Enterobacteriales 0 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales
+Enterobacteriaceae 0 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae
+Escherichia 0 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia
+Escherichia_coli 1 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_3.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,2 @@
+#ID input_kraken_1.tabular rank taxonomy
+Escherichia_coli 4 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_4.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_4.tabular Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,2 @@
+#ID input_kraken_1.tabular input_kraken_2.tabular rank taxonomy
+Escherichia_coli 4 5 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_1.newick
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_tree_1.newick Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,1 @@
+(((((((((Escherichia_coli_K-12:1.00000)Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000;
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_3.newick
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_tree_3.newick Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,1 @@
+((((((((Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000;
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_database.loc Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,1 @@
+test_db test_db ${__HERE__}
\ No newline at end of file
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.idx
Binary file test-data/test_db/database.idx has changed
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.kdb
Binary file test-data/test_db/database.kdb has changed
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/names.dmp Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,74 @@
+83333 | Escherichia coli K-12 | | scientific name |
+83333 | Escherichia coli K12 | | equivalent name |
+562 | "Bacillus coli" Migula 1895 | | authority |
+562 | "Bacterium coli commune" Escherich 1885 | | authority |
+562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority |
+562 | ATCC 11775 | | type material |
+562 | Bacillus coli | | synonym |
+562 | Bacterium coli | | synonym |
+562 | Bacterium coli commune | | synonym |
+562 | CCUG 24 | | type material |
+562 | CCUG 29300 | | type material |
+562 | CIP 54.8 | | type material |
+562 | DSM 30083 | | type material |
+562 | Enterococcus coli | | synonym |
+562 | Escherchia coli | | misspelling |
+562 | Escherichia coli | | scientific name |
+562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority |
+562 | Escherichia sp. MAR | | includes |
+562 | Escherichia/Shigella coli | | equivalent name |
+562 | Eschericia coli | | misspelling |
+562 | JCM 1649 | | type material |
+562 | LMG 2092 | | type material |
+562 | NBRC 102203 | | type material |
+562 | NCCB 54008 | | type material |
+562 | NCTC 9001 | | type material |
+562 | bacterium 10a | | includes |
+562 | bacterium E3 | | includes |
+561 | Escherchia | | misspelling |
+561 | Escherichia | | scientific name |
+561 | Escherichia Castellani and Chalmers 1919 | | authority |
+543 | Enterobacteraceae | | synonym |
+543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae | | scientific name |
+543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae Rahn 1937 | | synonym |
+543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part |
+91347 | 'Enterobacteriales' | | synonym |
+91347 | Enterobacteriaceae and related endosymbionts | | synonym |
+91347 | Enterobacteriaceae group | | synonym |
+91347 | Enterobacteriales | | scientific name |
+91347 | enterobacteria | enterobacteria | blast name |
+91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part |
+1236 | Gammaproteobacteria | | scientific name |
+1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym |
+1236 | Proteobacteria gamma subdivision | | synonym |
+1236 | Purple bacteria, gamma subdivision | | synonym |
+1236 | g-proteobacteria | gamma proteos | blast name |
+1236 | gamma proteobacteria | | synonym |
+1236 | gamma subdivision | | synonym |
+1236 | gamma subgroup | | synonym |
+1224 | Proteobacteria | | scientific name |
+1224 | Proteobacteria Garrity et al. 2005 | | authority |
+1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority |
+1224 | not Proteobacteria Cavalier-Smith 2002 | | authority |
+1224 | proteobacteria | proteobacteria | blast name |
+1224 | purple bacteria | | common name |
+1224 | purple bacteria and relatives | | common name |
+1224 | purple non-sulfur bacteria | | common name |
+1224 | purple photosynthetic bacteria | | common name |
+1224 | purple photosynthetic bacteria and relatives | | common name |
+2 | Bacteria | Bacteria | scientific name |
+2 | Monera | Monera | in-part |
+2 | Procaryotae | Procaryotae | in-part |
+2 | Prokaryota | Prokaryota | in-part |
+2 | Prokaryotae | Prokaryotae | in-part |
+2 | bacteria | bacteria | blast name |
+2 | eubacteria | | genbank common name |
+2 | not Bacteria Haeckel 1894 | | synonym |
+2 | prokaryote | prokaryote | in-part |
+2 | prokaryotes | prokaryotes | in-part |
+1 | all | | synonym |
+1 | root | | scientific name |
+131567 | biota | | synonym |
+131567 | cellular organisms | | scientific name |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/nodes.dmp Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,10 @@
+83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jun 01 17:25:40 2016 -0400
@@ -0,0 +1,8 @@
+
+
+
+
+