Next changeset 1:b97694b21bc3 (2016-11-23) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1 |
added:
kraken_databases.loc.sample kraken_taxonomy_report.py kraken_taxonomy_report.xml test-data/input_kraken_1.tabular test-data/input_kraken_2.tabular test-data/output_abundance_1.tabular test-data/output_abundance_2.tabular test-data/output_abundance_3.tabular test-data/output_abundance_4.tabular test-data/output_tree_1.newick test-data/output_tree_3.newick test-data/test_database.loc test-data/test_db/database.idx test-data/test_db/database.kdb test-data/test_db/taxonomy/names.dmp test-data/test_db/taxonomy/nodes.dmp tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
b |
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kraken_taxonomy_report.py Wed Jun 01 17:25:40 2016 -0400 |
[ |
b'@@ -0,0 +1,278 @@\n+#!/usr/bin/env python\n+\n+# Reports a summary of Kraken\'s results\n+# and optionally creates a newick Tree\n+# Copyright (c) 2016 Daniel Blankenberg\n+# Licensed under the Academic Free License version 3.0\n+# https://github.com/blankenberg/Kraken-Taxonomy-Report\n+\n+import sys\n+import os\n+import optparse\n+import re\n+\n+__VERSION__ = \'0.0.1\'\n+\n+__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"\n+\n+# Rank names were pulled from ncbi nodes.dmp on 02/02/2016\n+# cat nodes.dmp | cut -f 5 | sort | uniq\n+# "root" is added manually\n+NO_RANK_NAME = "no rank"\n+RANK_NAMES = [ NO_RANK_NAME,\n+ "root",\n+ "superkingdom",\n+ "kingdom",\n+ "subkingdom",\n+ "superphylum",\n+ "phylum",\n+ "subphylum",\n+ "superclass",\n+ "class",\n+ "subclass",\n+ "infraclass",\n+ "superorder",\n+ "order",\n+ "suborder",\n+ "infraorder",\n+ "parvorder",\n+ "superfamily",\n+ "family",\n+ "subfamily",\n+ "tribe",\n+ "subtribe",\n+ "genus",\n+ "subgenus",\n+ "species group",\n+ "species subgroup",\n+ "species",\n+ "subspecies",\n+ "varietas",\n+ "forma" ]\n+# NB: We put \'no rank\' at top of list for generating trees, due to e.g.\n+# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)\n+\n+RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] )\n+RANK_NAMES_INTS = range( len( RANK_NAMES ) )\n+\n+NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME )\n+NO_RANK_CODE = \'n\'\n+\n+PRIMARY_RANK_NAMES = [ \'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\' ]\n+RANK_INT_TO_CODE = {}\n+for name in PRIMARY_RANK_NAMES:\n+ RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0]\n+RANK_INT_TO_CODE[ RANK_NAMES.index( \'superkingdom\' ) ] = \'d\'\n+PRIMARY_RANK_NAMES.append( \'superkingdom\' )\n+\n+NAME_STUB = "%s__%s"\n+NAME_RE = re.compile( "(\\t| |\\||\\.;)" )\n+NAME_REPL = "_"\n+\n+\n+def get_kraken_db_path( db ):\n+ assert db, ValueError( "You must provide a kraken database" )\n+ k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None )\n+ if k_db_path:\n+ db = os.path.join( k_db_path, db )\n+ return db\n+\n+\n+def load_taxonomy( db_path, sanitize_names=False ):\n+ child_lists = {}\n+ name_map = {}\n+ rank_map = {}\n+ with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh:\n+ for line in fh:\n+ line = line.rstrip( "\\n\\r" )\n+ if line.endswith( "\\t|" ):\n+ line = line[:-2]\n+ fields = line.split( "\\t|\\t" )\n+ node_id = fields[0]\n+ name = fields[1]\n+ if sanitize_names:\n+ name = NAME_RE.sub( NAME_REPL, name )\n+ name_type = fields[3]\n+ if name_type == "scientific name":\n+ name_map[ node_id ] = name\n+\n+ with open( os.path.join( db_path, "taxonomy/nodes.dmp" ) ) as fh:\n+ for line in fh:\n+ line = line.rstrip( "\\n\\r" )\n+ fields = line.split( "\\t|\\t" )\n+ node_id = fields[0]\n+ parent_id = fields[1]\n+ rank = RANK_NAME_TO_INTS.get( fields[2].lower(), None )\n+ if rank is None:\n+ # This should never happen, unless new taxonomy ranks are created\n+ print >> sys.stderr, \'Unrecognized rank: Node "%s" is "%s", setting to "%s"\' % ( node_id, fields[2], NO_RANK_NAME )\n+ rank = NO_RANK_INT\n+ if node_id == \'1\':\n+ parent_id = \'0\'\n+ if parent_id not in child_lists:\n+ child_lists[ parent_id ] = []\n+ child_lists[ parent_id ].append( node_id )\n+ rank_map[node_id] = rank\n+ return ( child_lists, name_map, rank_map )\n+\n+\n+def dfs_summation( node, c'..b'er.add_option( \'\', \'--name-long\', dest=\'name_long\', action=\'store_true\', default=False, help=\'Use Long taxa ID instead of base name\' )\n+ parser.add_option( \'\', \'--taxonomy\', dest=\'taxonomy\', action=\'store_true\', default=False, help=\'Output taxonomy in last column\' )\n+ parser.add_option( \'\', \'--cluster\', dest=\'cluster\', action=\'store\', type="string", default=None, help=\'Cluster counts to specified rank\' )\n+ parser.add_option( \'\', \'--summation\', dest=\'summation\', action=\'store_true\', default=False, help=\'Add summation of child counts to each taxa\' )\n+ parser.add_option( \'\', \'--sanitize-names\', dest=\'sanitize_names\', action=\'store_true\', default=False, help=\'Replace special chars (\\t| |\\||\\.;) with underscore (_)\' )\n+ parser.add_option( \'\', \'--show-rank\', dest=\'show_rank\', action=\'store_true\', default=False, help=\'Output column with Rank name\' )\n+ parser.add_option( \'\', \'--db\', dest=\'db\', action=\'store\', type="string", default=None, help=\'Name of Kraken database\' )\n+ parser.add_option( \'\', \'--output\', dest=\'output\', action=\'store\', type="string", default=None, help=\'Name of output file\' )\n+ parser.add_option( \'\', \'--output-tree\', dest=\'output_tree\', action=\'store\', type="string", default=None, help=\'Name of output file to place newick tree\' )\n+ (options, args) = parser.parse_args()\n+ if options.version:\n+ print >> sys.stderr, "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ )\n+ sys.exit()\n+ if not args:\n+ print >> sys.stderr, parser.get_usage()\n+ sys.exit()\n+\n+ if options.cluster:\n+ cluster_name = options.cluster.lower()\n+ cluster = RANK_NAME_TO_INTS.get( cluster_name, None )\n+ assert cluster is not None, ValueError( \'"%s" is not a valid rank for clustering.\' % options.cluster )\n+ if cluster_name not in PRIMARY_RANK_NAMES:\n+ assert options.intermediate, ValueError( \'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster )\n+ ranks_to_report = [ cluster ]\n+ options.cluster = cluster\n+ # When clustering we need to do summatation\n+ options.summation = True\n+ else:\n+ options.cluster = None # make empty string into None\n+ ranks_to_report = RANK_NAMES_INTS\n+\n+ if options.output:\n+ output_fh = open( options.output, \'wb+\' )\n+ else:\n+ output_fh = sys.stdout\n+\n+ db_path = get_kraken_db_path( options.db )\n+ ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names )\n+ file_data = []\n+ hit_taxa = []\n+ for input_filename in args:\n+ taxo_counts = {}\n+ with open( input_filename ) as fh:\n+ for line in fh:\n+ fields = line.split( "\\t" )\n+ taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1\n+ clade_counts = taxo_counts.copy() # fixme remove copying?\n+ if options.summation:\n+ dfs_summation( \'1\', clade_counts, child_lists )\n+ for key, value in clade_counts.items():\n+ if value and key not in hit_taxa:\n+ hit_taxa.append( key )\n+ file_data.append( clade_counts )\n+\n+ if options.header_line:\n+ output_fh.write( "#ID\\t" )\n+ output_fh.write( "\\t".join( args ) )\n+ if options.show_rank:\n+ output_fh.write( "\\trank" )\n+ if options.taxonomy:\n+ output_fh.write( "\\ttaxonomy" )\n+ output_fh.write( \'\\n\' )\n+\n+ output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] )\n+ dfs_report( \'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None )\n+\n+ for rank_int in ranks_to_report:\n+ for line in output_lines.get( rank_int, [] ):\n+ output_fh.write( line )\n+ output_fh.write( \'\\n\' )\n+ fh.close()\n+ if options.output_tree:\n+ write_tree( child_lists, name_map, rank_map, options )\n+\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/kraken_taxonomy_report.xml Wed Jun 01 17:25:40 2016 -0400 |
[ |
b'@@ -0,0 +1,242 @@\n+<?xml version="1.0"?>\n+<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.1">\n+ <description>view report of classification for multiple samples</description>\n+ <requirements>\n+ <requirement type="package" version="1.66">biopython</requirement>\n+ </requirements>\n+ <stdio>\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ </stdio>\n+ <version_command>python ${__tool_directory__}/kraken_taxonomy_report.py --version</version_command>\n+ <command>\n+<![CDATA[\n+\n+#for $input_classification in $classification:\n+ ln -s "${input_classification}" "${input_classification.element_identifier}" &&\n+#end for\n+\n+export KRAKEN_DB_PATH="${kraken_database.fields.path}" && \n+python ${__tool_directory__}/kraken_taxonomy_report.py\n+\n+--db "${kraken_database.fields.name}"\n+${show_zeros}\n+${header_line}\n+${otu_name}\n+${taxonomy}\n+${show_rank}\n+${intermediate}\n+${sanitize_names}\n+#if str( $cluster.cluster ):\n+ --cluster "${cluster.cluster}"\n+#else:\n+ ${cluster.summation}\n+#end if\n+\n+--output "${output_report}"\n+\n+#if $output_tree:\n+ --output-tree "${output_tree}"\n+#end if\n+\n+#for $input_classification in $classification:\n+ "${input_classification.element_identifier}"\n+#end for\n+\n+]]>\n+ </command>\n+ <inputs>\n+ <param format="tabular" label="Kraken output" multiple="True" name="classification" type="data" />\n+ <param checked="False" falsevalue="" argument="--show-zeros" label="Display taxa even if they lack a read in any sample" name="show_zeros" truevalue="--show-zeros" type="boolean" />\n+ <param checked="True" falsevalue="" argument="--header-line" label="Display a header line indicating sample IDs" name="header_line" truevalue="--header-line" type="boolean" />\n+ <param label="Select a Kraken database" name="kraken_database" type="select" help="Select the same database used to classify reads">\n+ <options from_data_table="kraken_databases">\n+ <validator message="No Kraken databases are available" type="no_options" />\n+ </options>\n+ </param>\n+ <param label="How to name OTUs" name="otu_name" type="select" multiple="False">\n+ <option value="" selected="True">Node name only</option>\n+ <option value="--name-long">Taxonified Name</option>\n+ <option value="--name-id">Node ID</option>\n+ </param>\n+ <param checked="True" falsevalue="" argument="--sanitize-names" label="Sanitize Names" name="sanitize_names" truevalue="--sanitize-names" type="boolean" help="Replace special chars (\\t| |\\||\\.;) with underscore (_)" />\n+ <param checked="False" falsevalue="" argument="--show-rank" label="Output Rank Name in (second to) last column" name="show_rank" truevalue="--show-rank" type="boolean" />\n+ <param checked="False" falsevalue="" argument="--taxonomy" label="Output taxonomy in last column" name="taxonomy" truevalue="--taxonomy" type="boolean" />\n+ <param checked="False" falsevalue="" argument="--intermediate" label="Display intermediate ranks" name="intermediate" truevalue="--intermediate" type="boolean" />\n+ <conditional name="cluster">\n+ <param argument="--cluster" help="Combines rows under the selected taxon and reports only selected rank." label="Cluster by taxonomic rank" name="cluster" type="select">\n+ <option value="Superkingdom">Superkingdom</option>\n+ <option value="Kingdom">Kingdom</option>\n+ <option value="Subkingdom">Subkingdom</option>\n+ <option value="Superphylum">Superphylum</option>\n+ <option value="Phylum">Phylum</option>\n+ <option value="Subphylum">Subphylum</option>\n+ <option value="Superclass">Superclass</option>\n+ <option value="Class">Class</option>\n+ <option value="Subclass">Subclass</option>\n+ <option value="Infraclass">Infra'..b'ation" value="True"/>\n+ </conditional>\n+ <param name="tree" value="True"/>\n+ <output name="output_report" file="output_abundance_1.tabular" ftype="tabular"/>\n+ <output name="output_tree" file="output_tree_1.newick" />\n+ </test>\n+ <test>\n+ <param name="classification" value="input_kraken_1.tabular" ftype="tabular"/>\n+ <param name="show_zeros" value="True"/>\n+ <param name="header_line" value="True"/>\n+ <param name="kraken_database" value="test_db"/>\n+ <param name="otu_name" value=""/>\n+ <param name="sanitize_names" value="True"/>\n+ <param name="show_rank" value="True"/>\n+ <param name="taxonomy" value="True"/>\n+ <param name="intermediate" value="True"/>\n+ <conditional name="cluster">\n+ <param name="cluster" value=""/>\n+ <param name="summation" value="False"/>\n+ </conditional>\n+ <param name="tree" value="True"/>\n+ <output name="output_report" file="output_abundance_2.tabular" ftype="tabular"/>\n+ <output name="output_tree" file="output_tree_1.newick" />\n+ </test>\n+ <test>\n+ <param name="classification" value="input_kraken_1.tabular" ftype="tabular"/>\n+ <param name="show_zeros" value="True"/>\n+ <param name="header_line" value="True"/>\n+ <param name="kraken_database" value="test_db"/>\n+ <param name="otu_name" value=""/>\n+ <param name="sanitize_names" value="True"/>\n+ <param name="show_rank" value="True"/>\n+ <param name="taxonomy" value="True"/>\n+ <param name="intermediate" value="False"/>\n+ <conditional name="cluster">\n+ <param name="cluster" value="Species"/>\n+ </conditional>\n+ <param name="tree" value="True"/>\n+ <output name="output_report" file="output_abundance_3.tabular" ftype="tabular"/>\n+ <output name="output_tree" file="output_tree_3.newick" />\n+ </test>\n+ <test>\n+ <param name="classification" value="input_kraken_1.tabular,input_kraken_2.tabular" ftype="tabular"/>\n+ <param name="show_zeros" value="True"/>\n+ <param name="header_line" value="True"/>\n+ <param name="kraken_database" value="test_db"/>\n+ <param name="otu_name" value=""/>\n+ <param name="sanitize_names" value="True"/>\n+ <param name="show_rank" value="True"/>\n+ <param name="taxonomy" value="True"/>\n+ <param name="intermediate" value="False"/>\n+ <conditional name="cluster">\n+ <param name="cluster" value="Species"/>\n+ </conditional>\n+ <param name="tree" value="True"/>\n+ <output name="output_report" file="output_abundance_4.tabular" ftype="tabular"/>\n+ <output name="output_tree" file="output_tree_3.newick" />\n+ </test>\n+ </tests>\n+ <help>\n+<![CDATA[\n+\n+.. class:: warningmark\n+\n+**Note**: the database used must be the same as the one used in the original Kraken run\n+\n+-----\n+\n+**What is Does**\n+\n+Summarizes read counts across taxonomic ranks for multiple samples. This is convenient for comparing results across multiple experiments, conditions, locations, etc.\n+\n+-----\n+\n+**Output**\n+\n+The output is tab-delimited, with one line per taxon.\n+\n+Will optionally output a newick tree built from the kraken database taxonomy using the specified options. Tree branch lengths will be set to "1.00000".\n+\n+\n+]]>\n+ </help>\n+ <citations>\n+ <citation type="bibtex">@unpublished{Kraken-Taxonomy-Report:2016,\n+ title = "Kraken Taxonomy Report",\n+ author = "Daniel Blankenberg",\n+ url = "https://github.com/blankenberg/Kraken-Taxonomy-Report",\n+ year = "2016 (accessed June 1, 2016)"\n+ }</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_kraken_1.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,4 @@ +C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162 +C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174 +C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88 +C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841 |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/input_kraken_2.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,5 @@ +C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162 +C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174 +C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88 +C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841 +C gi|145234|gb|M33727.1|ECOALPHOE2 562 97 562:88 |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_1.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_1.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,11 @@ +#ID input_kraken_1.tabular rank taxonomy +root 4 no rank n__root +cellular_organisms 4 no rank n__root;n__cellular_organisms +Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12 +Bacteria 4 superkingdom n__root;n__cellular_organisms;d__Bacteria +Proteobacteria 4 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria +Gammaproteobacteria 4 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria +Enterobacteriales 4 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales +Enterobacteriaceae 4 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae +Escherichia 4 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia +Escherichia_coli 4 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_2.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_2.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,11 @@ +#ID input_kraken_1.tabular rank taxonomy +root 0 no rank n__root +cellular_organisms 0 no rank n__root;n__cellular_organisms +Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12 +Bacteria 0 superkingdom n__root;n__cellular_organisms;d__Bacteria +Proteobacteria 0 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria +Gammaproteobacteria 0 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria +Enterobacteriales 0 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales +Enterobacteriaceae 0 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae +Escherichia 0 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia +Escherichia_coli 1 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_3.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_3.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,2 @@ +#ID input_kraken_1.tabular rank taxonomy +Escherichia_coli 4 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_4.tabular --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_abundance_4.tabular Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,2 @@ +#ID input_kraken_1.tabular input_kraken_2.tabular rank taxonomy +Escherichia_coli 4 5 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_1.newick --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_tree_1.newick Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,1 @@ +(((((((((Escherichia_coli_K-12:1.00000)Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000; |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_3.newick --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_tree_3.newick Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,1 @@ +((((((((Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000; |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_database.loc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_database.loc Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,1 @@ +test_db test_db ${__HERE__} \ No newline at end of file |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.idx |
b |
Binary file test-data/test_db/database.idx has changed |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.kdb |
b |
Binary file test-data/test_db/database.kdb has changed |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/names.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/names.dmp Wed Jun 01 17:25:40 2016 -0400 |
[ |
@@ -0,0 +1,74 @@ +83333 | Escherichia coli K-12 | | scientific name | +83333 | Escherichia coli K12 | | equivalent name | +562 | "Bacillus coli" Migula 1895 | | authority | +562 | "Bacterium coli commune" Escherich 1885 | | authority | +562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority | +562 | ATCC 11775 | | type material | +562 | Bacillus coli | | synonym | +562 | Bacterium coli | | synonym | +562 | Bacterium coli commune | | synonym | +562 | CCUG 24 | | type material | +562 | CCUG 29300 | | type material | +562 | CIP 54.8 | | type material | +562 | DSM 30083 | | type material | +562 | Enterococcus coli | | synonym | +562 | Escherchia coli | | misspelling | +562 | Escherichia coli | | scientific name | +562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority | +562 | Escherichia sp. MAR | | includes | +562 | Escherichia/Shigella coli | | equivalent name | +562 | Eschericia coli | | misspelling | +562 | JCM 1649 | | type material | +562 | LMG 2092 | | type material | +562 | NBRC 102203 | | type material | +562 | NCCB 54008 | | type material | +562 | NCTC 9001 | | type material | +562 | bacterium 10a | | includes | +562 | bacterium E3 | | includes | +561 | Escherchia | | misspelling | +561 | Escherichia | | scientific name | +561 | Escherichia Castellani and Chalmers 1919 | | authority | +543 | Enterobacteraceae | | synonym | +543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym | +543 | Enterobacteriaceae | | scientific name | +543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym | +543 | Enterobacteriaceae Rahn 1937 | | synonym | +543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part | +91347 | 'Enterobacteriales' | | synonym | +91347 | Enterobacteriaceae and related endosymbionts | | synonym | +91347 | Enterobacteriaceae group | | synonym | +91347 | Enterobacteriales | | scientific name | +91347 | enterobacteria | enterobacteria<blast91347> | blast name | +91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part | +1236 | Gammaproteobacteria | | scientific name | +1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym | +1236 | Proteobacteria gamma subdivision | | synonym | +1236 | Purple bacteria, gamma subdivision | | synonym | +1236 | g-proteobacteria | gamma proteos<blast1236> | blast name | +1236 | gamma proteobacteria | | synonym | +1236 | gamma subdivision | | synonym | +1236 | gamma subgroup | | synonym | +1224 | Proteobacteria | | scientific name | +1224 | Proteobacteria Garrity et al. 2005 | | authority | +1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority | +1224 | not Proteobacteria Cavalier-Smith 2002 | | authority | +1224 | proteobacteria | proteobacteria<blast1224> | blast name | +1224 | purple bacteria | | common name | +1224 | purple bacteria and relatives | | common name | +1224 | purple non-sulfur bacteria | | common name | +1224 | purple photosynthetic bacteria | | common name | +1224 | purple photosynthetic bacteria and relatives | | common name | +2 | Bacteria | Bacteria <prokaryote> | scientific name | +2 | Monera | Monera <Bacteria> | in-part | +2 | Procaryotae | Procaryotae <Bacteria> | in-part | +2 | Prokaryota | Prokaryota <Bacteria> | in-part | +2 | Prokaryotae | Prokaryotae <Bacteria> | in-part | +2 | bacteria | bacteria <blast2> | blast name | +2 | eubacteria | | genbank common name | +2 | not Bacteria Haeckel 1894 | | synonym | +2 | prokaryote | prokaryote <Bacteria> | in-part | +2 | prokaryotes | prokaryotes <Bacteria> | in-part | +1 | all | | synonym | +1 | root | | scientific name | +131567 | biota | | synonym | +131567 | cellular organisms | | scientific name | |
b |
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/nodes.dmp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/test_db/taxonomy/nodes.dmp Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,10 @@ +83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | +561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | +2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | +131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | +1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | |
b |
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of Kraken database in the required format --> + <table name="kraken_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="tool-data/kraken_databases.loc" /> + </table> +</tables> |
b |
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.test --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Wed Jun 01 17:25:40 2016 -0400 |
b |
@@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tables> + <!-- Locations of Kraken database in the required format --> + <table name="kraken_databases" comment_char="#"> + <columns>value, name, path</columns> + <file path="${__HERE__}/test-data/test_database.loc" /> + </table> +</tables> |