Repository 'kraken_taxonomy_report'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/kraken_taxonomy_report

Changeset 0:3f1a0d47ea8d (2016-06-01)
Next changeset 1:b97694b21bc3 (2016-11-23)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
added:
kraken_databases.loc.sample
kraken_taxonomy_report.py
kraken_taxonomy_report.xml
test-data/input_kraken_1.tabular
test-data/input_kraken_2.tabular
test-data/output_abundance_1.tabular
test-data/output_abundance_2.tabular
test-data/output_abundance_3.tabular
test-data/output_abundance_4.tabular
test-data/output_tree_1.newick
test-data/output_tree_3.newick
test-data/test_database.loc
test-data/test_db/database.idx
test-data/test_db/database.kdb
test-data/test_db/taxonomy/names.dmp
test-data/test_db/taxonomy/nodes.dmp
tool_data_table_conf.xml.sample
tool_data_table_conf.xml.test
b
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_taxonomy_report.py Wed Jun 01 17:25:40 2016 -0400
[
b'@@ -0,0 +1,278 @@\n+#!/usr/bin/env python\n+\n+# Reports a summary of Kraken\'s results\n+# and optionally creates a newick Tree\n+# Copyright (c) 2016 Daniel Blankenberg\n+# Licensed under the Academic Free License version 3.0\n+# https://github.com/blankenberg/Kraken-Taxonomy-Report\n+\n+import sys\n+import os\n+import optparse\n+import re\n+\n+__VERSION__ = \'0.0.1\'\n+\n+__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"\n+\n+# Rank names were pulled from ncbi nodes.dmp on 02/02/2016\n+# cat nodes.dmp | cut -f 5 | sort | uniq\n+# "root" is added manually\n+NO_RANK_NAME = "no rank"\n+RANK_NAMES = [ NO_RANK_NAME,\n+               "root",\n+               "superkingdom",\n+               "kingdom",\n+               "subkingdom",\n+               "superphylum",\n+               "phylum",\n+               "subphylum",\n+               "superclass",\n+               "class",\n+               "subclass",\n+               "infraclass",\n+               "superorder",\n+               "order",\n+               "suborder",\n+               "infraorder",\n+               "parvorder",\n+               "superfamily",\n+               "family",\n+               "subfamily",\n+               "tribe",\n+               "subtribe",\n+               "genus",\n+               "subgenus",\n+               "species group",\n+               "species subgroup",\n+               "species",\n+               "subspecies",\n+               "varietas",\n+               "forma" ]\n+# NB: We put \'no rank\' at top of list for generating trees, due to e.g.\n+# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)\n+\n+RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] )\n+RANK_NAMES_INTS = range( len( RANK_NAMES ) )\n+\n+NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME )\n+NO_RANK_CODE = \'n\'\n+\n+PRIMARY_RANK_NAMES = [ \'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\' ]\n+RANK_INT_TO_CODE = {}\n+for name in 
PRIMARY_RANK_NAMES:\n+    RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0]\n+RANK_INT_TO_CODE[ RANK_NAMES.index( \'superkingdom\' ) ] = \'d\'\n+PRIMARY_RANK_NAMES.append( \'superkingdom\' )\n+\n+NAME_STUB = "%s__%s"\n+NAME_RE = re.compile( "(\\t| |\\||\\.;)" )\n+NAME_REPL = "_"\n+\n+\n+def get_kraken_db_path( db ):\n+    assert db, ValueError( "You must provide a kraken database" )\n+    k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None )\n+    if k_db_path:\n+        db = os.path.join( k_db_path, db )\n+    return db\n+\n+\n+def load_taxonomy( db_path, sanitize_names=False ):\n+    child_lists = {}\n+    name_map = {}\n+    rank_map = {}\n+    with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh:\n+        for line in fh:\n+            line = line.rstrip( "\\n\\r" )\n+            if line.endswith( "\\t|" ):\n+                line = line[:-2]\n+            fields = line.split( "\\t|\\t" )\n+            node_id = fields[0]\n+            name = fields[1]\n+            if sanitize_names:\n+                name = NAME_RE.sub( NAME_REPL, name )\n+            name_type = fields[3]\n+            if name_type == "scientific name":\n+                name_map[ node_id ] = name\n+\n+    with open( os.path.join( db_path, "taxonomy/nodes.dmp" ) ) as fh:\n+        for line in fh:\n+            line = line.rstrip( "\\n\\r" )\n+            fields = line.split( "\\t|\\t" )\n+            node_id = fields[0]\n+            parent_id = fields[1]\n+            rank = RANK_NAME_TO_INTS.get( fields[2].lower(), None )\n+            if rank is None:\n+                # This should never happen, unless new taxonomy ranks are created\n+                print >> sys.stderr, \'Unrecognized rank: Node "%s" is "%s", setting to "%s"\' % ( node_id, fields[2], NO_RANK_NAME )\n+                rank = NO_RANK_INT\n+            if node_id == \'1\':\n+                parent_id = \'0\'\n+            if parent_id not in child_lists:\n+                child_lists[ parent_id ] = []\n+          
  child_lists[ parent_id ].append( node_id )\n+            rank_map[node_id] = rank\n+    return ( child_lists, name_map, rank_map )\n+\n+\n+def dfs_summation( node, c'..b'er.add_option( \'\', \'--name-long\', dest=\'name_long\', action=\'store_true\', default=False, help=\'Use Long taxa ID instead of base name\' )\n+    parser.add_option( \'\', \'--taxonomy\', dest=\'taxonomy\', action=\'store_true\', default=False, help=\'Output taxonomy in last column\' )\n+    parser.add_option( \'\', \'--cluster\', dest=\'cluster\', action=\'store\', type="string", default=None, help=\'Cluster counts to specified rank\' )\n+    parser.add_option( \'\', \'--summation\', dest=\'summation\', action=\'store_true\', default=False, help=\'Add summation of child counts to each taxa\' )\n+    parser.add_option( \'\', \'--sanitize-names\', dest=\'sanitize_names\', action=\'store_true\', default=False, help=\'Replace special chars (\\t| |\\||\\.;) with underscore (_)\' )\n+    parser.add_option( \'\', \'--show-rank\', dest=\'show_rank\', action=\'store_true\', default=False, help=\'Output column with Rank name\' )\n+    parser.add_option( \'\', \'--db\', dest=\'db\', action=\'store\', type="string", default=None, help=\'Name of Kraken database\' )\n+    parser.add_option( \'\', \'--output\', dest=\'output\', action=\'store\', type="string", default=None, help=\'Name of output file\' )\n+    parser.add_option( \'\', \'--output-tree\', dest=\'output_tree\', action=\'store\', type="string", default=None, help=\'Name of output file to place newick tree\' )\n+    (options, args) = parser.parse_args()\n+    if options.version:\n+        print >> sys.stderr, "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ )\n+        sys.exit()\n+    if not args:\n+        print >> sys.stderr, parser.get_usage()\n+        sys.exit()\n+\n+    if options.cluster:\n+        cluster_name = options.cluster.lower()\n+        cluster = RANK_NAME_TO_INTS.get( cluster_name, None )\n+        assert 
cluster is not None, ValueError( \'"%s" is not a valid rank for clustering.\' % options.cluster )\n+        if cluster_name not in PRIMARY_RANK_NAMES:\n+            assert options.intermediate, ValueError( \'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster )\n+        ranks_to_report = [ cluster ]\n+        options.cluster = cluster\n+        # When clustering we need to do summatation\n+        options.summation = True\n+    else:\n+        options.cluster = None  # make empty string into None\n+        ranks_to_report = RANK_NAMES_INTS\n+\n+    if options.output:\n+        output_fh = open( options.output, \'wb+\' )\n+    else:\n+        output_fh = sys.stdout\n+\n+    db_path = get_kraken_db_path( options.db )\n+    ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names )\n+    file_data = []\n+    hit_taxa = []\n+    for input_filename in args:\n+        taxo_counts = {}\n+        with open( input_filename ) as fh:\n+            for line in fh:\n+                fields = line.split( "\\t" )\n+                taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1\n+        clade_counts = taxo_counts.copy()  # fixme remove copying?\n+        if options.summation:\n+            dfs_summation( \'1\', clade_counts, child_lists )\n+        for key, value in clade_counts.items():\n+            if value and key not in hit_taxa:\n+                hit_taxa.append( key )\n+        file_data.append( clade_counts )\n+\n+    if options.header_line:\n+        output_fh.write( "#ID\\t" )\n+        output_fh.write( "\\t".join( args ) )\n+        if options.show_rank:\n+            output_fh.write( "\\trank" )\n+        if options.taxonomy:\n+            output_fh.write( "\\ttaxonomy" )\n+        output_fh.write( \'\\n\' )\n+\n+    output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] )\n+    dfs_report( \'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, 
options, name=None, tax=None )\n+\n+    for rank_int in ranks_to_report:\n+        for line in output_lines.get( rank_int, [] ):\n+            output_fh.write( line )\n+            output_fh.write( \'\\n\' )\n+    fh.close()\n+    if options.output_tree:\n+        write_tree( child_lists, name_map, rank_map, options )\n+\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r 3f1a0d47ea8d kraken_taxonomy_report.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/kraken_taxonomy_report.xml Wed Jun 01 17:25:40 2016 -0400
[
b'@@ -0,0 +1,242 @@\n+<?xml version="1.0"?>\n+<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.1">\n+    <description>view report of classification for multiple samples</description>\n+    <requirements>\n+        <requirement type="package" version="1.66">biopython</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:" />\n+        <exit_code range=":-1" />\n+    </stdio>\n+    <version_command>python ${__tool_directory__}/kraken_taxonomy_report.py --version</version_command>\n+    <command>\n+<![CDATA[\n+\n+#for $input_classification in $classification:\n+    ln -s "${input_classification}" "${input_classification.element_identifier}" &&\n+#end for\n+\n+export KRAKEN_DB_PATH="${kraken_database.fields.path}" && \n+python ${__tool_directory__}/kraken_taxonomy_report.py\n+\n+--db "${kraken_database.fields.name}"\n+${show_zeros}\n+${header_line}\n+${otu_name}\n+${taxonomy}\n+${show_rank}\n+${intermediate}\n+${sanitize_names}\n+#if str( $cluster.cluster ):\n+    --cluster "${cluster.cluster}"\n+#else:\n+    ${cluster.summation}\n+#end if\n+\n+--output "${output_report}"\n+\n+#if $output_tree:\n+    --output-tree "${output_tree}"\n+#end if\n+\n+#for $input_classification in $classification:\n+    "${input_classification.element_identifier}"\n+#end for\n+\n+]]>\n+    </command>\n+    <inputs>\n+        <param format="tabular" label="Kraken output" multiple="True" name="classification" type="data" />\n+        <param checked="False" falsevalue="" argument="--show-zeros" label="Display taxa even if they lack a read in any sample" name="show_zeros" truevalue="--show-zeros" type="boolean" />\n+        <param checked="True" falsevalue="" argument="--header-line" label="Display a header line indicating sample IDs" name="header_line" truevalue="--header-line" type="boolean" />\n+        <param label="Select a Kraken database" name="kraken_database" type="select" help="Select the same database used to classify reads">\n+   
         <options from_data_table="kraken_databases">\n+                <validator message="No Kraken databases are available" type="no_options" />\n+            </options>\n+        </param>\n+        <param label="How to name OTUs" name="otu_name" type="select" multiple="False">\n+            <option value="" selected="True">Node name only</option>\n+            <option value="--name-long">Taxonified Name</option>\n+            <option value="--name-id">Node ID</option>\n+        </param>\n+        <param checked="True" falsevalue="" argument="--sanitize-names" label="Sanitize Names" name="sanitize_names" truevalue="--sanitize-names" type="boolean" help="Replace special chars (\\t| |\\||\\.;) with underscore (_)" />\n+        <param checked="False" falsevalue="" argument="--show-rank" label="Output Rank Name in (second to) last column" name="show_rank" truevalue="--show-rank" type="boolean" />\n+        <param checked="False" falsevalue="" argument="--taxonomy" label="Output taxonomy in last column" name="taxonomy" truevalue="--taxonomy" type="boolean" />\n+        <param checked="False" falsevalue="" argument="--intermediate" label="Display intermediate ranks" name="intermediate" truevalue="--intermediate" type="boolean" />\n+        <conditional name="cluster">\n+            <param argument="--cluster" help="Combines rows under the selected taxon and reports only selected rank." 
label="Cluster by taxonomic rank" name="cluster" type="select">\n+                <option value="Superkingdom">Superkingdom</option>\n+                <option value="Kingdom">Kingdom</option>\n+                <option value="Subkingdom">Subkingdom</option>\n+                <option value="Superphylum">Superphylum</option>\n+                <option value="Phylum">Phylum</option>\n+                <option value="Subphylum">Subphylum</option>\n+                <option value="Superclass">Superclass</option>\n+                <option value="Class">Class</option>\n+                <option value="Subclass">Subclass</option>\n+                <option value="Infraclass">Infra'..b'ation" value="True"/>\n+            </conditional>\n+            <param name="tree" value="True"/>\n+            <output name="output_report" file="output_abundance_1.tabular" ftype="tabular"/>\n+            <output name="output_tree" file="output_tree_1.newick" />\n+        </test>\n+        <test>\n+            <param name="classification" value="input_kraken_1.tabular" ftype="tabular"/>\n+            <param name="show_zeros" value="True"/>\n+            <param name="header_line" value="True"/>\n+            <param name="kraken_database" value="test_db"/>\n+            <param name="otu_name" value=""/>\n+            <param name="sanitize_names" value="True"/>\n+            <param name="show_rank" value="True"/>\n+            <param name="taxonomy" value="True"/>\n+            <param name="intermediate" value="True"/>\n+            <conditional name="cluster">\n+                <param name="cluster" value=""/>\n+                <param name="summation" value="False"/>\n+            </conditional>\n+            <param name="tree" value="True"/>\n+            <output name="output_report" file="output_abundance_2.tabular" ftype="tabular"/>\n+            <output name="output_tree" file="output_tree_1.newick" />\n+        </test>\n+        <test>\n+            <param name="classification" 
value="input_kraken_1.tabular" ftype="tabular"/>\n+            <param name="show_zeros" value="True"/>\n+            <param name="header_line" value="True"/>\n+            <param name="kraken_database" value="test_db"/>\n+            <param name="otu_name" value=""/>\n+            <param name="sanitize_names" value="True"/>\n+            <param name="show_rank" value="True"/>\n+            <param name="taxonomy" value="True"/>\n+            <param name="intermediate" value="False"/>\n+            <conditional name="cluster">\n+                <param name="cluster" value="Species"/>\n+            </conditional>\n+            <param name="tree" value="True"/>\n+            <output name="output_report" file="output_abundance_3.tabular" ftype="tabular"/>\n+            <output name="output_tree" file="output_tree_3.newick" />\n+        </test>\n+        <test>\n+            <param name="classification" value="input_kraken_1.tabular,input_kraken_2.tabular" ftype="tabular"/>\n+            <param name="show_zeros" value="True"/>\n+            <param name="header_line" value="True"/>\n+            <param name="kraken_database" value="test_db"/>\n+            <param name="otu_name" value=""/>\n+            <param name="sanitize_names" value="True"/>\n+            <param name="show_rank" value="True"/>\n+            <param name="taxonomy" value="True"/>\n+            <param name="intermediate" value="False"/>\n+            <conditional name="cluster">\n+                <param name="cluster" value="Species"/>\n+            </conditional>\n+            <param name="tree" value="True"/>\n+            <output name="output_report" file="output_abundance_4.tabular" ftype="tabular"/>\n+            <output name="output_tree" file="output_tree_3.newick" />\n+        </test>\n+    </tests>\n+    <help>\n+<![CDATA[\n+\n+.. 
class:: warningmark\n+\n+**Note**: the database used must be the same as the one used in the original Kraken run\n+\n+-----\n+\n+**What it Does**\n+\n+Summarizes read counts across taxonomic ranks for multiple samples. This is convenient for comparing results across multiple experiments, conditions, locations, etc.\n+\n+-----\n+\n+**Output**\n+\n+The output is tab-delimited, with one line per taxon.\n+\n+Will optionally output a newick tree built from the kraken database taxonomy using the specified options. Tree branch lengths will be set to "1.00000".\n+\n+\n+]]>\n+    </help>\n+    <citations>\n+        <citation type="bibtex">@unpublished{Kraken-Taxonomy-Report:2016,\n+          title  = "Kraken Taxonomy Report",\n+          author = "Daniel Blankenberg",\n+          url    = "https://github.com/blankenberg/Kraken-Taxonomy-Report",\n+          year   = "2016 (accessed June 1, 2016)"\n+        }</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_kraken_1.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,4 @@
+C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162
+C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174
+C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88
+C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/input_kraken_2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_kraken_2.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,5 @@
+C gi|145231|gb|M33724.1|ECOALPHOA 83333 171 83333:162
+C gi|145232|gb|M33725.1|ECOALPHOB 83333 183 83333:174
+C gi|145234|gb|M33727.1|ECOALPHOE 562 97 562:88
+C gi|146195|gb|J01619.1|ECOGLTA 83333 3850 83333:3841
+C gi|145234|gb|M33727.1|ECOALPHOE2 562 97 562:88
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_1.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_1.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,11 @@
+#ID input_kraken_1.tabular rank taxonomy
+root 4 no rank n__root
+cellular_organisms 4 no rank n__root;n__cellular_organisms
+Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12
+Bacteria 4 superkingdom n__root;n__cellular_organisms;d__Bacteria
+Proteobacteria 4 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria
+Gammaproteobacteria 4 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria
+Enterobacteriales 4 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales
+Enterobacteriaceae 4 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae
+Escherichia 4 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia
+Escherichia_coli 4 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_2.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,11 @@
+#ID input_kraken_1.tabular rank taxonomy
+root 0 no rank n__root
+cellular_organisms 0 no rank n__root;n__cellular_organisms
+Escherichia_coli_K-12 3 no rank n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli;n__Escherichia_coli_K-12
+Bacteria 0 superkingdom n__root;n__cellular_organisms;d__Bacteria
+Proteobacteria 0 phylum n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria
+Gammaproteobacteria 0 class n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria
+Enterobacteriales 0 order n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales
+Enterobacteriaceae 0 family n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae
+Escherichia 0 genus n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia
+Escherichia_coli 1 species n__root;n__cellular_organisms;d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_3.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,2 @@
+#ID input_kraken_1.tabular rank taxonomy
+Escherichia_coli 4 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_abundance_4.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_abundance_4.tabular Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,2 @@
+#ID input_kraken_1.tabular input_kraken_2.tabular rank taxonomy
+Escherichia_coli 4 5 species d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacteriales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia_coli
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_1.newick
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_tree_1.newick Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,1 @@
+(((((((((Escherichia_coli_K-12:1.00000)Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000;
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/output_tree_3.newick
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_tree_3.newick Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,1 @@
+((((((((Escherichia_coli:1.00000)Escherichia:1.00000)Enterobacteriaceae:1.00000)Enterobacteriales:1.00000)Gammaproteobacteria:1.00000)Proteobacteria:1.00000)Bacteria:1.00000)cellular_organisms:1.00000)root:1.00000;
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_database.loc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_database.loc Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,1 @@
+test_db test_db ${__HERE__}
\ No newline at end of file
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.idx
b
Binary file test-data/test_db/database.idx has changed
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/database.kdb
b
Binary file test-data/test_db/database.kdb has changed
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/names.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/names.dmp Wed Jun 01 17:25:40 2016 -0400
[
@@ -0,0 +1,74 @@
+83333 | Escherichia coli K-12 | | scientific name |
+83333 | Escherichia coli K12 | | equivalent name |
+562 | "Bacillus coli" Migula 1895 | | authority |
+562 | "Bacterium coli commune" Escherich 1885 | | authority |
+562 | "Bacterium coli" (Migula 1895) Lehmann and Neumann 1896 | | authority |
+562 | ATCC 11775 | | type material |
+562 | Bacillus coli | | synonym |
+562 | Bacterium coli | | synonym |
+562 | Bacterium coli commune | | synonym |
+562 | CCUG 24 | | type material |
+562 | CCUG 29300 | | type material |
+562 | CIP 54.8 | | type material |
+562 | DSM 30083 | | type material |
+562 | Enterococcus coli | | synonym |
+562 | Escherchia coli | | misspelling |
+562 | Escherichia coli | | scientific name |
+562 | Escherichia coli (Migula 1895) Castellani and Chalmers 1919 | | authority |
+562 | Escherichia sp. MAR | | includes |
+562 | Escherichia/Shigella coli | | equivalent name |
+562 | Eschericia coli | | misspelling |
+562 | JCM 1649 | | type material |
+562 | LMG 2092 | | type material |
+562 | NBRC 102203 | | type material |
+562 | NCCB 54008 | | type material |
+562 | NCTC 9001 | | type material |
+562 | bacterium 10a | | includes |
+562 | bacterium E3 | | includes |
+561 | Escherchia | | misspelling |
+561 | Escherichia | | scientific name |
+561 | Escherichia Castellani and Chalmers 1919 | | authority |
+543 | Enterobacteraceae | | synonym |
+543 | Enterobacteraceae (ex Lapage 1979) Lapage 1982, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae | | scientific name |
+543 | Enterobacteriaceae (ex Rahn 1937) Ewing et al. 1980, fam. nov., nom. rev. | | synonym |
+543 | Enterobacteriaceae Rahn 1937 | | synonym |
+543 | gamma-3 proteobacteria | gamma-3 proteobacteria <#1> | in-part |
+91347 | 'Enterobacteriales' | | synonym |
+91347 | Enterobacteriaceae and related endosymbionts | | synonym |
+91347 | Enterobacteriaceae group | | synonym |
+91347 | Enterobacteriales | | scientific name |
+91347 | enterobacteria | enterobacteria<blast91347> | blast name |
+91347 | gamma-3 proteobacteria | gamma-3 proteobacteria <#5> | in-part |
+1236 | Gammaproteobacteria | | scientific name |
+1236 | Gammaproteobacteria Garrity et al. 2005 | | synonym |
+1236 | Proteobacteria gamma subdivision | | synonym |
+1236 | Purple bacteria, gamma subdivision | | synonym |
+1236 | g-proteobacteria | gamma proteos<blast1236> | blast name |
+1236 | gamma proteobacteria | | synonym |
+1236 | gamma subdivision | | synonym |
+1236 | gamma subgroup | | synonym |
+1224 | Proteobacteria | | scientific name |
+1224 | Proteobacteria Garrity et al. 2005 | | authority |
+1224 | Proteobacteria [class] Stackebrandt et al. 1988 | | authority |
+1224 | not Proteobacteria Cavalier-Smith 2002 | | authority |
+1224 | proteobacteria | proteobacteria<blast1224> | blast name |
+1224 | purple bacteria | | common name |
+1224 | purple bacteria and relatives | | common name |
+1224 | purple non-sulfur bacteria | | common name |
+1224 | purple photosynthetic bacteria | | common name |
+1224 | purple photosynthetic bacteria and relatives | | common name |
+2 | Bacteria | Bacteria <prokaryote> | scientific name |
+2 | Monera | Monera <Bacteria> | in-part |
+2 | Procaryotae | Procaryotae <Bacteria> | in-part |
+2 | Prokaryota | Prokaryota <Bacteria> | in-part |
+2 | Prokaryotae | Prokaryotae <Bacteria> | in-part |
+2 | bacteria | bacteria <blast2> | blast name |
+2 | eubacteria | | genbank common name |
+2 | not Bacteria Haeckel 1894 | | synonym |
+2 | prokaryote | prokaryote <Bacteria> | in-part |
+2 | prokaryotes | prokaryotes <Bacteria> | in-part |
+1 | all | | synonym |
+1 | root | | scientific name |
+131567 | biota | | synonym |
+131567 | cellular organisms | | scientific name |
b
diff -r 000000000000 -r 3f1a0d47ea8d test-data/test_db/taxonomy/nodes.dmp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_db/taxonomy/nodes.dmp Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,10 @@
+83333 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | |
+561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | |
+2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | |
+131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | |
+1 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | |
b
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of Kraken database in the required format -->
+    <table name="kraken_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="tool-data/kraken_databases.loc" />
+    </table>
+</tables>
b
diff -r 000000000000 -r 3f1a0d47ea8d tool_data_table_conf.xml.test
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test Wed Jun 01 17:25:40 2016 -0400
b
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<tables>
+    <!-- Locations of Kraken database in the required format -->
+    <table name="kraken_databases" comment_char="#">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/test_database.loc" />
+    </table>
+</tables>