Mercurial > repos > iuc > kraken_taxonomy_report
changeset 1:b97694b21bc3 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
|  |  |
---|---|
author | iuc |
date | Wed, 23 Nov 2016 03:27:33 -0500 |
parents | 3f1a0d47ea8d |
children | 528a1d91b066 |
files | kraken_taxonomy_report.py kraken_taxonomy_report.xml |
diffstat | 2 files changed, 23 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/kraken_taxonomy_report.py Wed Jun 01 17:25:40 2016 -0400
+++ b/kraken_taxonomy_report.py Wed Nov 23 03:27:33 2016 -0500
@@ -6,12 +6,14 @@
 # Licensed under the Academic Free License version 3.0
 # https://github.com/blankenberg/Kraken-Taxonomy-Report
 
+from __future__ import print_function
+
 import sys
 import os
 import optparse
 import re
 
-__VERSION__ = '0.0.1'
+__VERSION__ = '0.0.2'
 __URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"
 
 
@@ -82,6 +84,7 @@
     child_lists = {}
     name_map = {}
     rank_map = {}
+    names = {} # Store names here to look for duplicates (id, True/False name fixed)
     with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh:
         for line in fh:
             line = line.rstrip( "\n\r" )
@@ -94,6 +97,20 @@
                 name = NAME_RE.sub( NAME_REPL, name )
             name_type = fields[3]
             if name_type == "scientific name":
+                if name in names:
+                    print( 'Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % ( name, node_id, names[name][0] ), file=sys.stderr )
+                    new_name = "%s_%s" % ( name, node_id )
+                    print( 'Transforming node "%s" named "%s" to "%s".' % ( node_id, name, new_name ), file=sys.stderr )
+                    assert new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % new_name
+                    if not names[name][1]:
+                        orig_new_name = "%s_%s" % ( name, names[name][0] )
+                        print( 'Transforming node "%s" named "%s" to "%s".' % ( names[name][0], name, orig_new_name ), file=sys.stderr )
+                        assert orig_new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name
+                        name_map[names[name][0]] = orig_new_name
+                        names[name] = ( names[name][0], True )
+                    name = new_name
+                else:
+                    names[name] = ( node_id, False )
                 name_map[ node_id ] = name
 
     with open( os.path.join( db_path, "taxonomy/nodes.dmp" ) ) as fh:
@@ -105,7 +122,7 @@
             rank = RANK_NAME_TO_INTS.get( fields[2].lower(), None )
             if rank is None:
                 # This should never happen, unless new taxonomy ranks are created
-                print >> sys.stderr, 'Unrecognized rank: Node "%s" is "%s", setting to "%s"' % ( node_id, fields[2], NO_RANK_NAME )
+                print( 'Unrecognized rank: Node "%s" is "%s", setting to "%s"' % ( node_id, fields[2], NO_RANK_NAME ), file=sys.stderr )
                 rank = NO_RANK_INT
             if node_id == '1':
                 parent_id = '0'
@@ -125,8 +142,6 @@
 
 
 def dfs_report( node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ):
-    if not options.summation and ( not options.show_zeros and node not in hit_taxa ):
-        return
     rank_int = rank_map[node]
     code = RANK_INT_TO_CODE.get( rank_int, NO_RANK_CODE )
     if ( code != NO_RANK_CODE or options.intermediate ) and ( options.show_zeros or node in hit_taxa):
@@ -210,10 +225,10 @@
     parser.add_option( '', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree' )
     (options, args) = parser.parse_args()
     if options.version:
-        print >> sys.stderr, "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ )
+        print( "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ), file=sys.stderr )
         sys.exit()
     if not args:
-        print >> sys.stderr, parser.get_usage()
+        print( parser.get_usage(), file=sys.stderr )
         sys.exit()
 
     if options.cluster:
```
```diff
--- a/kraken_taxonomy_report.xml Wed Jun 01 17:25:40 2016 -0400
+++ b/kraken_taxonomy_report.xml Wed Nov 23 03:27:33 2016 -0500
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.1">
+<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.2">
     <description>view report of classification for multiple samples</description>
     <requirements>
         <requirement type="package" version="1.66">biopython</requirement>
@@ -62,7 +62,7 @@
         <param checked="True" falsevalue="" argument="--sanitize-names" label="Sanitize Names" name="sanitize_names" truevalue="--sanitize-names" type="boolean" help="Replace special chars (\t| |\||\.;) with underscore (_)" />
         <param checked="False" falsevalue="" argument="--show-rank" label="Output Rank Name in (second to) last column" name="show_rank" truevalue="--show-rank" type="boolean" />
         <param checked="False" falsevalue="" argument="--taxonomy" label="Output taxonomy in last column" name="taxonomy" truevalue="--taxonomy" type="boolean" />
-        <param checked="False" falsevalue="" argument="--intermediate" label="Display intermediate ranks" name="intermediate" truevalue="--intermediate" type="boolean" />
+        <param checked="True" falsevalue="" argument="--intermediate" label="Display intermediate ranks" name="intermediate" truevalue="--intermediate" type="boolean" />
         <conditional name="cluster">
             <param argument="--cluster" help="Combines rows under the selected taxon and reports only selected rank." label="Cluster by taxonomic rank" name="cluster" type="select">
                 <option value="Superkingdom">Superkingdom</option>
```