Repository 'kraken_taxonomy_report'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/kraken_taxonomy_report

Changeset 4:27d65c78863c (2022-03-20)
Previous changeset 3:b11b3ac48bb9 (2018-10-30) Next changeset 5:bfae8da6082c (2024-03-12)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
modified:
kraken_taxonomy_report.py
kraken_taxonomy_report.xml
b
diff -r b11b3ac48bb9 -r 27d65c78863c kraken_taxonomy_report.py
--- a/kraken_taxonomy_report.py Tue Oct 30 18:58:54 2018 -0400
+++ b/kraken_taxonomy_report.py Sun Mar 20 16:39:48 2022 +0000
[
b'@@ -21,130 +21,132 @@\n # cat nodes.dmp | cut -f 5 | sort | uniq\n # "root" is added manually\n NO_RANK_NAME = "no rank"\n-RANK_NAMES = [ NO_RANK_NAME,\n-               "root",\n-               "superkingdom",\n-               "kingdom",\n-               "subkingdom",\n-               "superphylum",\n-               "phylum",\n-               "subphylum",\n-               "superclass",\n-               "class",\n-               "subclass",\n-               "infraclass",\n-               "superorder",\n-               "order",\n-               "suborder",\n-               "infraorder",\n-               "parvorder",\n-               "superfamily",\n-               "family",\n-               "subfamily",\n-               "tribe",\n-               "subtribe",\n-               "genus",\n-               "subgenus",\n-               "species group",\n-               "species subgroup",\n-               "species",\n-               "subspecies",\n-               "varietas",\n-               "forma" ]\n+RANK_NAMES = [\n+    NO_RANK_NAME,\n+    "root",\n+    "superkingdom",\n+    "kingdom",\n+    "subkingdom",\n+    "superphylum",\n+    "phylum",\n+    "subphylum",\n+    "superclass",\n+    "class",\n+    "subclass",\n+    "infraclass",\n+    "superorder",\n+    "order",\n+    "suborder",\n+    "infraorder",\n+    "parvorder",\n+    "superfamily",\n+    "family",\n+    "subfamily",\n+    "tribe",\n+    "subtribe",\n+    "genus",\n+    "subgenus",\n+    "species group",\n+    "species subgroup",\n+    "species",\n+    "subspecies",\n+    "varietas",\n+    "forma"\n+]\n # NB: We put \'no rank\' at top of list for generating trees, due to e.g.\n # root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)\n \n-RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] )\n-RANK_NAMES_INTS = range( len( RANK_NAMES ) )\n+RANK_NAME_TO_INTS = dict([(y, x) for (x, y) in enumerate(RANK_NAMES)])\n+RANK_NAMES_INTS = range(len(RANK_NAMES))\n \n-NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME )\n+NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME)\n NO_RANK_CODE = \'n\'\n \n-PRIMARY_RANK_NAMES = [ \'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\' ]\n+PRIMARY_RANK_NAMES = [\'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\']\n RANK_INT_TO_CODE = {}\n for name in PRIMARY_RANK_NAMES:\n-    RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0]\n-RANK_INT_TO_CODE[ RANK_NAMES.index( \'superkingdom\' ) ] = \'d\'\n-PRIMARY_RANK_NAMES.append( \'superkingdom\' )\n+    RANK_INT_TO_CODE[RANK_NAMES.index(name)] = name[0]\n+RANK_INT_TO_CODE[RANK_NAMES.index(\'superkingdom\')] = \'d\'\n+PRIMARY_RANK_NAMES.append(\'superkingdom\')\n \n NAME_STUB = "%s__%s"\n NAME_RE = re.compile(r"(\\t| |\\||\\.;)")\n NAME_REPL = "_"\n \n \n-def get_kraken_db_path( db ):\n-    assert db, ValueError( "You must provide a kraken database" )\n-    k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None )\n+def get_kraken_db_path(db):\n+    assert db, ValueError("You must provide a kraken database")\n+    k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None)\n     if k_db_path:\n-        db = os.path.join( k_db_path, db )\n+        db = os.path.join(k_db_path, db)\n     return db\n \n \n-def load_taxonomy( db_path, sanitize_names=False ):\n+def load_taxonomy(db_path, sanitize_names=False):\n     child_lists = {}\n     name_map = {}\n     rank_map = {}\n     names = {}  # Store names here to look for duplicates (id, True/False name fixed)\n-    with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh:\n+    with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh:\n         for line in fh:\n-            line = line.rstrip( "\\n\\r" )\n-            if line.endswith( "\\t|" ):\n+            line = line.rstrip("\\n\\r")\n+            if line.endswith("\\t|"):\n                 line = line[:-2]\n-            fields = line.split( "\\t|\\t" )\n+            fields = line.split("\\t|\\t")\n             node_id = fields[0]\n             name = fields[1]\n             if sanitize_names:\n-                name = NAME_RE.sub( NAME_REPL, name )\n+                name = NAME_RE.sub(NAME_REPL, name)\n          '..b'gs()\n     if options.version:\n-        print( "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ), file=sys.stderr )\n+        print("Kraken Taxonomy Report (%s) version %s" % (__URL__, __VERSION__), file=sys.stderr)\n         sys.exit()\n     if not args:\n-        print( parser.get_usage(), file=sys.stderr )\n+        print(parser.get_usage(), file=sys.stderr)\n         sys.exit()\n \n     if options.cluster:\n         cluster_name = options.cluster.lower()\n-        cluster = RANK_NAME_TO_INTS.get( cluster_name, None )\n-        assert cluster is not None, ValueError( \'"%s" is not a valid rank for clustering.\' % options.cluster )\n+        cluster = RANK_NAME_TO_INTS.get(cluster_name, None)\n+        assert cluster is not None, ValueError(\'"%s" is not a valid rank for clustering.\' % options.cluster)\n         if cluster_name not in PRIMARY_RANK_NAMES:\n-            assert options.intermediate, ValueError( \'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster )\n-        ranks_to_report = [ cluster ]\n+            assert options.intermediate, ValueError(\'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster)\n+        ranks_to_report = [cluster]\n         options.cluster = cluster\n         # When clustering we need to do summatation\n         options.summation = True\n@@ -250,43 +252,43 @@\n     else:\n         output_fh = sys.stdout\n \n-    db_path = get_kraken_db_path( options.db )\n-    ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names )\n+    db_path = get_kraken_db_path(options.db)\n+    (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names)\n     file_data = []\n     hit_taxa = []\n     for input_filename in args:\n         taxo_counts = {}\n-        with open( input_filename ) as fh:\n+        with open(input_filename) as fh:\n             for line in fh:\n-                fields = line.split( "\\t" )\n-                taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1\n+                fields = line.split("\\t")\n+                taxo_counts[fields[2]] = taxo_counts.get(fields[2], 0) + 1\n         clade_counts = taxo_counts.copy()  # fixme remove copying?\n         if options.summation:\n-            dfs_summation( \'1\', clade_counts, child_lists )\n+            dfs_summation(\'1\', clade_counts, child_lists)\n         for key, value in clade_counts.items():\n             if value and key not in hit_taxa:\n-                hit_taxa.append( key )\n-        file_data.append( clade_counts )\n+                hit_taxa.append(key)\n+        file_data.append(clade_counts)\n \n     if options.header_line:\n-        output_fh.write( "#ID\\t" )\n-        output_fh.write( "\\t".join( args ) )\n+        output_fh.write("#ID\\t")\n+        output_fh.write("\\t".join(args))\n         if options.show_rank:\n-            output_fh.write( "\\trank" )\n+            output_fh.write("\\trank")\n         if options.taxonomy:\n-            output_fh.write( "\\ttaxonomy" )\n-        output_fh.write( \'\\n\' )\n+            output_fh.write("\\ttaxonomy")\n+        output_fh.write(\'\\n\')\n \n-    output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] )\n-    dfs_report( \'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None )\n+    output_lines = dict([(x, []) for x in RANK_NAMES_INTS])\n+    dfs_report(\'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None)\n \n     for rank_int in ranks_to_report:\n-        for line in output_lines.get( rank_int, [] ):\n-            output_fh.write( line )\n-            output_fh.write( \'\\n\' )\n+        for line in output_lines.get(rank_int, []):\n+            output_fh.write(line)\n+            output_fh.write(\'\\n\')\n     fh.close()\n     if options.output_tree:\n-        write_tree( child_lists, name_map, rank_map, options )\n+        write_tree(child_lists, name_map, rank_map, options)\n \n \n if __name__ == "__main__":\n'
b
diff -r b11b3ac48bb9 -r 27d65c78863c kraken_taxonomy_report.xml
--- a/kraken_taxonomy_report.xml Tue Oct 30 18:58:54 2018 -0400
+++ b/kraken_taxonomy_report.xml Sun Mar 20 16:39:48 2022 +0000
[
@@ -1,13 +1,8 @@
-<?xml version="1.0"?>
-<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.2">
+<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.3" profile="16.04">
     <description>view report of classification for multiple samples</description>
     <requirements>
         <requirement type="package" version="1.66">biopython</requirement>
     </requirements>
-    <stdio>
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-    </stdio>
     <version_command>python '${__tool_directory__}/kraken_taxonomy_report.py' --version</version_command>
     <command><![CDATA[
 #for $input_classification in $classification: