Previous changeset 3:b11b3ac48bb9 (2018-10-30) Next changeset 5:bfae8da6082c (2024-03-12) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5" |
modified:
kraken_taxonomy_report.py kraken_taxonomy_report.xml |
b |
diff -r b11b3ac48bb9 -r 27d65c78863c kraken_taxonomy_report.py --- a/kraken_taxonomy_report.py Tue Oct 30 18:58:54 2018 -0400 +++ b/kraken_taxonomy_report.py Sun Mar 20 16:39:48 2022 +0000 |
[ |
b'@@ -21,130 +21,132 @@\n # cat nodes.dmp | cut -f 5 | sort | uniq\n # "root" is added manually\n NO_RANK_NAME = "no rank"\n-RANK_NAMES = [ NO_RANK_NAME,\n- "root",\n- "superkingdom",\n- "kingdom",\n- "subkingdom",\n- "superphylum",\n- "phylum",\n- "subphylum",\n- "superclass",\n- "class",\n- "subclass",\n- "infraclass",\n- "superorder",\n- "order",\n- "suborder",\n- "infraorder",\n- "parvorder",\n- "superfamily",\n- "family",\n- "subfamily",\n- "tribe",\n- "subtribe",\n- "genus",\n- "subgenus",\n- "species group",\n- "species subgroup",\n- "species",\n- "subspecies",\n- "varietas",\n- "forma" ]\n+RANK_NAMES = [\n+ NO_RANK_NAME,\n+ "root",\n+ "superkingdom",\n+ "kingdom",\n+ "subkingdom",\n+ "superphylum",\n+ "phylum",\n+ "subphylum",\n+ "superclass",\n+ "class",\n+ "subclass",\n+ "infraclass",\n+ "superorder",\n+ "order",\n+ "suborder",\n+ "infraorder",\n+ "parvorder",\n+ "superfamily",\n+ "family",\n+ "subfamily",\n+ "tribe",\n+ "subtribe",\n+ "genus",\n+ "subgenus",\n+ "species group",\n+ "species subgroup",\n+ "species",\n+ "subspecies",\n+ "varietas",\n+ "forma"\n+]\n # NB: We put \'no rank\' at top of list for generating trees, due to e.g.\n # root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)\n \n-RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] )\n-RANK_NAMES_INTS = range( len( RANK_NAMES ) )\n+RANK_NAME_TO_INTS = dict([(y, x) for (x, y) in enumerate(RANK_NAMES)])\n+RANK_NAMES_INTS = range(len(RANK_NAMES))\n \n-NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME )\n+NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME)\n NO_RANK_CODE = \'n\'\n \n-PRIMARY_RANK_NAMES = [ \'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\' ]\n+PRIMARY_RANK_NAMES = [\'species\', \'genus\', \'family\', \'order\', \'class\', \'phylum\', \'kingdom\']\n RANK_INT_TO_CODE = {}\n for name in PRIMARY_RANK_NAMES:\n- RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0]\n-RANK_INT_TO_CODE[ RANK_NAMES.index( \'superkingdom\' ) ] = \'d\'\n-PRIMARY_RANK_NAMES.append( \'superkingdom\' )\n+ RANK_INT_TO_CODE[RANK_NAMES.index(name)] = name[0]\n+RANK_INT_TO_CODE[RANK_NAMES.index(\'superkingdom\')] = \'d\'\n+PRIMARY_RANK_NAMES.append(\'superkingdom\')\n \n NAME_STUB = "%s__%s"\n NAME_RE = re.compile(r"(\\t| |\\||\\.;)")\n NAME_REPL = "_"\n \n \n-def get_kraken_db_path( db ):\n- assert db, ValueError( "You must provide a kraken database" )\n- k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None )\n+def get_kraken_db_path(db):\n+ assert db, ValueError("You must provide a kraken database")\n+ k_db_path = os.getenv(\'KRAKEN_DB_PATH\', None)\n if k_db_path:\n- db = os.path.join( k_db_path, db )\n+ db = os.path.join(k_db_path, db)\n return db\n \n \n-def load_taxonomy( db_path, sanitize_names=False ):\n+def load_taxonomy(db_path, sanitize_names=False):\n child_lists = {}\n name_map = {}\n rank_map = {}\n names = {} # Store names here to look for duplicates (id, True/False name fixed)\n- with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh:\n+ with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh:\n for line in fh:\n- line = line.rstrip( "\\n\\r" )\n- if line.endswith( "\\t|" ):\n+ line = line.rstrip("\\n\\r")\n+ if line.endswith("\\t|"):\n line = line[:-2]\n- fields = line.split( "\\t|\\t" )\n+ fields = line.split("\\t|\\t")\n node_id = fields[0]\n name = fields[1]\n if sanitize_names:\n- name = NAME_RE.sub( NAME_REPL, name )\n+ name = NAME_RE.sub(NAME_REPL, name)\n '..b'gs()\n if options.version:\n- print( "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ), file=sys.stderr )\n+ print("Kraken Taxonomy Report (%s) version %s" % (__URL__, __VERSION__), file=sys.stderr)\n sys.exit()\n if not args:\n- print( parser.get_usage(), file=sys.stderr )\n+ print(parser.get_usage(), file=sys.stderr)\n sys.exit()\n \n if options.cluster:\n cluster_name = options.cluster.lower()\n- cluster = RANK_NAME_TO_INTS.get( cluster_name, None )\n- assert cluster is not None, ValueError( \'"%s" is not a valid rank for clustering.\' % options.cluster )\n+ cluster = RANK_NAME_TO_INTS.get(cluster_name, None)\n+ assert cluster is not None, ValueError(\'"%s" is not a valid rank for clustering.\' % options.cluster)\n if cluster_name not in PRIMARY_RANK_NAMES:\n- assert options.intermediate, ValueError( \'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster )\n- ranks_to_report = [ cluster ]\n+ assert options.intermediate, ValueError(\'You cannot cluster by "%s", unless you enable intermediate ranks.\' % options.cluster)\n+ ranks_to_report = [cluster]\n options.cluster = cluster\n # When clustering we need to do summatation\n options.summation = True\n@@ -250,43 +252,43 @@\n else:\n output_fh = sys.stdout\n \n- db_path = get_kraken_db_path( options.db )\n- ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names )\n+ db_path = get_kraken_db_path(options.db)\n+ (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names)\n file_data = []\n hit_taxa = []\n for input_filename in args:\n taxo_counts = {}\n- with open( input_filename ) as fh:\n+ with open(input_filename) as fh:\n for line in fh:\n- fields = line.split( "\\t" )\n- taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1\n+ fields = line.split("\\t")\n+ taxo_counts[fields[2]] = taxo_counts.get(fields[2], 0) + 1\n clade_counts = taxo_counts.copy() # fixme remove copying?\n if options.summation:\n- dfs_summation( \'1\', clade_counts, child_lists )\n+ dfs_summation(\'1\', clade_counts, child_lists)\n for key, value in clade_counts.items():\n if value and key not in hit_taxa:\n- hit_taxa.append( key )\n- file_data.append( clade_counts )\n+ hit_taxa.append(key)\n+ file_data.append(clade_counts)\n \n if options.header_line:\n- output_fh.write( "#ID\\t" )\n- output_fh.write( "\\t".join( args ) )\n+ output_fh.write("#ID\\t")\n+ output_fh.write("\\t".join(args))\n if options.show_rank:\n- output_fh.write( "\\trank" )\n+ output_fh.write("\\trank")\n if options.taxonomy:\n- output_fh.write( "\\ttaxonomy" )\n- output_fh.write( \'\\n\' )\n+ output_fh.write("\\ttaxonomy")\n+ output_fh.write(\'\\n\')\n \n- output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] )\n- dfs_report( \'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None )\n+ output_lines = dict([(x, []) for x in RANK_NAMES_INTS])\n+ dfs_report(\'1\', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None)\n \n for rank_int in ranks_to_report:\n- for line in output_lines.get( rank_int, [] ):\n- output_fh.write( line )\n- output_fh.write( \'\\n\' )\n+ for line in output_lines.get(rank_int, []):\n+ output_fh.write(line)\n+ output_fh.write(\'\\n\')\n fh.close()\n if options.output_tree:\n- write_tree( child_lists, name_map, rank_map, options )\n+ write_tree(child_lists, name_map, rank_map, options)\n \n \n if __name__ == "__main__":\n' |
b |
diff -r b11b3ac48bb9 -r 27d65c78863c kraken_taxonomy_report.xml --- a/kraken_taxonomy_report.xml Tue Oct 30 18:58:54 2018 -0400 +++ b/kraken_taxonomy_report.xml Sun Mar 20 16:39:48 2022 +0000 |
[ |
@@ -1,13 +1,8 @@ -<?xml version="1.0"?> -<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.2"> +<tool id="kraken_taxonomy_report" name="Kraken taxonomic report" version="0.0.3" profile="16.04"> <description>view report of classification for multiple samples</description> <requirements> <requirement type="package" version="1.66">biopython</requirement> </requirements> - <stdio> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> <version_command>python '${__tool_directory__}/kraken_taxonomy_report.py' --version</version_command> <command><![CDATA[ #for $input_classification in $classification: |