annotate kraken_taxonomy_report.py @ 4:27d65c78863c draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
author iuc
date Sun, 20 Mar 2022 16:39:48 +0000
parents b11b3ac48bb9
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
1 #!/usr/bin/env python
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
2
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
3 # Reports a summary of Kraken's results
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
4 # and optionally creates a newick Tree
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
5 # Copyright (c) 2016 Daniel Blankenberg
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
6 # Licensed under the Academic Free License version 3.0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
7 # https://github.com/blankenberg/Kraken-Taxonomy-Report
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
8
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
9 from __future__ import print_function
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
10
2
528a1d91b066 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 1
diff changeset
11 import optparse
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
12 import os
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
13 import re
2
528a1d91b066 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 1
diff changeset
14 import sys
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
15
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
# Version of this script.
__VERSION__ = '0.0.2'

# Upstream project page for this script.
__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
19
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
# Rank names were pulled from ncbi nodes.dmp on 02/02/2016
# cat nodes.dmp | cut -f 5 | sort | uniq
# "root" is added manually
NO_RANK_NAME = "no rank"
RANK_NAMES = [
    NO_RANK_NAME,
    "root",
    "superkingdom",
    "kingdom",
    "subkingdom",
    "superphylum",
    "phylum",
    "subphylum",
    "superclass",
    "class",
    "subclass",
    "infraclass",
    "superorder",
    "order",
    "suborder",
    "infraorder",
    "parvorder",
    "superfamily",
    "family",
    "subfamily",
    "tribe",
    "subtribe",
    "genus",
    "subgenus",
    "species group",
    "species subgroup",
    "species",
    "subspecies",
    "varietas",
    "forma",
]
# NB: We put 'no rank' at top of list for generating trees, due to e.g.
# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)

# Rank name -> position of that name in RANK_NAMES.
RANK_NAME_TO_INTS = {rank_name: rank_int for rank_int, rank_name in enumerate(RANK_NAMES)}
# Every valid rank integer, in RANK_NAMES order.
RANK_NAMES_INTS = range(len(RANK_NAMES))

NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME)
NO_RANK_CODE = 'n'

PRIMARY_RANK_NAMES = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom']
# Rank integer -> one-letter code; only the primary ranks get a code.
RANK_INT_TO_CODE = {}
for name in PRIMARY_RANK_NAMES:
    # The code of a primary rank is the first letter of its name.
    RANK_INT_TO_CODE[RANK_NAMES.index(name)] = name[0]
# 'superkingdom' is coded explicitly: its first letter would collide with
# the 's' already used for 'species'.
# NOTE(review): 'd' presumably stands for "domain" -- confirm.
RANK_INT_TO_CODE[RANK_NAMES.index('superkingdom')] = 'd'
# Appended only after the loop above so that it does not receive the code 's'.
PRIMARY_RANK_NAMES.append('superkingdom')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
71
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
# Two-slot %-format template that joins two values with a double underscore.
NAME_STUB = "%s__%s"
# Matches a tab, a space, a pipe, or the two-character sequence ".;".
# NOTE(review): "\.;" only matches a period immediately followed by a
# semicolon, not either character alone -- confirm that this is intended.
NAME_RE = re.compile(r"(\t| |\||\.;)")
# Text substituted for every NAME_RE match when names are sanitized.
NAME_REPL = "_"
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
75
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
76
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
def get_kraken_db_path(db):
    """Return the location of the Kraken database called ``db``.

    If the ``KRAKEN_DB_PATH`` environment variable is set to a non-empty
    value, ``db`` is joined onto it with ``os.path.join``; otherwise ``db``
    is returned unchanged.

    Raises:
        ValueError: if ``db`` is empty or ``None``.
    """
    if not db:
        # Raised explicitly: an ``assert`` statement is removed when Python
        # runs with -O, which would let an empty database name through.
        raise ValueError("You must provide a kraken database")
    k_db_path = os.getenv('KRAKEN_DB_PATH')
    if k_db_path:
        return os.path.join(k_db_path, db)
    return db
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
83
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
84
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
def load_taxonomy(db_path, sanitize_names=False):
    """Load names, ranks and parent/child links from a taxonomy dump.

    Reads ``taxonomy/names.dmp`` and ``taxonomy/nodes.dmp`` below ``db_path``.
    Fields in both files are separated by ``"\\t|\\t"``. Blank lines are
    skipped.

    Only ``names.dmp`` rows whose fourth field is ``"scientific name"`` are
    used. When a scientific name occurs for more than one node, every node
    carrying it is renamed to ``"<name>_<node id>"`` and the change is
    reported on stderr.

    Args:
        db_path: directory that contains the ``taxonomy`` sub-directory.
        sanitize_names: when true, every ``NAME_RE`` match in a name is
            replaced with ``NAME_REPL`` before the name is stored.

    Returns:
        A tuple ``(child_lists, name_map, rank_map)``:
        ``child_lists`` maps a parent node id to the list of its child ids,
        ``name_map`` maps a node id to its (possibly transformed) name and
        ``rank_map`` maps a node id to its rank integer. Node ids are kept
        as strings.

    Raises:
        AssertionError: if a transformed name equals a name that was already
            read. The type matches the ``assert`` statements this check
            replaces, but the check now also runs under ``python -O``.
    """
    child_lists = {}
    name_map = {}
    rank_map = {}
    # name -> (id of the first node seen with that name, whether that first
    # node has already been renamed). Used to detect duplicate names.
    # NOTE(review): transformed names are never added here, so the collision
    # checks below only compare against untransformed names.
    names = {}
    with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh:
        for line in fh:
            line = line.rstrip("\n\r")
            if not line:
                # A blank line has no fields to index.
                continue
            if line.endswith("\t|"):
                # Drop the terminator that closes the last field.
                line = line[:-2]
            fields = line.split("\t|\t")
            node_id = fields[0]
            name = fields[1]
            if fields[3] != "scientific name":
                continue
            if sanitize_names:
                name = NAME_RE.sub(NAME_REPL, name)
            if name in names:
                first_id, first_fixed = names[name]
                print('Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % (name, node_id, first_id), file=sys.stderr)
                new_name = "%s_%s" % (name, node_id)
                print('Transforming node "%s" named "%s" to "%s".' % (node_id, name, new_name), file=sys.stderr)
                if new_name in names:
                    raise AssertionError('Transformed Name "%s" already exists. Cannot recover at this time.' % new_name)
                if not first_fixed:
                    # First duplicate of this name: rename the node that
                    # claimed the name originally as well.
                    orig_new_name = "%s_%s" % (name, first_id)
                    print('Transforming node "%s" named "%s" to "%s".' % (first_id, name, orig_new_name), file=sys.stderr)
                    if orig_new_name in names:
                        raise AssertionError('Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name)
                    name_map[first_id] = orig_new_name
                    names[name] = (first_id, True)
                name = new_name
            else:
                names[name] = (node_id, False)
            name_map[node_id] = name

    with open(os.path.join(db_path, "taxonomy/nodes.dmp")) as fh:
        for line in fh:
            line = line.rstrip("\n\r")
            if not line:
                continue
            fields = line.split("\t|\t")
            node_id = fields[0]
            parent_id = fields[1]
            rank = RANK_NAME_TO_INTS.get(fields[2].lower())
            if rank is None:
                # This should never happen, unless new taxonomy ranks are created
                print('Unrecognized rank: Node "%s" is "%s", setting to "%s"' % (node_id, fields[2], NO_RANK_NAME), file=sys.stderr)
                rank = NO_RANK_INT
            if node_id == '1':
                # Node '1' is always filed under parent '0', whatever the
                # file says. NOTE(review): presumably because the root lists
                # itself as its own parent in the dump -- confirm.
                parent_id = '0'
            child_lists.setdefault(parent_id, []).append(node_id)
            rank_map[node_id] = rank
    return (child_lists, name_map, rank_map)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
136
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
137
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
def dfs_summation(node, counts, child_lists):
    """Fold the counts of every descendant into ``counts[node]``, depth first.

    ``child_lists`` maps a node id to the list of its child ids and
    ``counts`` maps a node id to a number.  ``counts`` is updated in place;
    nothing is returned.

    Each child is fully summed before it is added to its parent, so after
    the call ``counts[node]`` holds the node's own count plus the totals of
    everything below it.  A node that has children always ends up with an
    entry in ``counts``, even when the resulting total is 0.
    """
    # A missing key and an empty child list are both treated as "leaf".
    for descendant in child_lists.get(node) or ():
        dfs_summation(descendant, counts, child_lists)
        counts[node] = counts.get(node, 0) + counts.get(descendant, 0)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
144
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
145
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
def dfs_report(node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None):
    """Walk the taxonomy below ``node`` and collect one report row per taxon.

    A row is produced for ``node`` when both of the following hold:

    * its rank code is not ``NO_RANK_CODE``, or ``options.intermediate`` is set;
    * ``options.show_zeros`` is set, or ``node`` is in ``hit_taxa``.

    The row is tab separated: a label (the node id with ``options.name_id``,
    the '|'-joined lineage with ``options.name_long``, otherwise the name
    from ``name_map``), then one integer count per entry of ``file_data``,
    then optionally the rank name (``options.show_rank``) and the ';'-joined
    lineage (``options.taxonomy``).  It is appended to
    ``output_lines[rank_int]``.

    ``name`` and ``tax`` carry the lineage built from the reported ancestors;
    a node that is not reported passes them on to its children unchanged.
    Children are always visited, whether or not ``node`` itself was reported.
    """
    rank_int = rank_map[node]
    code = RANK_INT_TO_CODE.get(rank_int, NO_RANK_CODE)
    rank_selected = code != NO_RANK_CODE or options.intermediate

    if rank_selected and (options.show_zeros or node in hit_taxa):
        taxon_label = name_map[node]
        stub = NAME_STUB % (code, taxon_label)
        # Extend the inherited lineage strings with this taxon's stub.
        name = stub if name is None else "%s|%s" % (name, stub)
        tax = stub if tax is None else "%s;%s" % (tax, stub)

        if options.name_id:
            first_cell = node
        elif options.name_long:
            first_cell = name
        else:
            first_cell = taxon_label

        cells = [first_cell]
        cells.extend("%i" % sample.get(node, 0) for sample in file_data)
        if options.show_rank:
            cells.append(RANK_NAMES[rank_int])
        if options.taxonomy:
            cells.append(tax)
        output_lines[rank_int].append("\t".join(cells))

    # Descend regardless of whether this node was reported.
    for child in child_lists.get(node) or ():
        dfs_report(child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
179
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
180
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
def write_tree(child_lists, name_map, rank_map, options, branch_length=1):
    """Write the taxonomy as a newick tree to ``options.output_tree``.

    The tree is rooted at the first child of node ``"0"`` in ``child_lists``.
    Clades are labelled with the node id when ``options.name_id`` is set,
    otherwise with the name from ``name_map``; every clade gets the same
    ``branch_length``.

    When ``options.cluster`` is not None, a child whose rank is greater than
    ``options.cluster`` is left out together with its subtree, and nothing
    is added below a node whose rank equals ``options.cluster``.
    """
    # Biopython is only needed for tree output, so it is imported here.
    import Bio.Phylo
    from Bio.Phylo import BaseTree

    clades = {}

    def _label(taxon_id):
        return taxon_id if options.name_id else name_map[taxon_id]

    def _clade_for(taxon_id):
        # Create the clade for a taxon on first use, then reuse it.
        if taxon_id not in clades:
            clades[taxon_id] = BaseTree.Clade(name=_label(taxon_id), branch_length=branch_length)
        return clades[taxon_id]

    def _attach_descendants(parent_id):
        cluster_rank = options.cluster
        if cluster_rank is not None and rank_map[parent_id] == cluster_rank:
            # Stop at the clustering rank so that 'hanging' no-rank nodes
            # below it are not output, e.g. clustering by "species"
            # (Escherichia coli) with a "no rank" node (Escherichia coli K-12)
            # underneath in test_db.
            return
        parent_clade = _clade_for(parent_id)
        for child_id in child_lists.get(parent_id, []):
            if cluster_rank is not None and rank_map[child_id] > cluster_rank:
                continue
            parent_clade.clades.append(_clade_for(child_id))
            _attach_descendants(child_id)

    root_id = child_lists["0"][0]
    root_clade = _clade_for(root_id)
    _attach_descendants(root_id)
    Bio.Phylo.write([BaseTree.Tree(root=root_clade)], options.output_tree, 'newick')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
210
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
211
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
212 def __main__():
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
213 parser = optparse.OptionParser(usage="%prog [options] file1 file...fileN")
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
214 parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
215 parser.add_option('', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
216 parser.add_option('', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
217 parser.add_option('', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
218 parser.add_option('', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
219 parser.add_option('', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
220 parser.add_option('', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
221 parser.add_option('', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
222 parser.add_option('', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa')
3
b11b3ac48bb9 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents: 2
diff changeset
223 parser.add_option('', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help=r'Replace special chars (\t| |\||\.;) with underscore (_)')
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
224 parser.add_option('', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
225 parser.add_option('', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
226 parser.add_option('', '--output', dest='output', action='store', type="string", default=None, help='Name of output file')
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
227 parser.add_option('', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
228 (options, args) = parser.parse_args()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
229 if options.version:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
230 print("Kraken Taxonomy Report (%s) version %s" % (__URL__, __VERSION__), file=sys.stderr)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
231 sys.exit()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
232 if not args:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
233 print(parser.get_usage(), file=sys.stderr)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
234 sys.exit()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
235
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
236 if options.cluster:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
237 cluster_name = options.cluster.lower()
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
238 cluster = RANK_NAME_TO_INTS.get(cluster_name, None)
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
239 assert cluster is not None, ValueError('"%s" is not a valid rank for clustering.' % options.cluster)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
240 if cluster_name not in PRIMARY_RANK_NAMES:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
241 assert options.intermediate, ValueError('You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster)
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
242 ranks_to_report = [cluster]
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
243 options.cluster = cluster
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
244 # When clustering we need to do summation
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
245 options.summation = True
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
246 else:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
247 options.cluster = None # make empty string into None
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
248 ranks_to_report = RANK_NAMES_INTS
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
249
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
250 if options.output:
2
528a1d91b066 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 1
diff changeset
251 output_fh = open(options.output, 'w')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
252 else:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
253 output_fh = sys.stdout
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
254
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
255 db_path = get_kraken_db_path(options.db)
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
256 (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
257 file_data = []
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
258 hit_taxa = []
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
259 for input_filename in args:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
260 taxo_counts = {}
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
261 with open(input_filename) as fh:
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
262 for line in fh:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
263 fields = line.split("\t")
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
264 taxo_counts[fields[2]] = taxo_counts.get(fields[2], 0) + 1
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
265 clade_counts = taxo_counts.copy() # fixme remove copying?
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
266 if options.summation:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
267 dfs_summation('1', clade_counts, child_lists)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
268 for key, value in clade_counts.items():
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
269 if value and key not in hit_taxa:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
270 hit_taxa.append(key)
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
271 file_data.append(clade_counts)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
272
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
273 if options.header_line:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
274 output_fh.write("#ID\t")
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
275 output_fh.write("\t".join(args))
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
276 if options.show_rank:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
277 output_fh.write("\trank")
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
278 if options.taxonomy:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
279 output_fh.write("\ttaxonomy")
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
280 output_fh.write('\n')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
281
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
282 output_lines = dict([(x, []) for x in RANK_NAMES_INTS])
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
283 dfs_report('1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
284
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
285 for rank_int in ranks_to_report:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
286 for line in output_lines.get(rank_int, []):
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
287 output_fh.write(line)
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
288 output_fh.write('\n')
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
289 fh.close()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
290 if options.output_tree:
4
27d65c78863c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents: 3
diff changeset
291 write_tree(child_lists, name_map, rank_map, options)
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
292
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
293
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
# Script entry point: call __main__() only when this file is executed directly, not when it is imported as a module.
294 if __name__ == "__main__":
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
295 __main__()