Mercurial > repos > iuc > kraken_taxonomy_report
annotate kraken_taxonomy_report.py @ 5:bfae8da6082c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 5b5fa1a3d9492fbbf01aa6ddceca9d4525c7cba5
author | iuc |
---|---|
date | Tue, 12 Mar 2024 09:52:55 +0000 |
parents | 27d65c78863c |
children |
rev | line source |
---|---|
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
2 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
3 # Reports a summary of Kraken's results |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
4 # and optionally creates a newick Tree |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
5 # Copyright (c) 2016 Daniel Blankenberg |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
6 # Licensed under the Academic Free License version 3.0 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
7 # https://github.com/blankenberg/Kraken-Taxonomy-Report |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
8 |
1
b97694b21bc3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents:
0
diff
changeset
|
9 from __future__ import print_function |
b97694b21bc3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents:
0
diff
changeset
|
10 |
2
528a1d91b066
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents:
1
diff
changeset
|
11 import optparse |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
12 import os |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
13 import re |
2
528a1d91b066
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents:
1
diff
changeset
|
14 import sys |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
15 |
1
b97694b21bc3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents:
0
diff
changeset
|
16 __VERSION__ = '0.0.2' |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
17 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
18 __URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report" |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
19 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
20 # Rank names were pulled from ncbi nodes.dmp on 02/02/2016 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
21 # cat nodes.dmp | cut -f 5 | sort | uniq |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
22 # "root" is added manually |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
23 NO_RANK_NAME = "no rank" |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
24 RANK_NAMES = [ |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
25 NO_RANK_NAME, |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
26 "root", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
27 "superkingdom", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
28 "kingdom", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
29 "subkingdom", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
30 "superphylum", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
31 "phylum", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
32 "subphylum", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
33 "superclass", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
34 "class", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
35 "subclass", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
36 "infraclass", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
37 "superorder", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
38 "order", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
39 "suborder", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
40 "infraorder", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
41 "parvorder", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
42 "superfamily", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
43 "family", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
44 "subfamily", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
45 "tribe", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
46 "subtribe", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
47 "genus", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
48 "subgenus", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
49 "species group", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
50 "species subgroup", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
51 "species", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
52 "subspecies", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
53 "varietas", |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
54 "forma" |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
55 ] |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
56 # NB: We put 'no rank' at top of list for generating trees, due to e.g. |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
57 # root (root) -> cellular organisms (no rank) -> bacteria (superkingdom) |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
58 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
59 RANK_NAME_TO_INTS = dict([(y, x) for (x, y) in enumerate(RANK_NAMES)]) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
60 RANK_NAMES_INTS = range(len(RANK_NAMES)) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
61 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
62 NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
63 NO_RANK_CODE = 'n' |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
64 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
65 PRIMARY_RANK_NAMES = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom'] |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
66 RANK_INT_TO_CODE = {} |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
67 for name in PRIMARY_RANK_NAMES: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
68 RANK_INT_TO_CODE[RANK_NAMES.index(name)] = name[0] |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
69 RANK_INT_TO_CODE[RANK_NAMES.index('superkingdom')] = 'd' |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
70 PRIMARY_RANK_NAMES.append('superkingdom') |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
71 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
72 NAME_STUB = "%s__%s" |
3
b11b3ac48bb9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents:
2
diff
changeset
|
73 NAME_RE = re.compile(r"(\t| |\||\.;)") |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
74 NAME_REPL = "_" |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
75 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
76 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
def get_kraken_db_path(db):
    """Resolve the location of a Kraken database.

    If the ``KRAKEN_DB_PATH`` environment variable is set to a non-empty
    value, ``db`` is joined onto it; otherwise ``db`` is returned unchanged.

    :param db: database name or path; must be truthy.
    :returns: the resolved database path.
    :raises ValueError: if ``db`` is empty or ``None``.
    """
    # Raise explicitly rather than via ``assert`` so that the check is not
    # stripped when Python runs with -O.
    if not db:
        raise ValueError("You must provide a kraken database")
    k_db_path = os.getenv('KRAKEN_DB_PATH')
    if k_db_path:
        db = os.path.join(k_db_path, db)
    return db
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
83 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
84 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
def load_taxonomy(db_path, sanitize_names=False):
    """Read ``taxonomy/names.dmp`` and ``taxonomy/nodes.dmp`` below ``db_path``.

    Only "scientific name" rows of ``names.dmp`` are used.  When the same
    name is seen for more than one node, each affected node's name is made
    unique by appending ``_<node id>`` and a message is written to stderr.

    :param db_path: directory that contains the ``taxonomy`` sub-directory.
    :param sanitize_names: when true, every ``NAME_RE`` match in a name is
        replaced by ``NAME_REPL`` before the name is stored.
    :returns: a tuple ``(child_lists, name_map, rank_map)`` where
        ``child_lists`` maps a parent node id to the list of its child node
        ids (node ``'1'`` is filed under parent ``'0'``), ``name_map`` maps a
        node id to its name, and ``rank_map`` maps a node id to the integer
        looked up in ``RANK_NAME_TO_INTS`` (``NO_RANK_INT`` for unknown ranks).
        All ids are kept as strings exactly as read from the files.
    :raises AssertionError: if a de-duplicated name collides with a name
        that has already been recorded.
    """
    child_lists = {}
    name_map = {}
    rank_map = {}
    # name -> (id of the first node seen with that name,
    #          whether that first node has already been renamed)
    names = {}
    with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh:
        for line in fh:
            line = line.rstrip("\n\r")
            if not line.strip():
                # Tolerate blank lines (e.g. at end of file) instead of
                # failing with an IndexError below.
                continue
            if line.endswith("\t|"):
                line = line[:-2]
            fields = line.split("\t|\t")
            if fields[3] != "scientific name":
                continue
            node_id = fields[0]
            name = fields[1]
            if sanitize_names:
                name = NAME_RE.sub(NAME_REPL, name)
            if name in names:
                first_id, first_renamed = names[name]
                print('Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % (name, node_id, first_id), file=sys.stderr)
                new_name = "%s_%s" % (name, node_id)
                print('Transforming node "%s" named "%s" to "%s".' % (node_id, name, new_name), file=sys.stderr)
                # Explicit raise (not ``assert``) so the check survives -O;
                # AssertionError is kept for backward compatibility.
                if new_name in names:
                    raise AssertionError('Transformed Name "%s" already exists. Cannot recover at this time.' % new_name)
                if not first_renamed:
                    # First collision for this name: rename the node that
                    # originally claimed it as well.
                    orig_new_name = "%s_%s" % (name, first_id)
                    print('Transforming node "%s" named "%s" to "%s".' % (first_id, name, orig_new_name), file=sys.stderr)
                    if orig_new_name in names:
                        raise AssertionError('Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name)
                    name_map[first_id] = orig_new_name
                    names[name] = (first_id, True)
                name = new_name
            else:
                names[name] = (node_id, False)
            name_map[node_id] = name

    with open(os.path.join(db_path, "taxonomy/nodes.dmp")) as fh:
        for line in fh:
            line = line.rstrip("\n\r")
            if not line.strip():
                # Tolerate blank lines, as above.
                continue
            fields = line.split("\t|\t")
            node_id = fields[0]
            parent_id = fields[1]
            rank = RANK_NAME_TO_INTS.get(fields[2].lower())
            if rank is None:
                # This should never happen, unless new taxonomy ranks are created
                print('Unrecognized rank: Node "%s" is "%s", setting to "%s"' % (node_id, fields[2], NO_RANK_NAME), file=sys.stderr)
                rank = NO_RANK_INT
            if node_id == '1':
                # Node '1' is always filed under the artificial parent '0',
                # whatever parent the file lists for it.
                parent_id = '0'
            child_lists.setdefault(parent_id, []).append(node_id)
            rank_map[node_id] = rank
    return (child_lists, name_map, rank_map)
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
136 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
137 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
def dfs_summation(node, counts, child_lists):
    """Accumulate descendant counts into ``counts`` for the subtree at ``node``.

    The walk is depth-first and post-order: each child's subtree is summed
    before that child's total is added to ``node``.

    :param node: identifier of the subtree root to process.
    :param counts: dict mapping node id -> count; updated in place.
    :param child_lists: dict mapping node id -> list of child node ids.
    :returns: None; the result is the mutated ``counts`` dict.
    """
    # A missing or empty child list means ``node`` is a leaf: nothing to add,
    # and no entry is created for it in ``counts``.
    for child in child_lists.get(node) or ():
        dfs_summation(child, counts, child_lists)
        # The child's entry (if any) already holds its whole-subtree total.
        counts[node] = counts.get(node, 0) + counts.get(child, 0)
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
144 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
145 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
def dfs_report(node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None):
    """Walk the taxonomy depth-first from ``node`` and collect report rows.

    A row is emitted for ``node`` when its rank has a code other than
    ``NO_RANK_CODE`` (or ``options.intermediate`` is set) and the node is in
    ``hit_taxa`` (or ``options.show_zeros`` is set).  Children are visited
    whether or not the node itself was reported.

    :param node: identifier of the taxonomy node to process.
    :param file_data: iterable of per-input dicts mapping node id -> count.
    :param hit_taxa: container of node ids that should be reported.
    :param rank_map: dict mapping node id -> rank integer.
    :param name_map: dict mapping node id -> display name.
    :param child_lists: dict mapping node id -> list of child node ids.
    :param output_lines: container indexed by rank integer whose items are
        lists; finished rows are appended to the list for the node's rank.
    :param options: parsed options; reads ``intermediate``, ``show_zeros``,
        ``name_id``, ``name_long``, ``show_rank`` and ``taxonomy``.
    :param name: '|'-joined lineage of reported ancestors, or None at the top.
    :param tax: ';'-joined lineage of reported ancestors, or None at the top.
    :returns: None; rows are accumulated in ``output_lines``.
    """
    rank_int = rank_map[node]
    code = RANK_INT_TO_CODE.get(rank_int, NO_RANK_CODE)
    rank_is_reportable = code != NO_RANK_CODE or options.intermediate
    if rank_is_reportable and (options.show_zeros or node in hit_taxa):
        # Extend both lineage strings with this node; only reported nodes
        # contribute to the lineage that is handed down to their children.
        name_prefix = "" if name is None else "%s|" % name
        tax_prefix = "" if tax is None else "%s;" % tax
        sanitized_name = name_map[node]
        name_stub = NAME_STUB % (code, sanitized_name)
        name = name_prefix + name_stub
        tax = tax_prefix + name_stub

        # First column: taxonomy id, full lineage, or the plain name.
        if options.name_id:
            row = node
        elif options.name_long:
            row = name
        else:
            row = sanitized_name

        # One count column per input file, in input order.
        for sample_counts in file_data:
            row = "%s\t%i" % (row, sample_counts.get(node, 0))
        if options.show_rank:
            row = "%s\t%s" % (row, RANK_NAMES[rank_int])
        if options.taxonomy:
            row = "%s\t%s" % (row, tax)
        output_lines[rank_int].append(row)

    # Descend regardless of whether this node produced a row.
    for child in child_lists.get(node) or ():
        dfs_report(child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax)
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
179 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
180 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
def write_tree(child_lists, name_map, rank_map, options, branch_length=1):
    """Build a tree from the taxonomy and write it in newick format.

    The tree is rooted at the first child of the pseudo-node "0".  When
    ``options.cluster`` is set, nodes at that rank become leaves and children
    whose rank integer is greater than it are left out.

    :param child_lists: dict mapping node id -> list of child node ids.
    :param name_map: dict mapping node id -> display name.
    :param rank_map: dict mapping node id -> rank integer.
    :param options: parsed options; reads ``name_id``, ``cluster`` and
        ``output_tree`` (the destination passed to ``Bio.Phylo.write``).
    :param branch_length: branch length assigned to every clade.
    :returns: None; the tree is written to ``options.output_tree``.
    """
    # Uses Biopython, only load if making tree
    import Bio.Phylo
    from Bio.Phylo import BaseTree

    cluster_rank = options.cluster
    clades = {}

    def _clade_for(node_id):
        # Return the clade for node_id, creating it on first use.
        if node_id not in clades:
            label = node_id if options.name_id else name_map[node_id]
            clades[node_id] = BaseTree.Clade(name=label, branch_length=branch_length)
        return clades[node_id]

    def _attach_children(parent_id):
        if cluster_rank is not None and rank_map[parent_id] == cluster_rank:
            # Short circuit if we found our rank, prevents 'hanging' no ranks from being output
            # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db
            return
        parent_clade = _clade_for(parent_id)
        for child_id in child_lists.get(parent_id, []):
            if cluster_rank is not None and rank_map[child_id] > cluster_rank:
                # Child rank is beyond the requested cluster rank: skip it and its subtree.
                continue
            parent_clade.clades.append(_clade_for(child_id))
            _attach_children(child_id)

    root_node_id = child_lists["0"][0]
    # Create the root up front so it exists even if the walk short-circuits on it.
    root_clade = _clade_for(root_node_id)
    _attach_children(root_node_id)
    tree = BaseTree.Tree(root=root_clade)
    Bio.Phylo.write([tree], options.output_tree, 'newick')
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
210 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
211 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
212 def __main__(): |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
213 parser = optparse.OptionParser(usage="%prog [options] file1 file...fileN") |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
214 parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
215 parser.add_option('', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
216 parser.add_option('', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
217 parser.add_option('', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
218 parser.add_option('', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
219 parser.add_option('', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
220 parser.add_option('', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
221 parser.add_option('', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
222 parser.add_option('', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa') |
3
b11b3ac48bb9
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit a4b0969b33a68a0ea9ba12291f6694aec24f13ed
iuc
parents:
2
diff
changeset
|
223 parser.add_option('', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help=r'Replace special chars (\t| |\||\.;) with underscore (_)') |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
224 parser.add_option('', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
225 parser.add_option('', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
226 parser.add_option('', '--output', dest='output', action='store', type="string", default=None, help='Name of output file') |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
227 parser.add_option('', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree') |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
228 (options, args) = parser.parse_args() |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
229 if options.version: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
230 print("Kraken Taxonomy Report (%s) version %s" % (__URL__, __VERSION__), file=sys.stderr) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
231 sys.exit() |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
232 if not args: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
233 print(parser.get_usage(), file=sys.stderr) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
234 sys.exit() |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
235 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
236 if options.cluster: |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
237 cluster_name = options.cluster.lower() |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
238 cluster = RANK_NAME_TO_INTS.get(cluster_name, None) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
239 assert cluster is not None, ValueError('"%s" is not a valid rank for clustering.' % options.cluster) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
240 if cluster_name not in PRIMARY_RANK_NAMES: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
241 assert options.intermediate, ValueError('You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
242 ranks_to_report = [cluster] |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
243 options.cluster = cluster |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
244 # When clustering we need to do summation |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
245 options.summation = True |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
246 else: |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
247 options.cluster = None # make empty string into None |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
248 ranks_to_report = RANK_NAMES_INTS |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
249 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
250 if options.output: |
2
528a1d91b066
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents:
1
diff
changeset
|
251 output_fh = open(options.output, 'w') |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
252 else: |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
253 output_fh = sys.stdout |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
254 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
255 db_path = get_kraken_db_path(options.db) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
256 (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
257 file_data = [] |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
258 hit_taxa = [] |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
259 for input_filename in args: |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
260 taxo_counts = {} |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
261 with open(input_filename) as fh: |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
262 for line in fh: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
263 fields = line.split("\t") |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
264 taxo_counts[fields[2]] = taxo_counts.get(fields[2], 0) + 1 |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
265 clade_counts = taxo_counts.copy() # fixme remove copying? |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
266 if options.summation: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
267 dfs_summation('1', clade_counts, child_lists) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
268 for key, value in clade_counts.items(): |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
269 if value and key not in hit_taxa: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
270 hit_taxa.append(key) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
271 file_data.append(clade_counts) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
272 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
273 if options.header_line: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
274 output_fh.write("#ID\t") |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
275 output_fh.write("\t".join(args)) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
276 if options.show_rank: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
277 output_fh.write("\trank") |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
278 if options.taxonomy: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
279 output_fh.write("\ttaxonomy") |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
280 output_fh.write('\n') |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
281 |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
282 output_lines = dict([(x, []) for x in RANK_NAMES_INTS]) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
283 dfs_report('1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
284 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
285 for rank_int in ranks_to_report: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
286 for line in output_lines.get(rank_int, []): |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
287 output_fh.write(line) |
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
288 output_fh.write('\n') |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
289 fh.close() |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
290 if options.output_tree: |
4
27d65c78863c
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
iuc
parents:
3
diff
changeset
|
291 write_tree(child_lists, name_map, rank_map, options) |
0
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
292 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
293 |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
294 if __name__ == "__main__": |
3f1a0d47ea8d
planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff
changeset
|
295 __main__() |