Mercurial > repos > iuc > kraken_taxonomy_report
comparison kraken_taxonomy_report.py @ 4:27d65c78863c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/kraken_taxonomy_report commit 04943848aa0f6637b56303ec6026dcb475ecb9e5"
author | iuc |
---|---|
date | Sun, 20 Mar 2022 16:39:48 +0000 |
parents | b11b3ac48bb9 |
children |
comparison
equal
deleted
inserted
replaced
3:b11b3ac48bb9 | 4:27d65c78863c |
---|---|
19 | 19 |
20 # Rank names were pulled from ncbi nodes.dmp on 02/02/2016 | 20 # Rank names were pulled from ncbi nodes.dmp on 02/02/2016 |
21 # cat nodes.dmp | cut -f 5 | sort | uniq | 21 # cat nodes.dmp | cut -f 5 | sort | uniq |
22 # "root" is added manually | 22 # "root" is added manually |
23 NO_RANK_NAME = "no rank" | 23 NO_RANK_NAME = "no rank" |
24 RANK_NAMES = [ NO_RANK_NAME, | 24 RANK_NAMES = [ |
25 "root", | 25 NO_RANK_NAME, |
26 "superkingdom", | 26 "root", |
27 "kingdom", | 27 "superkingdom", |
28 "subkingdom", | 28 "kingdom", |
29 "superphylum", | 29 "subkingdom", |
30 "phylum", | 30 "superphylum", |
31 "subphylum", | 31 "phylum", |
32 "superclass", | 32 "subphylum", |
33 "class", | 33 "superclass", |
34 "subclass", | 34 "class", |
35 "infraclass", | 35 "subclass", |
36 "superorder", | 36 "infraclass", |
37 "order", | 37 "superorder", |
38 "suborder", | 38 "order", |
39 "infraorder", | 39 "suborder", |
40 "parvorder", | 40 "infraorder", |
41 "superfamily", | 41 "parvorder", |
42 "family", | 42 "superfamily", |
43 "subfamily", | 43 "family", |
44 "tribe", | 44 "subfamily", |
45 "subtribe", | 45 "tribe", |
46 "genus", | 46 "subtribe", |
47 "subgenus", | 47 "genus", |
48 "species group", | 48 "subgenus", |
49 "species subgroup", | 49 "species group", |
50 "species", | 50 "species subgroup", |
51 "subspecies", | 51 "species", |
52 "varietas", | 52 "subspecies", |
53 "forma" ] | 53 "varietas", |
54 "forma" | |
55 ] | |
54 # NB: We put 'no rank' at top of list for generating trees, due to e.g. | 56 # NB: We put 'no rank' at top of list for generating trees, due to e.g. |
55 # root (root) -> cellular organisms (no rank) -> bacteria (superkingdom) | 57 # root (root) -> cellular organisms (no rank) -> bacteria (superkingdom) |
56 | 58 |
57 RANK_NAME_TO_INTS = dict( [ (y, x) for (x, y) in enumerate( RANK_NAMES ) ] ) | 59 RANK_NAME_TO_INTS = dict([(y, x) for (x, y) in enumerate(RANK_NAMES)]) |
58 RANK_NAMES_INTS = range( len( RANK_NAMES ) ) | 60 RANK_NAMES_INTS = range(len(RANK_NAMES)) |
59 | 61 |
60 NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME ) | 62 NO_RANK_INT = RANK_NAMES.index(NO_RANK_NAME) |
61 NO_RANK_CODE = 'n' | 63 NO_RANK_CODE = 'n' |
62 | 64 |
63 PRIMARY_RANK_NAMES = [ 'species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom' ] | 65 PRIMARY_RANK_NAMES = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom'] |
64 RANK_INT_TO_CODE = {} | 66 RANK_INT_TO_CODE = {} |
65 for name in PRIMARY_RANK_NAMES: | 67 for name in PRIMARY_RANK_NAMES: |
66 RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0] | 68 RANK_INT_TO_CODE[RANK_NAMES.index(name)] = name[0] |
67 RANK_INT_TO_CODE[ RANK_NAMES.index( 'superkingdom' ) ] = 'd' | 69 RANK_INT_TO_CODE[RANK_NAMES.index('superkingdom')] = 'd' |
68 PRIMARY_RANK_NAMES.append( 'superkingdom' ) | 70 PRIMARY_RANK_NAMES.append('superkingdom') |
69 | 71 |
70 NAME_STUB = "%s__%s" | 72 NAME_STUB = "%s__%s" |
71 NAME_RE = re.compile(r"(\t| |\||\.;)") | 73 NAME_RE = re.compile(r"(\t| |\||\.;)") |
72 NAME_REPL = "_" | 74 NAME_REPL = "_" |
73 | 75 |
74 | 76 |
75 def get_kraken_db_path( db ): | 77 def get_kraken_db_path(db): |
76 assert db, ValueError( "You must provide a kraken database" ) | 78 assert db, ValueError("You must provide a kraken database") |
77 k_db_path = os.getenv('KRAKEN_DB_PATH', None ) | 79 k_db_path = os.getenv('KRAKEN_DB_PATH', None) |
78 if k_db_path: | 80 if k_db_path: |
79 db = os.path.join( k_db_path, db ) | 81 db = os.path.join(k_db_path, db) |
80 return db | 82 return db |
81 | 83 |
82 | 84 |
83 def load_taxonomy( db_path, sanitize_names=False ): | 85 def load_taxonomy(db_path, sanitize_names=False): |
84 child_lists = {} | 86 child_lists = {} |
85 name_map = {} | 87 name_map = {} |
86 rank_map = {} | 88 rank_map = {} |
87 names = {} # Store names here to look for duplicates (id, True/False name fixed) | 89 names = {} # Store names here to look for duplicates (id, True/False name fixed) |
88 with open( os.path.join( db_path, "taxonomy/names.dmp" ) ) as fh: | 90 with open(os.path.join(db_path, "taxonomy/names.dmp")) as fh: |
89 for line in fh: | 91 for line in fh: |
90 line = line.rstrip( "\n\r" ) | 92 line = line.rstrip("\n\r") |
91 if line.endswith( "\t|" ): | 93 if line.endswith("\t|"): |
92 line = line[:-2] | 94 line = line[:-2] |
93 fields = line.split( "\t|\t" ) | 95 fields = line.split("\t|\t") |
94 node_id = fields[0] | 96 node_id = fields[0] |
95 name = fields[1] | 97 name = fields[1] |
96 if sanitize_names: | 98 if sanitize_names: |
97 name = NAME_RE.sub( NAME_REPL, name ) | 99 name = NAME_RE.sub(NAME_REPL, name) |
98 name_type = fields[3] | 100 name_type = fields[3] |
99 if name_type == "scientific name": | 101 if name_type == "scientific name": |
100 if name in names: | 102 if name in names: |
101 print( 'Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % ( name, node_id, names[name][0] ), file=sys.stderr ) | 103 print('Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % (name, node_id, names[name][0]), file=sys.stderr) |
102 new_name = "%s_%s" % ( name, node_id ) | 104 new_name = "%s_%s" % (name, node_id) |
103 print( 'Transforming node "%s" named "%s" to "%s".' % ( node_id, name, new_name ), file=sys.stderr ) | 105 print('Transforming node "%s" named "%s" to "%s".' % (node_id, name, new_name), file=sys.stderr) |
104 assert new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % new_name | 106 assert new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % new_name |
105 if not names[name][1]: | 107 if not names[name][1]: |
106 orig_new_name = "%s_%s" % ( name, names[name][0] ) | 108 orig_new_name = "%s_%s" % (name, names[name][0]) |
107 print( 'Transforming node "%s" named "%s" to "%s".' % ( names[name][0], name, orig_new_name ), file=sys.stderr ) | 109 print('Transforming node "%s" named "%s" to "%s".' % (names[name][0], name, orig_new_name), file=sys.stderr) |
108 assert orig_new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name | 110 assert orig_new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name |
109 name_map[names[name][0]] = orig_new_name | 111 name_map[names[name][0]] = orig_new_name |
110 names[name] = ( names[name][0], True ) | 112 names[name] = (names[name][0], True) |
111 name = new_name | 113 name = new_name |
112 else: | 114 else: |
113 names[name] = ( node_id, False ) | 115 names[name] = (node_id, False) |
114 name_map[ node_id ] = name | 116 name_map[node_id] = name |
115 | 117 |
116 with open( os.path.join( db_path, "taxonomy/nodes.dmp" ) ) as fh: | 118 with open(os.path.join(db_path, "taxonomy/nodes.dmp")) as fh: |
117 for line in fh: | 119 for line in fh: |
118 line = line.rstrip( "\n\r" ) | 120 line = line.rstrip("\n\r") |
119 fields = line.split( "\t|\t" ) | 121 fields = line.split("\t|\t") |
120 node_id = fields[0] | 122 node_id = fields[0] |
121 parent_id = fields[1] | 123 parent_id = fields[1] |
122 rank = RANK_NAME_TO_INTS.get( fields[2].lower(), None ) | 124 rank = RANK_NAME_TO_INTS.get(fields[2].lower(), None) |
123 if rank is None: | 125 if rank is None: |
124 # This should never happen, unless new taxonomy ranks are created | 126 # This should never happen, unless new taxonomy ranks are created |
125 print( 'Unrecognized rank: Node "%s" is "%s", setting to "%s"' % ( node_id, fields[2], NO_RANK_NAME ), file=sys.stderr ) | 127 print('Unrecognized rank: Node "%s" is "%s", setting to "%s"' % (node_id, fields[2], NO_RANK_NAME), file=sys.stderr) |
126 rank = NO_RANK_INT | 128 rank = NO_RANK_INT |
127 if node_id == '1': | 129 if node_id == '1': |
128 parent_id = '0' | 130 parent_id = '0' |
129 if parent_id not in child_lists: | 131 if parent_id not in child_lists: |
130 child_lists[ parent_id ] = [] | 132 child_lists[parent_id] = [] |
131 child_lists[ parent_id ].append( node_id ) | 133 child_lists[parent_id].append(node_id) |
132 rank_map[node_id] = rank | 134 rank_map[node_id] = rank |
133 return ( child_lists, name_map, rank_map ) | 135 return (child_lists, name_map, rank_map) |
134 | 136 |
135 | 137 |
136 def dfs_summation( node, counts, child_lists ): | 138 def dfs_summation(node, counts, child_lists): |
137 children = child_lists.get( node, None ) | 139 children = child_lists.get(node, None) |
138 if children: | 140 if children: |
139 for child in children: | 141 for child in children: |
140 dfs_summation( child, counts, child_lists ) | 142 dfs_summation(child, counts, child_lists) |
141 counts[ node ] = counts.get( node, 0 ) + counts.get( child, 0 ) | 143 counts[node] = counts.get(node, 0) + counts.get(child, 0) |
142 | 144 |
143 | 145 |
144 def dfs_report( node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ): | 146 def dfs_report(node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None): |
145 rank_int = rank_map[node] | 147 rank_int = rank_map[node] |
146 code = RANK_INT_TO_CODE.get( rank_int, NO_RANK_CODE ) | 148 code = RANK_INT_TO_CODE.get(rank_int, NO_RANK_CODE) |
147 if ( code != NO_RANK_CODE or options.intermediate ) and ( options.show_zeros or node in hit_taxa): | 149 if (code != NO_RANK_CODE or options.intermediate) and (options.show_zeros or node in hit_taxa): |
148 if name is None: | 150 if name is None: |
149 name = "" | 151 name = "" |
150 else: | 152 else: |
151 name = "%s|" % name | 153 name = "%s|" % name |
152 if tax is None: | 154 if tax is None: |
153 tax = '' | 155 tax = '' |
154 else: | 156 else: |
155 tax = "%s;" % tax | 157 tax = "%s;" % tax |
156 sanitized_name = name_map[ node ] | 158 sanitized_name = name_map[node] |
157 name_stub = NAME_STUB % ( code, sanitized_name ) | 159 name_stub = NAME_STUB % (code, sanitized_name) |
158 name = name + name_stub | 160 name = name + name_stub |
159 tax = tax + name_stub | 161 tax = tax + name_stub |
160 if options.name_id: | 162 if options.name_id: |
161 output = node | 163 output = node |
162 elif options.name_long: | 164 elif options.name_long: |
163 output = name | 165 output = name |
164 else: | 166 else: |
165 output = sanitized_name | 167 output = sanitized_name |
166 for val in file_data: | 168 for val in file_data: |
167 output = "%s\t%i" % ( output, val.get( node, 0 ) ) | 169 output = "%s\t%i" % (output, val.get(node, 0)) |
168 if options.show_rank: | 170 if options.show_rank: |
169 output = "%s\t%s" % ( output, RANK_NAMES[ rank_int ] ) | 171 output = "%s\t%s" % (output, RANK_NAMES[rank_int]) |
170 if options.taxonomy: | 172 if options.taxonomy: |
171 output = "%s\t%s" % ( output, tax ) | 173 output = "%s\t%s" % (output, tax) |
172 output_lines[ rank_int ].append( output ) | 174 output_lines[rank_int].append(output) |
173 children = child_lists.get( node ) | 175 children = child_lists.get(node) |
174 if children: | 176 if children: |
175 for child in children: | 177 for child in children: |
176 dfs_report( child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax ) | 178 dfs_report(child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax) |
177 | 179 |
178 | 180 |
179 def write_tree( child_lists, name_map, rank_map, options, branch_length=1 ): | 181 def write_tree(child_lists, name_map, rank_map, options, branch_length=1): |
180 # Uses Biopython, only load if making tree | 182 # Uses Biopython, only load if making tree |
181 import Bio.Phylo | 183 import Bio.Phylo |
182 from Bio.Phylo import BaseTree | 184 from Bio.Phylo import BaseTree |
183 | 185 |
184 def _get_name( node_id ): | 186 def _get_name(node_id): |
185 if options.name_id: | 187 if options.name_id: |
186 return node_id | 188 return node_id |
187 return name_map[node_id] | 189 return name_map[node_id] |
188 nodes = {} | 190 nodes = {} |
189 root_node_id = child_lists["0"][0] | 191 root_node_id = child_lists["0"][0] |
190 nodes[root_node_id] = BaseTree.Clade( name=_get_name( root_node_id), branch_length=branch_length ) | 192 nodes[root_node_id] = BaseTree.Clade(name=_get_name(root_node_id), branch_length=branch_length) |
191 | 193 |
192 def recurse_children( parent_id ): | 194 def recurse_children(parent_id): |
193 if options.cluster is not None and rank_map[parent_id] == options.cluster: | 195 if options.cluster is not None and rank_map[parent_id] == options.cluster: |
194 # Short circuit if we found our rank, prevents 'hanging' no ranks from being output | 196 # Short circuit if we found our rank, prevents 'hanging' no ranks from being output |
195 # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db | 197 # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db |
196 return | 198 return |
197 if parent_id not in nodes: | 199 if parent_id not in nodes: |
198 nodes[parent_id] = BaseTree.Clade( name=_get_name( parent_id ), branch_length=branch_length ) | 200 nodes[parent_id] = BaseTree.Clade(name=_get_name(parent_id), branch_length=branch_length) |
199 for child_id in child_lists.get( parent_id, [] ): | 201 for child_id in child_lists.get(parent_id, []): |
200 if options.cluster is None or ( rank_map[child_id] <= options.cluster ): | 202 if options.cluster is None or (rank_map[child_id] <= options.cluster): |
201 if child_id not in nodes: | 203 if child_id not in nodes: |
202 nodes[child_id] = BaseTree.Clade(name=_get_name( child_id ), branch_length=branch_length) | 204 nodes[child_id] = BaseTree.Clade(name=_get_name(child_id), branch_length=branch_length) |
203 nodes[parent_id].clades.append(nodes[child_id]) | 205 nodes[parent_id].clades.append(nodes[child_id]) |
204 recurse_children( child_id ) | 206 recurse_children(child_id) |
205 recurse_children( root_node_id ) | 207 recurse_children(root_node_id) |
206 tree = BaseTree.Tree(root=nodes[root_node_id]) | 208 tree = BaseTree.Tree(root=nodes[root_node_id]) |
207 Bio.Phylo.write( [tree], options.output_tree, 'newick' ) | 209 Bio.Phylo.write([tree], options.output_tree, 'newick') |
208 | 210 |
209 | 211 |
210 def __main__(): | 212 def __main__(): |
211 parser = optparse.OptionParser( usage="%prog [options] file1 file...fileN" ) | 213 parser = optparse.OptionParser(usage="%prog [options] file1 file...fileN") |
212 parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='print version and exit' ) | 214 parser.add_option('-v', '--version', dest='version', action='store_true', default=False, help='print version and exit') |
213 parser.add_option( '', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes' ) | 215 parser.add_option('', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes') |
214 parser.add_option( '', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output' ) | 216 parser.add_option('', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output') |
215 parser.add_option( '', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks' ) | 217 parser.add_option('', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks') |
216 parser.add_option( '', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name' ) | 218 parser.add_option('', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name') |
217 parser.add_option( '', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name' ) | 219 parser.add_option('', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name') |
218 parser.add_option( '', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column' ) | 220 parser.add_option('', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column') |
219 parser.add_option( '', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank' ) | 221 parser.add_option('', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank') |
220 parser.add_option( '', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa' ) | 222 parser.add_option('', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa') |
221 parser.add_option('', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help=r'Replace special chars (\t| |\||\.;) with underscore (_)') | 223 parser.add_option('', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help=r'Replace special chars (\t| |\||\.;) with underscore (_)') |
222 parser.add_option( '', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name' ) | 224 parser.add_option('', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name') |
223 parser.add_option( '', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database' ) | 225 parser.add_option('', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database') |
224 parser.add_option( '', '--output', dest='output', action='store', type="string", default=None, help='Name of output file' ) | 226 parser.add_option('', '--output', dest='output', action='store', type="string", default=None, help='Name of output file') |
225 parser.add_option( '', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree' ) | 227 parser.add_option('', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree') |
226 (options, args) = parser.parse_args() | 228 (options, args) = parser.parse_args() |
227 if options.version: | 229 if options.version: |
228 print( "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ), file=sys.stderr ) | 230 print("Kraken Taxonomy Report (%s) version %s" % (__URL__, __VERSION__), file=sys.stderr) |
229 sys.exit() | 231 sys.exit() |
230 if not args: | 232 if not args: |
231 print( parser.get_usage(), file=sys.stderr ) | 233 print(parser.get_usage(), file=sys.stderr) |
232 sys.exit() | 234 sys.exit() |
233 | 235 |
234 if options.cluster: | 236 if options.cluster: |
235 cluster_name = options.cluster.lower() | 237 cluster_name = options.cluster.lower() |
236 cluster = RANK_NAME_TO_INTS.get( cluster_name, None ) | 238 cluster = RANK_NAME_TO_INTS.get(cluster_name, None) |
237 assert cluster is not None, ValueError( '"%s" is not a valid rank for clustering.' % options.cluster ) | 239 assert cluster is not None, ValueError('"%s" is not a valid rank for clustering.' % options.cluster) |
238 if cluster_name not in PRIMARY_RANK_NAMES: | 240 if cluster_name not in PRIMARY_RANK_NAMES: |
239 assert options.intermediate, ValueError( 'You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster ) | 241 assert options.intermediate, ValueError('You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster) |
240 ranks_to_report = [ cluster ] | 242 ranks_to_report = [cluster] |
241 options.cluster = cluster | 243 options.cluster = cluster |
242 # When clustering we need to do summation | 244 # When clustering we need to do summation |
243 options.summation = True | 245 options.summation = True |
244 else: | 246 else: |
245 options.cluster = None # make empty string into None | 247 options.cluster = None # make empty string into None |
248 if options.output: | 250 if options.output: |
249 output_fh = open(options.output, 'w') | 251 output_fh = open(options.output, 'w') |
250 else: | 252 else: |
251 output_fh = sys.stdout | 253 output_fh = sys.stdout |
252 | 254 |
253 db_path = get_kraken_db_path( options.db ) | 255 db_path = get_kraken_db_path(options.db) |
254 ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names ) | 256 (child_lists, name_map, rank_map) = load_taxonomy(db_path, sanitize_names=options.sanitize_names) |
255 file_data = [] | 257 file_data = [] |
256 hit_taxa = [] | 258 hit_taxa = [] |
257 for input_filename in args: | 259 for input_filename in args: |
258 taxo_counts = {} | 260 taxo_counts = {} |
259 with open( input_filename ) as fh: | 261 with open(input_filename) as fh: |
260 for line in fh: | 262 for line in fh: |
261 fields = line.split( "\t" ) | 263 fields = line.split("\t") |
262 taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1 | 264 taxo_counts[fields[2]] = taxo_counts.get(fields[2], 0) + 1 |
263 clade_counts = taxo_counts.copy() # fixme remove copying? | 265 clade_counts = taxo_counts.copy() # fixme remove copying? |
264 if options.summation: | 266 if options.summation: |
265 dfs_summation( '1', clade_counts, child_lists ) | 267 dfs_summation('1', clade_counts, child_lists) |
266 for key, value in clade_counts.items(): | 268 for key, value in clade_counts.items(): |
267 if value and key not in hit_taxa: | 269 if value and key not in hit_taxa: |
268 hit_taxa.append( key ) | 270 hit_taxa.append(key) |
269 file_data.append( clade_counts ) | 271 file_data.append(clade_counts) |
270 | 272 |
271 if options.header_line: | 273 if options.header_line: |
272 output_fh.write( "#ID\t" ) | 274 output_fh.write("#ID\t") |
273 output_fh.write( "\t".join( args ) ) | 275 output_fh.write("\t".join(args)) |
274 if options.show_rank: | 276 if options.show_rank: |
275 output_fh.write( "\trank" ) | 277 output_fh.write("\trank") |
276 if options.taxonomy: | 278 if options.taxonomy: |
277 output_fh.write( "\ttaxonomy" ) | 279 output_fh.write("\ttaxonomy") |
278 output_fh.write( '\n' ) | 280 output_fh.write('\n') |
279 | 281 |
280 output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] ) | 282 output_lines = dict([(x, []) for x in RANK_NAMES_INTS]) |
281 dfs_report( '1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ) | 283 dfs_report('1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None) |
282 | 284 |
283 for rank_int in ranks_to_report: | 285 for rank_int in ranks_to_report: |
284 for line in output_lines.get( rank_int, [] ): | 286 for line in output_lines.get(rank_int, []): |
285 output_fh.write( line ) | 287 output_fh.write(line) |
286 output_fh.write( '\n' ) | 288 output_fh.write('\n') |
287 fh.close() | 289 fh.close() |
288 if options.output_tree: | 290 if options.output_tree: |
289 write_tree( child_lists, name_map, rank_map, options ) | 291 write_tree(child_lists, name_map, rank_map, options) |
290 | 292 |
291 | 293 |
292 if __name__ == "__main__": | 294 if __name__ == "__main__": |
293 __main__() | 295 __main__() |