comparison blast2html.py @ 81:40ce2043006a

merge py2.6 branch
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 19 Jun 2014 17:00:29 +0200
parents 9fb1a7d67317
children 4378d11f0ed7
comparison
equal deleted inserted replaced
72:6ecbfebb9dd9 81:40ce2043006a
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # Actually this program works with both python 2 and 3, tested against python 2.6
5
4 # Copyright The Hyve B.V. 2014 6 # Copyright The Hyve B.V. 2014
5 # License: GPL version 3 or (at your option) any higher version 7 # License: GPL version 3 or (at your option) any higher version
6 8
7 from __future__ import unicode_literals 9 from __future__ import unicode_literals, division
8 10
9 import sys 11 import sys
10 import math 12 import math
11 import warnings 13 import warnings
14 import six, codecs
15 from six.moves import builtins
12 from os import path 16 from os import path
13 from itertools import repeat 17 from itertools import repeat
14 import argparse 18 import argparse
15 from lxml import objectify 19 from lxml import objectify
16 import jinja2 20 import jinja2
17 21
18 22
19 23
# Registry mapping a jinja filter name to the name of the callable that
# implements it. 'float' is pre-registered so templates can use the builtin
# float() as a filter; the name is resolved when the filters are installed.
_filters = {'float': 'float'}
21 def filter(func_or_name): 25 def filter(func_or_name):
22 "Decorator to register a function as filter in the current jinja environment" 26 "Decorator to register a function as filter in the current jinja environment"
23 if isinstance(func_or_name, str): 27 if isinstance(func_or_name, six.string_types):
24 def inner(func): 28 def inner(func):
25 _filters[func_or_name] = func.__name__ 29 _filters[func_or_name] = func.__name__
26 return func 30 return func
27 return inner 31 return inner
28 else: 32 else:
44 @filter 48 @filter
def fmt(val, fmt):
    """Convert ``val`` to a float and render it with the format spec ``fmt``."""
    number = float(val)
    return format(number, fmt)
47 51
48 @filter 52 @filter
def numfmt(val):
    """Format numbers in decimal notation, but without excessive trailing 0's.

    Default python float formatting will use scientific notation for some values,
    or append trailing zeros with the 'f' format type, and the number of digits differs
    between python 2 and 3.

    val -- a number (anything accepted by math.modf)

    Returns a string: whole numbers are rendered without a decimal point,
    other finite values with at most 10 and at least one decimal, and
    non-finite values as 'inf', '-inf' or 'nan'.
    """
    fpart, ipart = math.modf(val)
    if math.isinf(ipart) or math.isnan(ipart):
        # modf() reports infinity as a whole number (fractional part 0), but
        # int(inf) raises OverflowError, so render non-finite values directly.
        return format(ipart, 'f')
    if fpart == 0:
        return str(int(val))
    # round to 10 to get identical representations in python 2 and 3
    s = format(round(val, 10), '.10f').rstrip('0')
    if s.endswith('.'):
        # everything after the decimal point was stripped; keep one digit
        s += '0'
    return s
66
67 @filter
def firsttitle(hit):
    """Return the part of the hit's ``Hit_def`` text that precedes the first '>'."""
    definition = hit.Hit_def.text
    head, _sep, _rest = definition.partition('>')
    return head
51 70
52 @filter 71 @filter
53 def othertitles(hit): 72 def othertitles(hit):
98 qseq = hsp.Hsp_qseq.text 117 qseq = hsp.Hsp_qseq.text
99 midline = hsp.Hsp_midline.text 118 midline = hsp.Hsp_midline.text
100 hseq = hsp.Hsp_hseq.text 119 hseq = hsp.Hsp_hseq.text
101 120
102 if not qframe in (1, -1): 121 if not qframe in (1, -1):
103 warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}. (should be 1 or -1)".format(nodeid(hsp), qframe)) 122 warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_query-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), qframe))
104 qframe = -1 if qframe < 0 else 1 123 qframe = -1 if qframe < 0 else 1
105 if not hframe in (1, -1): 124 if not hframe in (1, -1):
106 warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}. (should be 1 or -1)".format(nodeid(hsp), hframe)) 125 warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_hit-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), hframe))
107 hframe = -1 if hframe < 0 else 1 126 hframe = -1 if hframe < 0 else 1
108 127
109 def split(txt): 128 def split(txt):
110 return [txt[i:i+linewidth] for i in range(0, len(txt), linewidth)] 129 return [txt[i:i+linewidth] for i in range(0, len(txt), linewidth)]
111 130
112 for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)): 131 for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)):
113 yield ( 132 yield (
114 "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + 133 "Query {0:>7} {1} {2}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
115 " {:7} {}\n".format('', mid) + 134 " {0:7} {1}\n".format('', mid) +
116 "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) 135 "Subject{0:>7} {1} {2}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
117 ) 136 )
118 137
119 if qfrom+(len(qseq)-1)*qframe != qto: 138 if qfrom+(len(qseq)-1)*qframe != qto:
120 warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( 139 warnings.warn("Error in BlastXML input: Hsp node {0} qseq length mismatch: from {1} to {2} length {3}".format(
121 nodeid(hsp), qfrom, qto, len(qseq))) 140 nodeid(hsp), qfrom, qto, len(qseq)))
122 if hfrom+(len(hseq)-1)*hframe != hto: 141 if hfrom+(len(hseq)-1)*hframe != hto:
123 warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( 142 warnings.warn("Error in BlastXML input: Hsp node {0} hseq length mismatch: from {1} to {2} length {3}".format(
124 nodeid(hsp), hfrom, hto, len(hseq))) 143 nodeid(hsp), hfrom, hto, len(hseq)))
125 144
126 145
127 146
128 @filter('len') 147 @filter('len')
157 elif frame == -1: 176 elif frame == -1:
158 return 'Minus' 177 return 'Minus'
159 raise Exception("frame should be either +1 or -1") 178 raise Exception("frame should be either +1 or -1")
160 179
def genelink(hit, type='genbank', hsp=None):
    """Build the NCBI nucleotide URL for a hit.

    hit  -- a hit identifier string, or any other object, in which case the
            identifier is obtained by passing it to hitid()
    type -- value placed in the ``report`` query parameter
    hsp  -- optional; when given, its 'Hsp_hit-from' and 'Hsp_hit-to' items
            are appended as the ``from`` and ``to`` query parameters

    Returns the link as a string.
    """
    if not isinstance(hit, six.string_types):
        hit = hitid(hit)
    link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
    # Identity test rather than `!= None`: it cannot be affected by whatever
    # comparison behaviour the hsp object implements.
    if hsp is not None:
        link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
    return link
168 187
169 188
170 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139 189 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
171 # I've removed the html escapes, since html escaping is already being performed by the template engine. 190 # I've removed the html escapes, since html escaping is already being performed by the template engine.
172 191
# Pairs of (character, javascript \uXXXX escape sequence). The escape text is
# generated from the character's code point instead of being spelled out as a
# literal, because the r'\u0027' syntax doesn't work the way we need it to in
# python 2.6 with unicode_literals.
# Not escaped here (html escaping is left to the template engine):
# > < & = - ;
_base_js_escapes = tuple(
    (char, '\\u{0:04X}'.format(ord(char)))
    for char in ('\\', '\'', '"', u'\u2028', u'\u2029')
)
186 206
187 # Escape every ASCII character with a value less than 32. This is 207 # Escape every ASCII character with a value less than 32. This is
188 # needed a.o. to prevent html parsers from jumping out of javascript 208 # needed a.o. to prevent html parsers from jumping out of javascript
189 # parsing mode. 209 # parsing mode.
196 Javascript string literal escape. Note that this only escapes data 216 Javascript string literal escape. Note that this only escapes data
197 for embedding within javascript string literals, not in general 217 for embedding within javascript string literals, not in general
198 javascript snippets. 218 javascript snippets.
199 """ 219 """
200 220
201 value = str(value) 221 value = six.text_type(value)
202 222
203 for bad, good in _js_escapes: 223 for bad, good in _js_escapes:
204 value = value.replace(bad, good) 224 value = value.replace(bad, good)
205 225
206 return value 226 return value
def _addfilters(self, environment):
    """Install every registered filter into ``environment.filters``.

    Each registered function name is looked up first as an attribute of
    this object, then among the module globals, and finally among the
    builtins; the first match is stored under the filter's name.
    """
    for filtername, funcname in _filters.items():
        try:
            resolved = getattr(self, funcname)
        except AttributeError:
            try:
                resolved = globals()[funcname]
            except KeyError:
                resolved = getattr(builtins, funcname)
        environment.filters[filtername] = resolved
241 264
242 def render(self, output): 265 def render(self, output):
243 template = self.environment.get_template(self.templatename) 266 template = self.environment.get_template(self.templatename)
244 267
245 params = (('Query ID', self.blast["BlastOutput_query-ID"]), 268 params = (('Query ID', self.blast["BlastOutput_query-ID"]),
247 ('Query length', self.blast["BlastOutput_query-len"]), 270 ('Query length', self.blast["BlastOutput_query-len"]),
248 ('Program', self.blast.BlastOutput_version), 271 ('Program', self.blast.BlastOutput_version),
249 ('Database', self.blast.BlastOutput_db), 272 ('Database', self.blast.BlastOutput_db),
250 ) 273 )
251 274
252 output.write(template.render(blast=self.blast, 275 result = template.render(blast=self.blast,
253 iterations=self.blast.BlastOutput_iterations.Iteration, 276 iterations=self.blast.BlastOutput_iterations.Iteration,
254 colors=self.colors, 277 colors=self.colors,
255 # match_colors=self.match_colors(), 278 genelink=genelink,
256 # hit_info=self.hit_info(), 279 params=params)
257 genelink=genelink, 280 if six.PY2:
258 params=params)) 281 result = result.encode('utf-8')
282 output.write(result)
259 283
260 @filter 284 @filter
261 def match_colors(self, result): 285 def match_colors(self, result):
262 """ 286 """
263 An iterator that yields lists of length-color pairs. 287 An iterator that yields lists of length-color pairs.
318 342
319 def hsp_val(path): 343 def hsp_val(path):
320 return (float(hsp[path]) for hsp in hsps) 344 return (float(hsp[path]) for hsp in hsps)
321 345
322 yield dict(hit = hit, 346 yield dict(hit = hit,
323 title = firsttitle(hit), 347 title = firsttitle(hit),
324 maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))), 348 maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))),
325 totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))), 349 totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
326 cover = "{:.0%}".format(cover_count / query_length), 350 cover = "{0:.0%}".format(cover_count / query_length),
327 e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))), 351 e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))),
328 # FIXME: is this the correct formula vv? 352 # FIXME: is this the correct formula vv?
329 ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))), 353 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
354 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))),
330 accession = hit.Hit_accession) 355 accession = hit.Hit_accession)
331 356
332 357
333 def main(): 358 def main():
334 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') 359 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
335 360
336 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", 361 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
337 usage="{} [-i] INPUT [-o OUTPUT]".format(sys.argv[0])) 362 usage="{0} [-i] INPUT [-o OUTPUT]".format(sys.argv[0]))
338 input_group = parser.add_mutually_exclusive_group(required=True) 363 input_group = parser.add_mutually_exclusive_group(required=True)
339 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), 364 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
340 help='The input Blast XML file, same as -i/--input') 365 help='The input Blast XML file, same as -i/--input')
341 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), 366 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
342 help='The input Blast XML file') 367 help='The input Blast XML file')