comparison blast2html.py @ 73:67b1a319c6dc py2.6

First go at 2.6 compatibility. The tests are running without errors, but they don't produce the same output as python3
author Jan Kanis <jan.code@jankanis.nl>
date Wed, 18 Jun 2014 16:21:45 +0200
parents 6ecbfebb9dd9
children 03e044b5bcc2
comparison
equal deleted inserted replaced
72:6ecbfebb9dd9 73:67b1a319c6dc
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*- 2 # -*- coding: utf-8 -*-
3 3
4 # Actually this program works with both python 2 and 3
5
4 # Copyright The Hyve B.V. 2014 6 # Copyright The Hyve B.V. 2014
5 # License: GPL version 3 or (at your option) any higher version 7 # License: GPL version 3 or (at your option) any higher version
6 8
7 from __future__ import unicode_literals 9 from __future__ import unicode_literals, division
8 10
9 import sys 11 import sys
10 import math 12 import math
11 import warnings 13 import warnings
14 import six, codecs
12 from os import path 15 from os import path
13 from itertools import repeat 16 from itertools import repeat
14 import argparse 17 import argparse
15 from lxml import objectify 18 from lxml import objectify
16 import jinja2 19 import jinja2
17 20
18 21
19 22
20 _filters = {} 23 _filters = dict()
21 def filter(func_or_name): 24 def filter(func_or_name):
22 "Decorator to register a function as filter in the current jinja environment" 25 "Decorator to register a function as filter in the current jinja environment"
23 if isinstance(func_or_name, str): 26 if isinstance(func_or_name, six.string_types):
24 def inner(func): 27 def inner(func):
25 _filters[func_or_name] = func.__name__ 28 _filters[func_or_name] = func.__name__
26 return func 29 return func
27 return inner 30 return inner
28 else: 31 else:
98 qseq = hsp.Hsp_qseq.text 101 qseq = hsp.Hsp_qseq.text
99 midline = hsp.Hsp_midline.text 102 midline = hsp.Hsp_midline.text
100 hseq = hsp.Hsp_hseq.text 103 hseq = hsp.Hsp_hseq.text
101 104
102 if not qframe in (1, -1): 105 if not qframe in (1, -1):
103 warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}. (should be 1 or -1)".format(nodeid(hsp), qframe)) 106 warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_query-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), qframe))
104 qframe = -1 if qframe < 0 else 1 107 qframe = -1 if qframe < 0 else 1
105 if not hframe in (1, -1): 108 if not hframe in (1, -1):
106 warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}. (should be 1 or -1)".format(nodeid(hsp), hframe)) 109 warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_hit-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), hframe))
107 hframe = -1 if hframe < 0 else 1 110 hframe = -1 if hframe < 0 else 1
108 111
109 def split(txt): 112 def split(txt):
110 return [txt[i:i+linewidth] for i in range(0, len(txt), linewidth)] 113 return [txt[i:i+linewidth] for i in range(0, len(txt), linewidth)]
111 114
112 for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)): 115 for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)):
113 yield ( 116 yield (
114 "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + 117 "Query {0:>7} {1} {2}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
115 " {:7} {}\n".format('', mid) + 118 " {0:7} {1}\n".format('', mid) +
116 "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) 119 "Subject{0:>7} {1} {2}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
117 ) 120 )
118 121
119 if qfrom+(len(qseq)-1)*qframe != qto: 122 if qfrom+(len(qseq)-1)*qframe != qto:
120 warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( 123 warnings.warn("Error in BlastXML input: Hsp node {0} qseq length mismatch: from {1} to {2} length {3}".format(
121 nodeid(hsp), qfrom, qto, len(qseq))) 124 nodeid(hsp), qfrom, qto, len(qseq)))
122 if hfrom+(len(hseq)-1)*hframe != hto: 125 if hfrom+(len(hseq)-1)*hframe != hto:
123 warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( 126 warnings.warn("Error in BlastXML input: Hsp node {0} hseq length mismatch: from {1} to {2} length {3}".format(
124 nodeid(hsp), hfrom, hto, len(hseq))) 127 nodeid(hsp), hfrom, hto, len(hseq)))
125 128
126 129
127 130
128 @filter('len') 131 @filter('len')
157 elif frame == -1: 160 elif frame == -1:
158 return 'Minus' 161 return 'Minus'
159 raise Exception("frame should be either +1 or -1") 162 raise Exception("frame should be either +1 or -1")
160 163
161 def genelink(hit, type='genbank', hsp=None): 164 def genelink(hit, type='genbank', hsp=None):
162 if not isinstance(hit, str): 165 if not isinstance(hit, six.string_types):
163 hit = hitid(hit) 166 hit = hitid(hit)
164 link = "http://www.ncbi.nlm.nih.gov/nucleotide/{}?report={}&log$=nuclalign".format(hit, type) 167 link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
165 if hsp != None: 168 if hsp != None:
166 link += "&from={}&to={}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to']) 169 link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
167 return link 170 return link
168 171
169 172
170 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139 173 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
171 # I've removed the html escapes, since html escaping is already being performed by the template engine. 174 # I've removed the html escapes, since html escaping is already being performed by the template engine.
196 Javascript string literal escape. Note that this only escapes data 199 Javascript string literal escape. Note that this only escapes data
197 for embedding within javascript string literals, not in general 200 for embedding within javascript string literals, not in general
198 javascript snippets. 201 javascript snippets.
199 """ 202 """
200 203
201 value = str(value) 204 value = six.text_type(value)
202 205
203 for bad, good in _js_escapes: 206 for bad, good in _js_escapes:
204 value = value.replace(bad, good) 207 value = value.replace(bad, good)
205 208
206 return value 209 return value
223 def __init__(self, input, templatedir, templatename): 226 def __init__(self, input, templatedir, templatename):
224 self.input = input 227 self.input = input
225 self.templatename = templatename 228 self.templatename = templatename
226 229
227 self.blast = objectify.parse(self.input).getroot() 230 self.blast = objectify.parse(self.input).getroot()
228 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) 231 self.loader = jinja2.FileSystemLoader(searchpath=templatedir, encoding='utf-8')
229 self.environment = jinja2.Environment(loader=self.loader, 232 self.environment = jinja2.Environment(loader=self.loader,
230 lstrip_blocks=True, trim_blocks=True, autoescape=True) 233 lstrip_blocks=True, trim_blocks=True, autoescape=True)
231 234
232 self._addfilters(self.environment) 235 self._addfilters(self.environment)
233 236
247 ('Query length', self.blast["BlastOutput_query-len"]), 250 ('Query length', self.blast["BlastOutput_query-len"]),
248 ('Program', self.blast.BlastOutput_version), 251 ('Program', self.blast.BlastOutput_version),
249 ('Database', self.blast.BlastOutput_db), 252 ('Database', self.blast.BlastOutput_db),
250 ) 253 )
251 254
252 output.write(template.render(blast=self.blast, 255 result = template.render(blast=self.blast,
253 iterations=self.blast.BlastOutput_iterations.Iteration, 256 iterations=self.blast.BlastOutput_iterations.Iteration,
254 colors=self.colors, 257 colors=self.colors,
255 # match_colors=self.match_colors(), 258 # match_colors=self.match_colors(),
256 # hit_info=self.hit_info(), 259 # hit_info=self.hit_info(),
257 genelink=genelink, 260 genelink=genelink,
258 params=params)) 261 params=params)
262 if six.PY2:
263 result = result.encode('utf-8')
264 output.write(result)
259 265
260 @filter 266 @filter
261 def match_colors(self, result): 267 def match_colors(self, result):
262 """ 268 """
263 An iterator that yields lists of length-color pairs. 269 An iterator that yields lists of length-color pairs.
319 def hsp_val(path): 325 def hsp_val(path):
320 return (float(hsp[path]) for hsp in hsps) 326 return (float(hsp[path]) for hsp in hsps)
321 327
322 yield dict(hit = hit, 328 yield dict(hit = hit,
323 title = firsttitle(hit), 329 title = firsttitle(hit),
324 maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))), 330 maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))),
325 totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))), 331 totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
326 cover = "{:.0%}".format(cover_count / query_length), 332 cover = "{0:.0%}".format(cover_count / query_length),
327 e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))), 333 e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))),
328 # FIXME: is this the correct formula vv? 334 # FIXME: is this the correct formula vv?
329 ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))), 335 ident = "{0:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))),
330 accession = hit.Hit_accession) 336 accession = hit.Hit_accession)
331 337
332 338
333 def main(): 339 def main():
340 #import pdb; pdb.set_trace()
334 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') 341 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
335 342
336 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", 343 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
337 usage="{} [-i] INPUT [-o OUTPUT]".format(sys.argv[0])) 344 usage="{0} [-i] INPUT [-o OUTPUT]".format(sys.argv[0]))
338 input_group = parser.add_mutually_exclusive_group(required=True) 345 input_group = parser.add_mutually_exclusive_group(required=True)
339 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), 346 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
340 help='The input Blast XML file, same as -i/--input') 347 help='The input Blast XML file, same as -i/--input')
341 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), 348 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
342 help='The input Blast XML file') 349 help='The input Blast XML file')