comparison blast2html.py @ 120:2729c2326235

Fix for Rikilt issue 13 Hit e-value and identity% should be taken from the hsp with the highest bit score. Previously each of these values was calculated independently. Also use arrays for cover calculation instead of python lists and refactor the hit_info() code a bit.
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 31 Jul 2014 16:14:36 +0200
parents 591dc9c24824
children 5f104d05aa23
comparison
equal deleted inserted replaced
119:591dc9c24824 120:2729c2326235
14 import six, codecs, io 14 import six, codecs, io
15 from six.moves import builtins 15 from six.moves import builtins
16 from os import path 16 from os import path
17 from itertools import repeat 17 from itertools import repeat
18 from collections import defaultdict, namedtuple 18 from collections import defaultdict, namedtuple
19 from array import array
19 import glob 20 import glob
20 import argparse 21 import argparse
21 from lxml import objectify 22 from lxml import objectify
22 import jinja2 23 import jinja2
23 24
325 query_length = blastxml_len(result) 326 query_length = blastxml_len(result)
326 327
327 for hit in hits(result): 328 for hit in hits(result):
328 hsps = hit.Hit_hsps.Hsp 329 hsps = hit.Hit_hsps.Hsp
329 330
330 cover = [False] * query_length 331 cover = array('B', [0]) * query_length
331 for hsp in hsps: 332 for hsp in hsps:
332 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp)) 333 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = array('B', [1]) * blastxml_len(hsp)
333 cover_count = cover.count(True) 334 cover_count = cover.count(1)
334 335
335 def hsp_val(path): 336 best_hsp = max(hsps, key=lambda h: h['Hsp_bit-score'])
336 return (float(hsp[path]) for hsp in hsps)
337 337
338 yield dict(hit = hit, 338 yield dict(hit = hit,
339 title = firsttitle(hit), 339 title = firsttitle(hit),
340 maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))), 340 maxscore = format(float(best_hsp['Hsp_bit-score']), '.1f'),
341 totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))), 341 e_value = format(float(best_hsp.Hsp_evalue), '.4'),
342 cover = "{0:.0%}".format(cover_count / query_length),
343 e_value = "{0:.4}".format(float(min(hsp_val('Hsp_evalue')))),
344 # FIXME: is this the correct formula vv?
345 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 342 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
346 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) 343 ident = format(float(best_hsp.Hsp_identity) / blastxml_len(best_hsp), '.0%'),
344 totalscore = format(sum(hsp['Hsp_bit-score'] for hsp in hsps), '.1f'),
345 cover = format(cover_count / query_length, '.0%'),
346 )
347 347
348 @filter 348 @filter
349 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True): 349 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):
350 """Create a html link from a hit node to a configured gene bank webpage. 350 """Create a html link from a hit node to a configured gene bank webpage.
351 text: The text of the link. If not set applies text_from. 351 text: The text of the link. If not set applies text_from.