Mercurial > repos > jankanis > blast2html
diff blast2html.py @ 81:40ce2043006a
merge py2.6 branch
author | Jan Kanis <jan.code@jankanis.nl> |
---|---|
date | Thu, 19 Jun 2014 17:00:29 +0200 |
parents | 9fb1a7d67317 |
children | 4378d11f0ed7 |
line wrap: on
line diff
--- a/blast2html.py Wed Jun 18 14:33:12 2014 +0200 +++ b/blast2html.py Thu Jun 19 17:00:29 2014 +0200 @@ -1,14 +1,18 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Actually this program works with both python 2 and 3, tested against python 2.6 + # Copyright The Hyve B.V. 2014 # License: GPL version 3 or (at your option) any higher version -from __future__ import unicode_literals +from __future__ import unicode_literals, division import sys import math import warnings +import six, codecs +from six.moves import builtins from os import path from itertools import repeat import argparse @@ -17,10 +21,10 @@ -_filters = {} +_filters = dict(float='float') def filter(func_or_name): "Decorator to register a function as filter in the current jinja environment" - if isinstance(func_or_name, str): + if isinstance(func_or_name, six.string_types): def inner(func): _filters[func_or_name] = func.__name__ return func @@ -46,6 +50,21 @@ return format(float(val), fmt) @filter +def numfmt(val): + """Format numbers in decimal notation, but without excessive trailing 0's. + Default python float formatting will use scientific notation for some values, + or append trailing zeros with the 'f' format type, and the number of digits differs + between python 2 and 3.""" + fpart, ipart = math.modf(val) + if fpart == 0: + return str(int(val)) + # round to 10 to get identical representations in python 2 and 3 + s = format(round(val, 10), '.10f').rstrip('0') + if s[-1] == '.': + s += '0' + return s + +@filter def firsttitle(hit): return hit.Hit_def.text.split('>')[0] @@ -100,10 +119,10 @@ hseq = hsp.Hsp_hseq.text if not qframe in (1, -1): - warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}. (should be 1 or -1)".format(nodeid(hsp), qframe)) + warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_query-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), qframe)) qframe = -1 if qframe < 0 else 1 if not hframe in (1, -1): - warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}. (should be 1 or -1)".format(nodeid(hsp), hframe)) + warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_hit-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), hframe)) hframe = -1 if hframe < 0 else 1 def split(txt): @@ -111,16 +130,16 @@ for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)): yield ( - "Query {:>7} {} {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + - " {:7} {}\n".format('', mid) + - "Subject{:>7} {} {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) + "Query {0:>7} {1} {2}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) + + " {0:7} {1}\n".format('', mid) + + "Subject{0:>7} {1} {2}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe) ) if qfrom+(len(qseq)-1)*qframe != qto: - warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format( + warnings.warn("Error in BlastXML input: Hsp node {0} qseq length mismatch: from {1} to {2} length {3}".format( nodeid(hsp), qfrom, qto, len(qseq))) if hfrom+(len(hseq)-1)*hframe != hto: - warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format( + warnings.warn("Error in BlastXML input: Hsp node {0} hseq length mismatch: from {1} to {2} length {3}".format( nodeid(hsp), hfrom, hto, len(hseq))) @@ -159,29 +178,30 @@ raise Exception("frame should be either +1 or -1") def genelink(hit, type='genbank', hsp=None): - if not isinstance(hit, str): + if not isinstance(hit, six.string_types): hit = hitid(hit) - link = "http://www.ncbi.nlm.nih.gov/nucleotide/{}?report={}&log$=nuclalign".format(hit, type) + link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type) if hsp != None: - link += "&from={}&to={}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to']) + link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to']) return link # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139 # I've removed the html escapes, since html escaping is already being performed by the template engine. +# The r'\u0027' syntax doesn't work the way we need to in python 2.6 with unicode_literals _base_js_escapes = ( - ('\\', r'\u005C'), - ('\'', r'\u0027'), - ('"', r'\u0022'), - # ('>', r'\u003E'), - # ('<', r'\u003C'), - # ('&', r'\u0026'), - # ('=', r'\u003D'), - # ('-', r'\u002D'), - # (';', r'\u003B'), - # (u'\u2028', r'\u2028'), - # (u'\u2029', r'\u2029') + ('\\', '\\u005C'), + ('\'', '\\u0027'), + ('"', '\\u0022'), + # ('>', '\\u003E'), + # ('<', '\\u003C'), + # ('&', '\\u0026'), + # ('=', '\\u003D'), + # ('-', '\\u002D'), + # (';', '\\u003B'), + (u'\u2028', '\\u2028'), + (u'\u2029', '\\u2029') ) # Escape every ASCII character with a value less than 32. This is @@ -198,7 +218,7 @@ javascript snippets. """ - value = str(value) + value = six.text_type(value) for bad, good in _js_escapes: value = value.replace(bad, good) @@ -237,7 +257,10 @@ try: environment.filters[filtername] = getattr(self, funcname) except AttributeError: - environment.filters[filtername] = globals()[funcname] + try: + environment.filters[filtername] = globals()[funcname] + except KeyError: + environment.filters[filtername] = getattr(builtins, funcname) def render(self, output): template = self.environment.get_template(self.templatename) @@ -249,13 +272,14 @@ ('Database', self.blast.BlastOutput_db), ) - output.write(template.render(blast=self.blast, - iterations=self.blast.BlastOutput_iterations.Iteration, - colors=self.colors, - # match_colors=self.match_colors(), - # hit_info=self.hit_info(), - genelink=genelink, - params=params)) + result = template.render(blast=self.blast, + iterations=self.blast.BlastOutput_iterations.Iteration, + colors=self.colors, + genelink=genelink, + params=params) + if six.PY2: + result = result.encode('utf-8') + output.write(result) @filter def match_colors(self, result): @@ -320,21 +344,22 @@ return (float(hsp[path]) for hsp in hsps) yield dict(hit = hit, - title = firsttitle(hit), - maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))), - totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))), - cover = "{:.0%}".format(cover_count / query_length), - e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))), + title = firsttitle(hit), + maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))), + totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))), + cover = "{0:.0%}".format(cover_count / query_length), + e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))), # FIXME: is this the correct formula vv? - ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))), + # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 + ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))), accession = hit.Hit_accession) def main(): default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') - + parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", - usage="{} [-i] INPUT [-o OUTPUT]".format(sys.argv[0])) + usage="{0} [-i] INPUT [-o OUTPUT]".format(sys.argv[0])) input_group = parser.add_mutually_exclusive_group(required=True) input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), help='The input Blast XML file, same as -i/--input')