diff blast2html.py @ 81:40ce2043006a

merge py2.6 branch
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 19 Jun 2014 17:00:29 +0200
parents 9fb1a7d67317
children 4378d11f0ed7
line wrap: on
line diff
--- a/blast2html.py	Wed Jun 18 14:33:12 2014 +0200
+++ b/blast2html.py	Thu Jun 19 17:00:29 2014 +0200
@@ -1,14 +1,18 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+# Despite the python3 shebang, this program runs on both Python 2 and Python 3 (tested against Python 2.6)
+
 # Copyright The Hyve B.V. 2014
 # License: GPL version 3 or (at your option) any higher version
 
-from __future__ import unicode_literals
+from __future__ import unicode_literals, division
 
 import sys
 import math
 import warnings
+import six, codecs
+from six.moves import builtins
 from os import path
 from itertools import repeat
 import argparse
@@ -17,10 +21,10 @@
 
 
 
-_filters = {}
+_filters = dict(float='float')
 def filter(func_or_name):
     "Decorator to register a function as filter in the current jinja environment"
-    if isinstance(func_or_name, str):
+    if isinstance(func_or_name, six.string_types):
         def inner(func):
             _filters[func_or_name] = func.__name__
             return func
@@ -46,6 +50,21 @@
     return format(float(val), fmt)
 
 @filter
+def numfmt(val):
+    """Format a number in plain decimal notation without excess trailing zeros.
+    Whole values are returned as an integer string; any other value is rounded to
+    10 decimals and stripped of trailing zeros, keeping one digit after the point.
+    This sidesteps scientific notation and python 2/3 float formatting differences."""
+    fpart, ipart = math.modf(val)
+    if fpart == 0:
+        return str(int(val))
+    # round to 10 decimals first so python 2 and 3 give identical representations
+    s = format(round(val, 10), '.10f').rstrip('0')
+    if s[-1] == '.':
+        s += '0'
+    return s
+
+@filter
 def firsttitle(hit):
     return hit.Hit_def.text.split('>')[0]
 
@@ -100,10 +119,10 @@
     hseq = hsp.Hsp_hseq.text
 
     if not qframe in (1, -1):
-        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_query-frame of {}. (should be 1 or -1)".format(nodeid(hsp), qframe))
+        warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_query-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), qframe))
         qframe = -1 if qframe < 0 else 1
     if not hframe in (1, -1):
-        warnings.warn("Error in BlastXML input: Hsp node {} has a Hsp_hit-frame of {}. (should be 1 or -1)".format(nodeid(hsp), hframe))
+        warnings.warn("Error in BlastXML input: Hsp node {0} has a Hsp_hit-frame of {1}. (should be 1 or -1)".format(nodeid(hsp), hframe))
         hframe = -1 if hframe < 0 else 1
     
     def split(txt):
@@ -111,16 +130,16 @@
 
     for qs, mid, hs, offset in zip(split(qseq), split(midline), split(hseq), range(0, len(qseq), linewidth)):
         yield (
-            "Query  {:>7}  {}  {}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
-            "       {:7}  {}\n".format('', mid) +
-            "Subject{:>7}  {}  {}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
+            "Query  {0:>7}  {1}  {2}\n".format(qfrom+offset*qframe, qs, qfrom+(offset+len(qs)-1)*qframe) +
+            "       {0:7}  {1}\n".format('', mid) +
+            "Subject{0:>7}  {1}  {2}".format(hfrom+offset*hframe, hs, hfrom+(offset+len(hs)-1)*hframe)
         )
         
     if qfrom+(len(qseq)-1)*qframe != qto:
-        warnings.warn("Error in BlastXML input: Hsp node {} qseq length mismatch: from {} to {} length {}".format(
+        warnings.warn("Error in BlastXML input: Hsp node {0} qseq length mismatch: from {1} to {2} length {3}".format(
             nodeid(hsp), qfrom, qto, len(qseq)))
     if hfrom+(len(hseq)-1)*hframe != hto:
-        warnings.warn("Error in BlastXML input: Hsp node {} hseq length mismatch: from {} to {} length {}".format(
+        warnings.warn("Error in BlastXML input: Hsp node {0} hseq length mismatch: from {1} to {2} length {3}".format(
             nodeid(hsp), hfrom, hto, len(hseq)))
 
     
@@ -159,29 +178,30 @@
     raise Exception("frame should be either +1 or -1")
 
 def genelink(hit, type='genbank', hsp=None):
-    if not isinstance(hit, str):
+    if not isinstance(hit, six.string_types):
         hit = hitid(hit)
-    link = "http://www.ncbi.nlm.nih.gov/nucleotide/{}?report={}&log$=nuclalign".format(hit, type)
+    link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
     if hsp != None:
-        link += "&from={}&to={}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
+        link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
     return link
 
 
 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
 # I've removed the html escapes, since html escaping is already being performed by the template engine.
 
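+# A raw literal like r'\u0027' can't be used here: with unicode_literals on python 2 (tested on 2.6) it is a raw *unicode* literal, in which \uXXXX escapes are still interpreted, so the backslash is escaped explicitly.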
 _base_js_escapes = (
-    ('\\', r'\u005C'),
-    ('\'', r'\u0027'),
-    ('"', r'\u0022'),
-    # ('>', r'\u003E'),
-    # ('<', r'\u003C'),
-    # ('&', r'\u0026'),
-    # ('=', r'\u003D'),
-    # ('-', r'\u002D'),
-    # (';', r'\u003B'),
-    # (u'\u2028', r'\u2028'),
-    # (u'\u2029', r'\u2029')
+    ('\\', '\\u005C'),
+    ('\'', '\\u0027'),
+    ('"', '\\u0022'),
+    # ('>', '\\u003E'),
+    # ('<', '\\u003C'),
+    # ('&', '\\u0026'),
+    # ('=', '\\u003D'),
+    # ('-', '\\u002D'),
+    # (';', '\\u003B'),
+    (u'\u2028', '\\u2028'),
+    (u'\u2029', '\\u2029')
 )
 
 # Escape every ASCII character with a value less than 32. This is
@@ -198,7 +218,7 @@
     javascript snippets.
     """
 
-    value = str(value)
+    value = six.text_type(value)
 
     for bad, good in _js_escapes:
         value = value.replace(bad, good)
@@ -237,7 +257,10 @@
             try:
                 environment.filters[filtername] = getattr(self, funcname)
             except AttributeError:
-                environment.filters[filtername] = globals()[funcname]
+                try:
+                    environment.filters[filtername] = globals()[funcname]
+                except KeyError:
+                    environment.filters[filtername] = getattr(builtins, funcname)
 
     def render(self, output):
         template = self.environment.get_template(self.templatename)
@@ -249,13 +272,14 @@
                   ('Database', self.blast.BlastOutput_db),
         )
 
-        output.write(template.render(blast=self.blast,
-                                     iterations=self.blast.BlastOutput_iterations.Iteration,
-                                     colors=self.colors,
-                                     # match_colors=self.match_colors(),
-                                     # hit_info=self.hit_info(),
-                                     genelink=genelink,
-                                     params=params))
+        result = template.render(blast=self.blast,
+                                 iterations=self.blast.BlastOutput_iterations.Iteration,
+                                 colors=self.colors,
+                                 genelink=genelink,
+                                 params=params)
+        if six.PY2:
+            result = result.encode('utf-8')
+        output.write(result)
 
     @filter
     def match_colors(self, result):
@@ -320,21 +344,22 @@
                 return (float(hsp[path]) for hsp in hsps)
 
             yield dict(hit = hit,
-                      title = firsttitle(hit),
-                       maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))),
-                       totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
-                       cover = "{:.0%}".format(cover_count / query_length),
-                       e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))),
+                       title = firsttitle(hit),
+                       maxscore = "{0:.1f}".format(max(hsp_val('Hsp_bit-score'))),
+                       totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
+                       cover = "{0:.0%}".format(cover_count / query_length),
+                       e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))),
                        # FIXME: is this the correct formula vv?
-                       ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))),
+                       # float(...) is needed because true division (from __future__ import division) doesn't work on lxml elements in python 2.6
+                       ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))),
                        accession = hit.Hit_accession)
 
 
 def main():
     default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
-    
+
     parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
-                                     usage="{} [-i] INPUT [-o OUTPUT]".format(sys.argv[0]))
+                                     usage="{0} [-i] INPUT [-o OUTPUT]".format(sys.argv[0]))
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                              help='The input Blast XML file, same as -i/--input')