diff blast2html.py @ 53:4217bb9cf1d3

depend on python 3; fix internal links with multiple iterations
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 26 May 2014 13:07:13 +0200
parents b15a20c2372a
children 19c48f2ec775
line wrap: on
line diff
--- a/blast2html.py	Wed May 21 18:39:51 2014 +0200
+++ b/blast2html.py	Mon May 26 13:07:13 2014 +0200
@@ -1,7 +1,5 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-#
-# Actually runs under either python 2 or 3
 
 # Copyright The Hyve B.V. 2014
 # License: GPL version 3 or higher
@@ -13,7 +11,6 @@
 import warnings
 from os import path
 from itertools import repeat
-import six
 import argparse
 from lxml import objectify
 import jinja2
@@ -23,7 +20,7 @@
 _filters = {}
 def filter(func_or_name):
     "Decorator to register a function as filter in the current jinja environment"
-    if isinstance(func_or_name, six.string_types):
+    if isinstance(func_or_name, str):
         def inner(func):
             _filters[func_or_name] = func.__name__
             return func
@@ -99,9 +96,25 @@
     elif node.tag == 'Iteration':
         return int(node['Iteration_query-len'])
     raise Exception("Unknown XML node type: "+node.tag)
-        
 
 @filter
+def nodeid(node):
+    "Return the Iteration[-Hit[-Hsp]] numbers of an Iteration, Hit or Hsp node joined by '-'; raise ValueError otherwise"
+    levels = ('Hsp', 'Hit', 'Iteration')  # innermost first
+    if node.tag not in levels:
+        raise ValueError("The nodeid filter can only be applied to Hsp, Hit or Iteration nodes in a BlastXML document")
+    parts = []
+    for tag in levels[levels.index(node.tag):]:
+        if parts:  # the owning node is two getparent() steps above the one just handled
+            node = node.getparent().getparent()
+        if node.tag != tag:  # explicit raise: an assert would be stripped when running under -O
+            raise ValueError("Malformed BlastXML document: expected a %s node, found %s" % (tag, node.tag))
+        field = 'Iteration_iter-num' if tag == 'Iteration' else tag + '_num'
+        parts.insert(0, getattr(node, field).text)  # getattr: 'Iteration_iter-num' is not a valid identifier
+    return '-'.join(parts)
+
+    
+@filter
 def asframe(frame):
     if frame == 1:
         return 'Plus'
@@ -174,7 +187,7 @@
         self.templatename = templatename
 
         self.blast = objectify.parse(self.input).getroot()
-        self.loader = jinja2.FileSystemLoader(searchpath=templatedir, encoding='utf-8')
+        self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
         self.environment = jinja2.Environment(loader=self.loader,
                                               lstrip_blocks=True, trim_blocks=True, autoescape=True)
 
@@ -238,7 +251,7 @@
                 count = 1
             matches.append((count * percent_multiplier, self.colors[last] if last != 255 else 'transparent'))
 
-            yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit))
+            yield dict(colors=matches, hit=hit, defline=firsttitle(hit))
 
     @filter
     def queryscale(self, result):
@@ -269,8 +282,7 @@
                 return (float(hsp[path]) for hsp in hsps)
 
             yield dict(hit = hit,
-                       title = firsttitle(hit),
-                       link_id = hit.Hit_num,
+                      title = firsttitle(hit),
                        maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))),
                        totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
                        cover = "{:.0%}".format(cover_count / query_length),
@@ -288,16 +300,16 @@
     input_group = parser.add_mutually_exclusive_group(required=True)
     input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
                              help='The input Blast XML file, same as -i/--input')
-    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r', encoding='utf-8'), 
+    input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), 
                              help='The input Blast XML file')
-    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w', encoding='utf-8'), default=sys.stdout,
+    parser.add_argument('-o', '--output', type=argparse.FileType(mode='w'), default=sys.stdout,
                         help='The output html file')
     # We just want the file name here, so jinja can open the file
     # itself. But it is easier to just use a FileType so argparse can
     # handle the errors. This introduces a small race condition when
     # jinja later tries to re-open the template file, but we don't
     # care too much.
-    parser.add_argument('--template', type=argparse.FileType(mode='r', encoding='utf-8'), default=default_template,
+    parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
                         help='The template file to use. Defaults to blast_html.html.jinja')
 
     args = parser.parse_args()