diff visualise.py @ 10:2fbdf2eb27b4

All data is displayed now, still some formatting to do
author Jan Kanis <jan.code@jankanis.nl>
date Fri, 09 May 2014 18:16:48 +0200
parents 9e7927673089
children 7660519f2dc9
line wrap: on
line diff
--- a/visualise.py	Thu May 08 18:59:32 2014 +0200
+++ b/visualise.py	Fri May 09 18:16:48 2014 +0200
@@ -11,6 +11,21 @@
 import jinja2
 
 
+blast = objectify.parse('blast xml example1.xml').getroot()
+loader = jinja2.FileSystemLoader(searchpath='.')
+environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True)
+
+def filter(func_or_name):
+    if isinstance(func_or_name, str):
+        def inner(func):
+            environment.filters[func_or_name] = func
+            return func
+        return inner
+    else:
+        environment.filters[func_or_name.__name__] = func_or_name
+        return func_or_name
+
+
 def color_idx(length):
     if length < 40:
         return 0
@@ -22,20 +37,66 @@
         return 3
     return 4
 
-
 colors = ['black', 'blue', 'green', 'magenta', 'red']
 
-blast = objectify.parse('blast xml example1.xml').getroot()
-loader = jinja2.FileSystemLoader(searchpath='.')
-environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True)
 environment.filters['color'] = lambda length: match_colors[color_idx(length)]
 
+@filter
+def fmt(val, fmt):
+    return format(float(val), fmt)
+
+@filter
+def firsttitle(hit):
+    return hit.Hit_def.text.split('>')[0]
+
+@filter
+def othertitles(hit):
+    """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles."""
+    id_titles = hit.Hit_def.text.split('>')
+
+    titles = []
+    for t in id_titles[1:]:
+        fullid, title = t.split(' ', 1)
+        id = fullid.split('|', 2)[2]
+        titles.append(dict(id = id,
+                           fullid = fullid,
+                           title = title))
+    return titles
+
+@filter
+def hitid(hit):
+    return hit.Hit_id.text.split('|', 2)[1]
+
+@filter
+def seqid(hit):
+    return hit.Hit_id.text.split('|', 2)[2]
+
+@filter
+def alignment_pre(hsp):
+    return (
+        "Query   {:>7s}  {}  {}\n".format(hsp['Hsp_query-from'], hsp.Hsp_qseq, hsp['Hsp_query-to']) +
+        "        {:7s}  {}\n".format('', hsp.Hsp_midline) +
+        "Subject {:>7s}  {}  {}".format(hsp['Hsp_hit-from'], hsp.Hsp_hseq, hsp['Hsp_hit-to']))
+
+@filter('len')
+def hsplen(node):
+    return int(node['Hsp_align-len'])
+
+@filter
+def asframe(frame):
+    if frame == 1:
+        return 'Plus'
+    elif frame == -1:
+        return 'Minus'
+    raise Exception("frame should be either +1 or -1")
+
+
 query_length = int(blast["BlastOutput_query-len"])
 
 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
 # sort hits by longest hotspot first
 ordered_hits = sorted(hits,
-                      key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp),
+                      key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp),
                       reverse=True)
 
 def match_colors():
@@ -48,12 +109,12 @@
     for hit in hits:
         # sort hotspots from short to long, so we can overwrite index colors of
         # short matches with those of long ones.
-        hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len'])
+        hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp))
         table = bytearray([255]) * query_length
         for hsp in hotspots:
             frm = hsp['Hsp_query-from'] - 1
             to = int(hsp['Hsp_query-to'])
-            table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm)
+            table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm)
 
         matches = []
         last = table[0]
@@ -67,7 +128,7 @@
             count = 1
         matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none'))
 
-        yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def)
+        yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit))
 
 
 def queryscale():
@@ -88,22 +149,23 @@
 
         cover = [False] * query_length
         for hsp in hsps:
-            cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len']))
+            cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp))
         cover_count = cover.count(True)
         
         def hsp_val(path):
             return (hsp[path] for hsp in hsps)
         
-        yield dict(description = hit.Hit_def,
-                   maxscore = max(hsp_val('Hsp_bit-score')),
-                   totalscore = sum(hsp_val('Hsp_bit-score')),
+        yield dict(title = firsttitle(hit),
+                   link_id = hit.Hit_num,
+                   maxscore = "{:.1f}".format(float(max(hsp_val('Hsp_bit-score')))),
+                   totalscore = "{:.1f}".format(float(sum(hsp_val('Hsp_bit-score')))),
                    cover = "{:.0%}".format(cover_count / query_length),
-                   e_value = min(hsp_val('Hsp_evalue')),
+                   e_value = "{:.4g}".format(float(min(hsp_val('Hsp_evalue')))),
                    # FIXME: is this the correct formula vv?
-                   ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)),
+                   ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))),
                    accession = hit.Hit_accession)
-                   
-        
+
+
 def main():
     template = environment.get_template('visualise.html.jinja')
 
@@ -119,7 +181,7 @@
 
     sys.stdout.write(template.render(blast=blast,
                                      length=query_length,
-                                     #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
+                                     hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
                                      colors=colors,
                                      match_colors=match_colors(),
                                      queryscale=queryscale(),