comparison visualise.py @ 7:9e7927673089

intermediate commit before converting some tables to divs
author Jan Kanis <jan.code@jankanis.nl>
date Thu, 08 May 2014 16:51:52 +0200
parents 1df2bfce5c24
children 2fbdf2eb27b4
comparison
equal deleted inserted replaced
6:d20ce91e1297 7:9e7927673089
2 2
3 # Copyright The Hyve B.V. 2014 3 # Copyright The Hyve B.V. 2014
4 # License: GPL version 3 or higher 4 # License: GPL version 3 or higher
5 5
6 import sys 6 import sys
7 import math
7 import warnings 8 import warnings
8 from itertools import repeat 9 from itertools import repeat
9 from lxml import objectify 10 from lxml import objectify
10 import jinja2 11 import jinja2
11 12
24 25
# Color names, indexed by the value that color_idx() returns for an
# alignment length.
colors = ['black', 'blue', 'green', 'magenta', 'red']

# Parse the BLAST XML report once at import time; everything below reads
# from this tree.
blast = objectify.parse('blast xml example1.xml').getroot()
loader = jinja2.FileSystemLoader(searchpath='.')
environment = jinja2.Environment(loader=loader,
                                 lstrip_blocks=True,
                                 trim_blocks=True,
                                 autoescape=True)
# Template filter mapping an alignment length to its color name.  The
# lookup table is `colors`; `match_colors` is a generator function and
# cannot be subscripted.
environment.filters['color'] = lambda length: colors[color_idx(length)]

query_length = int(blast["BlastOutput_query-len"])

hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
# sort hits by longest HSP (largest Hsp_align-len) first
ordered_hits = sorted(hits,
                      key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp),
                      reverse=True)
31 40
def match_colors():
    """
    An iterator that yields, per hit, the colored segments of the query.

    Each yielded dict has the keys:
      colors  -- list of (width, color) pairs covering the whole query from
                 left to right; width is a percentage of the query length
                 and color is a name from `colors`, or 'none' for positions
                 no HSP covers
      link    -- "#hit" followed by the hit number
      defline -- the hit's Hit_def element

    Hits are yielded in `ordered_hits` order so the rows line up with the
    rows produced by hit_info().
    """

    percent_multiplier = 100 / query_length
    uncovered = 255  # sentinel byte: position not covered by any HSP

    def color_name(idx):
        return colors[idx] if idx != uncovered else 'none'

    # Use the pre-sorted hit list: iterating the unsorted `hits` would give
    # a different row order than hit_info().
    for hit in ordered_hits:
        # sort HSPs from short to long, so we can overwrite index colors of
        # short matches with those of long ones.
        hsps = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len'])
        table = bytearray([uncovered]) * query_length
        for hsp in hsps:
            # Hsp_query-from/-to are 1-based and inclusive; convert to a
            # 0-based half-open slice.
            frm = int(hsp['Hsp_query-from']) - 1
            to = int(hsp['Hsp_query-to'])
            table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm)

        # Run-length encode the per-position color indexes.
        matches = []
        last = table[0]
        count = 0
        for current in table:
            if current == last:
                count += 1
                continue
            matches.append((count * percent_multiplier, color_name(last)))
            last = current
            count = 1
        matches.append((count * percent_multiplier, color_name(last)))

        yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def)
65 71
66 72
def queryscale():
    """
    Yield the tick labels for the scale drawn above the query.

    Every item is a dict with `label` (a 1-based query position) and `width`
    (the size of that tick's segment as a percentage of the query length).
    Ticks are placed every `skip` positions; when the query length is not a
    multiple of `skip`, a last narrower tick labelled with the query length
    itself closes the scale.
    """
    max_labels = 10
    skip = math.ceil(query_length / max_labels)
    percent_multiplier = 100 / query_length

    full_width = skip * percent_multiplier
    # Stepping the range visits exactly the positions divisible by `skip`.
    for position in range(skip, query_length + 1, skip):
        yield dict(label=position, width=full_width)

    _, remainder = divmod(query_length, skip)
    if remainder != 0:
        yield dict(label=query_length, width=remainder * percent_multiplier)
82
83
def hit_info():
    """
    Yield one summary dict per hit, in `ordered_hits` order.

    Keys of each dict:
      description -- the hit's Hit_def element
      maxscore    -- highest Hsp_bit-score among the hit's HSPs
      totalscore  -- sum of Hsp_bit-score over the hit's HSPs
      cover       -- share of query positions covered by at least one HSP,
                     formatted as a whole percentage
      e_value     -- lowest Hsp_evalue among the hit's HSPs
      ident       -- lowest Hsp_identity / Hsp_align-len ratio among the
                     HSPs, formatted as a whole percentage
      accession   -- the hit's Hit_accession element
    """

    for hit in ordered_hits:
        hsps = hit.Hit_hsps.Hsp

        cover = [False] * query_length
        for hsp in hsps:
            # Hsp_query-from/-to are 1-based and inclusive; convert to a
            # 0-based half-open slice.
            start = int(hsp['Hsp_query-from']) - 1
            end = int(hsp['Hsp_query-to'])
            # Fill with exactly as many items as the slice holds.  Using
            # Hsp_align-len here would resize the list whenever the
            # alignment length differs from the query span, skewing the
            # count below.
            cover[start:end] = repeat(True, end - start)
        cover_count = cover.count(True)

        def hsp_val(path):
            return (hsp[path] for hsp in hsps)

        yield dict(description = hit.Hit_def,
                   maxscore = max(hsp_val('Hsp_bit-score')),
                   totalscore = sum(hsp_val('Hsp_bit-score')),
                   cover = "{:.0%}".format(cover_count / query_length),
                   e_value = min(hsp_val('Hsp_evalue')),
                   # FIXME: is this the correct formula vv?
                   ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)),
                   accession = hit.Hit_accession)
105
106
67 def main(): 107 def main():
68 template = environment.get_template('visualise.html.jinja') 108 template = environment.get_template('visualise.html.jinja')
69 109
70 params = (('Query ID', blast["BlastOutput_query-ID"]), 110 params = (('Query ID', blast["BlastOutput_query-ID"]),
71 ('Query definition', blast["BlastOutput_query-def"]), 111 ('Query definition', blast["BlastOutput_query-def"]),
76 116
77 if len(blast.BlastOutput_iterations.Iteration) > 1: 117 if len(blast.BlastOutput_iterations.Iteration) > 1:
78 warnings.warn("Multiple 'Iteration' elements found, showing only the first") 118 warnings.warn("Multiple 'Iteration' elements found, showing only the first")
79 119
80 sys.stdout.write(template.render(blast=blast, 120 sys.stdout.write(template.render(blast=blast,
81 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, 121 length=query_length,
122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
82 colors=colors, 123 colors=colors,
83 match_colors=match_colors(), 124 match_colors=match_colors(),
125 queryscale=queryscale(),
126 hit_info=hit_info(),
84 params=params)) 127 params=params))
85 128
86 main() 129 main()