comparison visualise.py @ 10:2fbdf2eb27b4

All data is displayed now, still some formatting to do
author Jan Kanis <jan.code@jankanis.nl>
date Fri, 09 May 2014 18:16:48 +0200
parents 9e7927673089
children 7660519f2dc9
comparison
equal deleted inserted replaced
9:bbdc8fb0de2b 10:2fbdf2eb27b4
9 from itertools import repeat 9 from itertools import repeat
10 from lxml import objectify 10 from lxml import objectify
11 import jinja2 11 import jinja2
12 12
13 13
14 blast = objectify.parse('blast xml example1.xml').getroot()
15 loader = jinja2.FileSystemLoader(searchpath='.')
16 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True)
17
18 def filter(func_or_name):
19 if isinstance(func_or_name, str):
20 def inner(func):
21 environment.filters[func_or_name] = func
22 return func
23 return inner
24 else:
25 environment.filters[func_or_name.__name__] = func_or_name
26 return func_or_name
27
28
14 def color_idx(length): 29 def color_idx(length):
15 if length < 40: 30 if length < 40:
16 return 0 31 return 0
17 elif length < 50: 32 elif length < 50:
18 return 1 33 return 1
20 return 2 35 return 2
21 elif length < 200: 36 elif length < 200:
22 return 3 37 return 3
23 return 4 38 return 4
24 39
25
26 colors = ['black', 'blue', 'green', 'magenta', 'red'] 40 colors = ['black', 'blue', 'green', 'magenta', 'red']
27 41
28 blast = objectify.parse('blast xml example1.xml').getroot()
29 loader = jinja2.FileSystemLoader(searchpath='.')
30 environment = jinja2.Environment(loader=loader, lstrip_blocks=True, trim_blocks=True, autoescape=True)
31 environment.filters['color'] = lambda length: match_colors[color_idx(length)] 42 environment.filters['color'] = lambda length: match_colors[color_idx(length)]
43
44 @filter
45 def fmt(val, fmt):
46 return format(float(val), fmt)
47
48 @filter
49 def firsttitle(hit):
50 return hit.Hit_def.text.split('>')[0]
51
52 @filter
53 def othertitles(hit):
54 """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles."""
55 id_titles = hit.Hit_def.text.split('>')
56
57 titles = []
58 for t in id_titles[1:]:
59 fullid, title = t.split(' ', 1)
60 id = fullid.split('|', 2)[2]
61 titles.append(dict(id = id,
62 fullid = fullid,
63 title = title))
64 return titles
65
66 @filter
67 def hitid(hit):
68 return hit.Hit_id.text.split('|', 2)[1]
69
70 @filter
71 def seqid(hit):
72 return hit.Hit_id.text.split('|', 2)[2]
73
74 @filter
75 def alignment_pre(hsp):
76 return (
77 "Query {:>7s} {} {}\n".format(hsp['Hsp_query-from'], hsp.Hsp_qseq, hsp['Hsp_query-to']) +
78 " {:7s} {}\n".format('', hsp.Hsp_midline) +
79 "Subject {:>7s} {} {}".format(hsp['Hsp_hit-from'], hsp.Hsp_hseq, hsp['Hsp_hit-to']))
80
81 @filter('len')
82 def hsplen(node):
83 return int(node['Hsp_align-len'])
84
85 @filter
86 def asframe(frame):
87 if frame == 1:
88 return 'Plus'
89 elif frame == -1:
90 return 'Minus'
91 raise Exception("frame should be either +1 or -1")
92
32 93
33 query_length = int(blast["BlastOutput_query-len"]) 94 query_length = int(blast["BlastOutput_query-len"])
34 95
35 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit 96 hits = blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
36 # sort hits by longest hotspot first 97 # sort hits by longest hotspot first
37 ordered_hits = sorted(hits, 98 ordered_hits = sorted(hits,
38 key=lambda h: max(hsp['Hsp_align-len'] for hsp in h.Hit_hsps.Hsp), 99 key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp),
39 reverse=True) 100 reverse=True)
40 101
41 def match_colors(): 102 def match_colors():
42 """ 103 """
43 An iterator that yields lists of length-color pairs. 104 An iterator that yields lists of length-color pairs.
46 percent_multiplier = 100 / query_length 107 percent_multiplier = 100 / query_length
47 108
48 for hit in hits: 109 for hit in hits:
49 # sort hotspots from short to long, so we can overwrite index colors of 110 # sort hotspots from short to long, so we can overwrite index colors of
50 # short matches with those of long ones. 111 # short matches with those of long ones.
51 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsp['Hsp_align-len']) 112 hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp))
52 table = bytearray([255]) * query_length 113 table = bytearray([255]) * query_length
53 for hsp in hotspots: 114 for hsp in hotspots:
54 frm = hsp['Hsp_query-from'] - 1 115 frm = hsp['Hsp_query-from'] - 1
55 to = int(hsp['Hsp_query-to']) 116 to = int(hsp['Hsp_query-to'])
56 table[frm:to] = repeat(color_idx(hsp['Hsp_align-len']), to - frm) 117 table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm)
57 118
58 matches = [] 119 matches = []
59 last = table[0] 120 last = table[0]
60 count = 0 121 count = 0
61 for i in range(query_length): 122 for i in range(query_length):
65 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) 126 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none'))
66 last = table[i] 127 last = table[i]
67 count = 1 128 count = 1
68 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none')) 129 matches.append((count * percent_multiplier, colors[last] if last != 255 else 'none'))
69 130
70 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=hit.Hit_def) 131 yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit))
71 132
72 133
73 def queryscale(): 134 def queryscale():
74 max_labels = 10 135 max_labels = 10
75 skip = math.ceil(query_length / max_labels) 136 skip = math.ceil(query_length / max_labels)
86 for hit in ordered_hits: 147 for hit in ordered_hits:
87 hsps = hit.Hit_hsps.Hsp 148 hsps = hit.Hit_hsps.Hsp
88 149
89 cover = [False] * query_length 150 cover = [False] * query_length
90 for hsp in hsps: 151 for hsp in hsps:
91 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, int(hsp['Hsp_align-len'])) 152 cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp))
92 cover_count = cover.count(True) 153 cover_count = cover.count(True)
93 154
94 def hsp_val(path): 155 def hsp_val(path):
95 return (hsp[path] for hsp in hsps) 156 return (hsp[path] for hsp in hsps)
96 157
97 yield dict(description = hit.Hit_def, 158 yield dict(title = firsttitle(hit),
98 maxscore = max(hsp_val('Hsp_bit-score')), 159 link_id = hit.Hit_num,
99 totalscore = sum(hsp_val('Hsp_bit-score')), 160 maxscore = "{:.1f}".format(float(max(hsp_val('Hsp_bit-score')))),
161 totalscore = "{:.1f}".format(float(sum(hsp_val('Hsp_bit-score')))),
100 cover = "{:.0%}".format(cover_count / query_length), 162 cover = "{:.0%}".format(cover_count / query_length),
101 e_value = min(hsp_val('Hsp_evalue')), 163 e_value = "{:.4g}".format(float(min(hsp_val('Hsp_evalue')))),
102 # FIXME: is this the correct formula vv? 164 # FIXME: is this the correct formula vv?
103 ident = "{:.0%}".format(min(hsp.Hsp_identity / hsp['Hsp_align-len'] for hsp in hsps)), 165 ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))),
104 accession = hit.Hit_accession) 166 accession = hit.Hit_accession)
105 167
106 168
107 def main(): 169 def main():
108 template = environment.get_template('visualise.html.jinja') 170 template = environment.get_template('visualise.html.jinja')
109 171
110 params = (('Query ID', blast["BlastOutput_query-ID"]), 172 params = (('Query ID', blast["BlastOutput_query-ID"]),
111 ('Query definition', blast["BlastOutput_query-def"]), 173 ('Query definition', blast["BlastOutput_query-def"]),
117 if len(blast.BlastOutput_iterations.Iteration) > 1: 179 if len(blast.BlastOutput_iterations.Iteration) > 1:
118 warnings.warn("Multiple 'Iteration' elements found, showing only the first") 180 warnings.warn("Multiple 'Iteration' elements found, showing only the first")
119 181
120 sys.stdout.write(template.render(blast=blast, 182 sys.stdout.write(template.render(blast=blast,
121 length=query_length, 183 length=query_length,
122 #hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit, 184 hits=blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
123 colors=colors, 185 colors=colors,
124 match_colors=match_colors(), 186 match_colors=match_colors(),
125 queryscale=queryscale(), 187 queryscale=queryscale(),
126 hit_info=hit_info(), 188 hit_info=hit_info(),
127 params=params)) 189 params=params))