Repository 'blast2html'
hg clone https://toolshed.g2.bx.psu.edu/repos/jankanis/blast2html

Changeset 19:67ddcb807b7d (2014-05-13)
Previous changeset 18:4434ffab721a (2014-05-13) Next changeset 20:53cd304c5f26 (2014-05-14)
Commit message:
make it work with multiple queries
modified:
blast_html.html.jinja
blast_html.py
b
diff -r 4434ffab721a -r 67ddcb807b7d blast_html.html.jinja
--- a/blast_html.html.jinja Tue May 13 15:26:20 2014 +0200
+++ b/blast_html.html.jinja Tue May 13 18:06:36 2014 +0200
[
b'@@ -2,6 +2,7 @@\n <html>\n   <head>\n     <meta charset="UTF-8">\n+    <meta name=generator content="blast_html; see ...">\n     \n     <title>Blast output</title>\n     \n@@ -97,12 +98,12 @@\n       margin: auto;\n       }\n \n-      .centered, #defline, div.legend, div.tablewrapper {\n+      .centered, .defline, div.legend, div.tablewrapper {\n       margin-left: auto;\n       margin-right: auto;\n       }\n \n-      #defline {\n+      .defline {\n       background-color: white;\n       border: 1px solid black;\n       margin: .5em auto;\n@@ -338,197 +339,213 @@\n   \n   <body>\n     <div id=content>\n-      <h1>Nucleotide Sequence ({{length}} letters)</h1>\n \n-      <section class=header>\n-\n-        <table class=headerdata>\n-          {% for param, value in params %}\n-            <tr><td class=param>{{param}}:</td><td>{{value}}</td></tr>\n-          {% endfor %}\n-        </table>\n-        \n-      </section>\n-      \n-      {% if not (blast.BlastOutput_iterations.findall(\'Iteration\') and\n-                 blast.BlastOutput_iterations.Iteration.Iteration_hits.findall(\'Hit\')) %}\n+      {% if not blast.BlastOutput_iterations.findall(\'Iteration\') %}\n       <section class=nodata>\n-        <h2>No Results</h2>\n-        <div class=grey>\n-          No Matches\n-        </div>\n-      </section>\n-      {% else %}\n-\n-\n-      \n-      <section class=graphics>\n-        <h2>Graphic Summary</h2>\n-\n+        <h1>No data</h1>\n         <div class=grey>\n-          <h3 class=centered>Distribution of {{hits|length}} Blast Hits on the Query Sequence</h3>\n-          \n-          <div id=defline>Mouse-over to show defline and scores, click to show alignments</div>\n-\n-          <div class=graphic>\n-            <h4 class=darkHeader>Color key for alignment scores</h4>\n-            <div class=legend><div class=graphicrow>\n-                <div class=graphicitem style="background-color: {{colors[0]}}">&lt;40</div>\n-                <div class=graphicitem style="background-color: {{colors[1]}}">40\xe2\x80\x9350</div>\n-                <div class=graphicitem style="background-color: {{colors[2]}}">50\xe2\x80\x9380</div>\n-                <div class=graphicitem style="background-color: {{colors[3]}}">80\xe2\x80\x93200</div>\n-                <div class=graphicitem style="background-color: {{colors[4]}}">200\xe2\x89\xa4</div>\n-            </div></div>\n-            <div style="clear: left"></div>\n-\n-            <div class=tablewrapper>\n-\n-              <div class=scale>\n-                <div>query:</div>\n-                <div class=graphicrow>\n-                  {% for s in queryscale %}\n-                  <div class=graphicitem style="width: {{s.width}}%">\n-                    <div>{{s.label}}</div>\n-                  </div>\n-                  {% endfor %}\n-                </div>\n-                <div style="clear: left"></div>\n-              </div>\n-              \n-              {% for line in match_colors %}\n-              <a class=matchresult\n-                 href="{{line.link}}"\n-                 onmouseover=\'document.getElementById("defline").innerHTML="{{line.defline|js_string_escape}}"\'\n-                 onmouseout=\'document.getElementById("defline").innerHTML="Mouse-over to show defline and scores, click to show alignments"\'\n-                 title="{{line.defline}}">\n-                <div class="matchrow graphicrow">\n-                  {% for match in line.colors %}\n-                  <div class="matchitem graphicitem"\n-                       style="background-color: {{match[1]}}; width: {{match[0]}}%"></div>\n-                  {% endfor %}\n-                </div>\n-              </a>\n-              \n-              {% endfor %}\n-            </div>\n-          </div>\n+          No matches\n         </div>\n       </section>\n \n-\n+      {% else %}\n+      {% for result in blast.BlastOutput_iterations.Iteration %}\n+      \n+      <section class=match>\n       \n-      <section class=descriptions>\n-        <h2>Descriptions</h2>\n+        <h1>Nucleotide Sequence ({{result|len}} letters)</h1>\n+\n+        <section class=header>\n+\n+      '..b' class=b>Length:</span> {{hit.Hit_len}}\n-                  <span class=b>Number of Matches:</span> {{hit.Hit_hsps.Hsp|length}}\n-                </p>\n+                {% for hsp in hit.Hit_hsps.Hsp %}\n+                <div class=hotspot>\n+                  <p class=range>\n+                    <span class=range>Range {{hsp.Hsp_num}}: {{hsp[\'Hsp_hit-from\']}} to {{hsp[\'Hsp_hit-to\']}}</span>\n+                    <a class=range href="{{genelink(hit|hitid, \'genbank\', hsp)}}">GenBank</a>\n+                    <a class=range href="{{genelink(hit|hitid, \'graph\', hsp)}}">Graphics</a>\n+                  </p>\n+\n+                  <table class=hotspotstable>\n+                    <tr>\n+                      <th>Score</th><th>Expect</th><th>Identities</th><th>Gaps</th><th>Strand</th>\n+                    </tr>\n+                    <tr>\n+                      <td>{{hsp[\'Hsp_bit-score\']|fmt(\'.1f\')}} bits({{hsp.Hsp_score}})</td>\n+                      <td>{{hsp.Hsp_evalue|fmt(\'.1f\')}}</td>\n+                      <td>{{ hsp.Hsp_identity }}/{{ hsp|len }}({{\n+                        (hsp.Hsp_identity/hsp|len) |fmt(\'.0%\') }})</td>\n+                      <td>{{ hsp.Hsp_gaps }}/{{ hsp|len\n+                        }}({{ (hsp.Hsp_gaps / hsp|len) | fmt(\'.0%\') }})</td>\n+                      <td>{{ hsp[\'Hsp_query-frame\']|asframe }}/{{ hsp[\'Hsp_hit-frame\']|asframe }}</td>\n+                    </tr>\n+                  </table>\n+\n+                  <pre class=alignmentgraphic>{{hsp|alignment_pre}}</pre>\n+                </div>\n+                {% endfor %}\n+\n               </div>\n \n-              {% if hit|othertitles|length %}\n-              <a class=showmoretitles onclick="toggle_visibility(\'moretitles{{hit.Hit_num|js_string_escape}}\'); return false;" href=\'\'>\n-                See {{hit|othertitles|length}} more title(s)\n-              </a>\n-\n-              <div class=moretitles id=moretitles{{hit.Hit_num}} style="display: none">\n-                {% for title in hit|othertitles %}\n-                <div class=title>\n-                  <p class=hittitle>{{title.title}}</p>\n-                  <p class=titleinfo>\n-                    <span class=b>Sequence ID:</span> <a href="{{genelink(title.hitid)}}">{{title.id}}</a>\n-                  </p>\n-                </div>\n-                {% endfor %}\n-              </div>\n-              {% endif %}\n-\n-              {% for hsp in hit.Hit_hsps.Hsp %}\n-              <div class=hotspot>\n-                <p class=range>\n-                  <span class=range>Range {{hsp.Hsp_num}}: {{hsp[\'Hsp_hit-from\']}} to {{hsp[\'Hsp_hit-to\']}}</span>\n-                  <a class=range href="{{genelink(hit|hitid, \'genbank\', hsp)}}">GenBank</a>\n-                  <a class=range href="{{genelink(hit|hitid, \'graph\', hsp)}}">Graphics</a>\n-                </p>\n+              {% endfor %}\n+          </div></div>\n+        </section>\n+        {% endif %}\n \n-                <table class=hotspotstable>\n-                  <tr>\n-                    <th>Score</th><th>Expect</th><th>Identities</th><th>Gaps</th><th>Strand</th>\n-                  </tr>\n-                  <tr>\n-                    <td>{{hsp[\'Hsp_bit-score\']|fmt(\'.1f\')}} bits({{hsp.Hsp_score}})</td>\n-                    <td>{{hsp.Hsp_evalue|fmt(\'.1f\')}}</td>\n-                    <td>{{ hsp.Hsp_identity }}/{{ hsp|len }}({{\n-                      (hsp.Hsp_identity/hsp|len) |fmt(\'.0%\') }})</td>\n-                    <td>{{ hsp.Hsp_gaps }}/{{ hsp|len\n-                      }}({{ (hsp.Hsp_gaps / hsp|len) | fmt(\'.0%\') }})</td>\n-                    <td>{{ hsp[\'Hsp_query-frame\']|asframe }}/{{ hsp[\'Hsp_hit-frame\']|asframe }}</td>\n-                  </tr>\n-                </table>\n-\n-                <pre class=alignmentgraphic>{{hsp|alignment_pre}}</pre>\n-              </div>\n-              {% endfor %}\n-              \n-            </div>\n-\n-            {% endfor %}\n-        </div></div>\n-      </section>\n-\n-      {% endif %}\n+        {% endfor %}\n+        {% endif %}\n     </div>\n   </body>\n </html>\n'
b
diff -r 4434ffab721a -r 67ddcb807b7d blast_html.py
--- a/blast_html.py Tue May 13 15:26:20 2014 +0200
+++ b/blast_html.py Tue May 13 18:06:36 2014 +0200
[
@@ -19,11 +19,11 @@
     "Decorator to register a function as filter in the current jinja environment"
     if isinstance(func_or_name, str):
         def inner(func):
-            _filters[func_or_name] = func
+            _filters[func_or_name] = func.__name__
             return func
         return inner
     else:
-        _filters[func_or_name.__name__] = func_or_name
+        _filters[func_or_name.__name__] = func_or_name.__name__
         return func_or_name
 
 
@@ -78,8 +78,13 @@
     )
 
 @filter('len')
-def hsplen(node):
-    return int(node['Hsp_align-len'])
+def blastxml_len(node):
+    if node.tag == 'Hsp':
+        return int(node['Hsp_align-len'])
+    elif node.tag == 'Iteration':
+        return int(node['Iteration_query-len'])
+    raise Exception("Unknown XML node type: "+node.tag)
+        
 
 @filter
 def asframe(frame):
@@ -134,6 +139,13 @@
 
     return value
 
+@filter
+def hits(result):
+    # sort hits by longest hotspot first
+    return sorted(result.Iteration_hits.findall('Hit'),
+                  key=lambda h: max(blastxml_len(hsp) for hsp in h.Hit_hsps.Hsp),
+                  reverse=True)
+
 
 
 class BlastVisualize:
@@ -151,15 +163,15 @@
         self.environment = jinja2.Environment(loader=self.loader,
                                               lstrip_blocks=True, trim_blocks=True, autoescape=True)
 
-        self.environment.filters.update(_filters)
-        self.environment.filters['color'] = lambda length: match_colors[color_idx(length)]
+        self._addfilters(self.environment)
+
 
-        self.query_length = int(self.blast["BlastOutput_query-len"])
-        self.hits = self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit
-        # sort hits by longest hotspot first
-        self.ordered_hits = sorted(self.hits,
-                                   key=lambda h: max(hsplen(hsp) for hsp in h.Hit_hsps.Hsp),
-                                   reverse=True)
+    def _addfilters(self, environment):
+        for filtername, funcname in _filters.items():
+            try:
+                environment.filters[filtername] = getattr(self, funcname)
+            except AttributeError:
+                environment.filters[filtername] = globals()[funcname]
 
     def render(self, output):
         template = self.environment.get_template(self.templatename)
@@ -171,41 +183,38 @@
                   ('Database', self.blast.BlastOutput_db),
         )
 
-        if len(self.blast.BlastOutput_iterations.Iteration) > 1:
-            warnings.warn("Multiple 'Iteration' elements found, showing only the first")
-
         output.write(template.render(blast=self.blast,
-                                     length=self.query_length,
-                                     hits=self.blast.BlastOutput_iterations.Iteration.Iteration_hits.Hit,
+                                     iterations=self.blast.BlastOutput_iterations.Iteration,
                                      colors=self.colors,
-                                     match_colors=self.match_colors(),
-                                     queryscale=self.queryscale(),
-                                     hit_info=self.hit_info(),
+                                     # match_colors=self.match_colors(),
+                                     # hit_info=self.hit_info(),
                                      genelink=genelink,
                                      params=params))
-        
 
-    def match_colors(self):
+    @filter
+    def match_colors(self, result):
         """
         An iterator that yields lists of length-color pairs. 
         """
 
-        percent_multiplier = 100 / self.query_length
+        query_length = blastxml_len(result)
+        
+        percent_multiplier = 100 / query_length
 
-        for hit in self.hits:
+        for hit in hits(result):
             # sort hotspots from short to long, so we can overwrite index colors of
             # short matches with those of long ones.
-            hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: hsplen(hsp))
-            table = bytearray([255]) * self.query_length
+            hotspots = sorted(hit.Hit_hsps.Hsp, key=lambda hsp: blastxml_len(hsp))
+            table = bytearray([255]) * query_length
             for hsp in hotspots:
                 frm = hsp['Hsp_query-from'] - 1
                 to = int(hsp['Hsp_query-to'])
-                table[frm:to] = repeat(color_idx(hsplen(hsp)), to - frm)
+                table[frm:to] = repeat(color_idx(blastxml_len(hsp)), to - frm)
 
             matches = []
             last = table[0]
             count = 0
-            for i in range(self.query_length):
+            for i in range(query_length):
                 if table[i] == last:
                     count += 1
                     continue
@@ -216,25 +225,28 @@
 
             yield dict(colors=matches, link="#hit"+hit.Hit_num.text, defline=firsttitle(hit))
 
-
-    def queryscale(self):
-        skip = math.ceil(self.query_length / self.max_scale_labels)
-        percent_multiplier = 100 / self.query_length
-        for i in range(1, self.query_length+1):
+    @filter
+    def queryscale(self, result):
+        query_length = blastxml_len(result)
+        skip = math.ceil(query_length / self.max_scale_labels)
+        percent_multiplier = 100 / query_length
+        for i in range(1, query_length+1):
             if i % skip == 0:
                 yield dict(label = i, width = skip * percent_multiplier)
-        if self.query_length % skip != 0:
-            yield dict(label = self.query_length, width = (self.query_length % skip) * percent_multiplier)
-
+        if query_length % skip != 0:
+            yield dict(label = query_length, width = (query_length % skip) * percent_multiplier)
 
-    def hit_info(self):
+    @filter
+    def hit_info(self, result):
 
-        for hit in self.ordered_hits:
+        query_length = blastxml_len(result)
+
+        for hit in hits(result):
             hsps = hit.Hit_hsps.Hsp
 
-            cover = [False] * self.query_length
+            cover = [False] * query_length
             for hsp in hsps:
-                cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, hsplen(hsp))
+                cover[hsp['Hsp_query-from']-1 : int(hsp['Hsp_query-to'])] = repeat(True, blastxml_len(hsp))
             cover_count = cover.count(True)
 
             def hsp_val(path):
@@ -245,10 +257,10 @@
                        link_id = hit.Hit_num,
                        maxscore = "{:.1f}".format(max(hsp_val('Hsp_bit-score'))),
                        totalscore = "{:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
-                       cover = "{:.0%}".format(cover_count / self.query_length),
+                       cover = "{:.0%}".format(cover_count / query_length),
                        e_value = "{:.4g}".format(min(hsp_val('Hsp_evalue'))),
                        # FIXME: is this the correct formula vv?
-                       ident = "{:.0%}".format(float(min(hsp.Hsp_identity / hsplen(hsp) for hsp in hsps))),
+                       ident = "{:.0%}".format(float(min(hsp.Hsp_identity / blastxml_len(hsp) for hsp in hsps))),
                        accession = hit.Hit_accession)
 
 def main():