blast2html: blast2html.py comparison

comparison blast2html.py @ 95:4378d11f0ed7

implement configurable gene bank links

author	Jan Kanis <jan.code@jankanis.nl>
date	Mon, 30 Jun 2014 16:49:45 +0200
parents	9fb1a7d67317
children	02b795b784e1

comparison

equal deleted inserted replaced

-:df9fd5f35967
+:4378d11f0ed7
 import warnings
 import six, codecs
 from six.moves import builtins
 from os import path
 from itertools import repeat
+from collections import defaultdict
 import argparse
 from lxml import objectify
 import jinja2
+builtin_str = str
+str = six.text_type
 _filters = dict(float='float')
 def filter(func_or_name):
 """Split a hit.Hit_def that contains multiple titles up, splitting out the hit ids from the titles."""
 id_titles = hit.Hit_def.text.split('>')
 titles = []
 for t in id_titles[1:]:
-fullid, title = t.split(' ', 1)
+id, title = t.split(' ', 1)
-hitid, id = fullid.split('|', 2)[1:3]
+titles.append(argparse.Namespace(Hit_id = id,
-titles.append(dict(id = id,
+Hit_def = title,
-hitid = hitid,
+Hit_accession = '',
-fullid = fullid,
+getroottree = hit.getroottree))
-title = title))
 return titles
 @filter
 def hitid(hit):
-hitid = hit.Hit_id.text
+return str(hit.Hit_id)
-s = hitid.split('|', 2)
-if len(s) >= 2:
-return s[1]
-return hitid
-@filter
-def seqid(hit):
-hitid = hit.Hit_id.text
-s = hitid.split('|', 2)
-if len(s) >= 3:
-return s[2]
-return hitid
 @filter
 def alignment_pre(hsp):
 """Create the preformatted alignment blocks"""
 return 'Plus'
 elif frame == -1:
 return 'Minus'
 raise Exception("frame should be either +1 or -1")
-def genelink(hit, type='genbank', hsp=None):
+# def genelink(hit, type='genbank', hsp=None):
-if not isinstance(hit, six.string_types):
+#     if not isinstance(hit, six.string_types):
-hit = hitid(hit)
+#         hit = hitid(hit)
-link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
+#     link = "http://www.ncbi.nlm.nih.gov/nucleotide/{0}?report={1}&log$=nuclalign".format(hit, type)
-if hsp != None:
+#     if hsp != None:
-link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
+#         link += "&from={0}&to={1}".format(hsp['Hsp_hit-from'], hsp['Hsp_hit-to'])
-return link
+#     return link
 # javascript escape filter based on Django's, from https://github.com/dsissitka/khan-website/blob/master/templatefilters.py#L112-139
 # I've removed the html escapes, since html escaping is already being performed by the template engine.
 Javascript string literal escape. Note that this only escapes data
 for embedding within javascript string literals, not in general
 javascript snippets.
 """
-value = six.text_type(value)
+value = str(value)
 for bad, good in _js_escapes:
 value = value.replace(bad, good)
 return value
 colors = ('black', 'blue', 'green', 'magenta', 'red')
 max_scale_labels = 10
-def __init__(self, input, templatedir, templatename):
+def __init__(self, input, templatedir, templatename, genelinks={}):
 self.input = input
 self.templatename = templatename
+self.genelinks = genelinks
 self.blast = objectify.parse(self.input).getroot()
 self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
 self.environment = jinja2.Environment(loader=self.loader,
 lstrip_blocks=True, trim_blocks=True, autoescape=True)
 )
 result = template.render(blast=self.blast,
 iterations=self.blast.BlastOutput_iterations.Iteration,
 colors=self.colors,
-genelink=genelink,
 params=params)
 if six.PY2:
 result = result.encode('utf-8')
 output.write(result)
 totalscore = "{0:.1f}".format(sum(hsp_val('Hsp_bit-score'))),
 cover = "{0:.0%}".format(cover_count / query_length),
 e_value = "{0:.4g}".format(min(hsp_val('Hsp_evalue'))),
 # FIXME: is this the correct formula vv?
 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
-ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))),
+ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))))
-accession = hit.Hit_accession)
@filter
def genelink(self, hit, text=None, clas=None, display_nolink=True):
    """Return an HTML link to the gene bank page for a hit.

    hit: a hit object providing Hit_id, Hit_def, Hit_accession and
        getroottree().
    text: the link text; defaults to the hit id.
    clas: optional value for the link's class attribute.
    display_nolink: when no link can be built, return the plain text if
        true, or an empty string if false.

    self.genelinks is either a single template string used for every
    database, or a mapping from the BlastOutput_db value to a template.
    The template is a Python format string that may reference id[N],
    fullid, defline[N], fulldefline and accession.
    """
    if text is None:
        text = hitid(hit)

    # Pick the template: one global string, or a per-database lookup.
    db = hit.getroottree().getroot().BlastOutput_db
    if isinstance(self.genelinks, six.string_types):
        template = self.genelinks
    else:
        template = self.genelinks.get(db)
    if template is None:
        return text if display_nolink else ''

    fullid = hitid(hit)
    fulldefline = str(hit.Hit_def)
    args = dict(id=fullid.split('|'),
                fullid=fullid,
                defline=fulldefline.split('|'),
                fulldefline=fulldefline,
                accession=str(hit.Hit_accession))
    try:
        link = template.format(**args)
    except Exception as e:
        # A broken template should not abort rendering; warn and fall back
        # to the unlinked text.
        # Explicit field indices are used because auto-numbered '{}' fields
        # are not available before Python 2.7.
        warnings.warn('Error in formatting gene bank link {0} with {1}: {2}'.format(template, args, e))
        return text if display_nolink else ''

    classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else ''
    # Every interpolated piece is escaped individually, so the result can be
    # marked as safe markup.
    return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
def read_genelinks(dir):
    """Read gene bank link templates from the blastdb*.loc files in dir.

    The files blastdb.loc, blastdb_p.loc and blastdb_d.loc are read as
    tab-separated tables. Lines starting with '#' and lines with fewer than
    four fields are ignored. The third field of a line is used as the key
    and the fourth field as the link template. Files that cannot be opened
    or read are skipped.

    Returns a dict mapping the third field to the fourth field. A warning is
    issued when no templates were found at all.
    """
    links = {}
    for name in ('blastdb.loc', 'blastdb_p.loc', 'blastdb_d.loc'):
        try:
            # 'with' guarantees the file is closed even if reading fails.
            with open(path.join(dir, name)) as locfile:
                for row in locfile:
                    if row.strip().startswith('#'):
                        continue
                    # Drop the line terminator first, otherwise it ends up
                    # in the template when the fourth field is the last one.
                    fields = row.rstrip('\r\n').split('\t')
                    if len(fields) < 4:
                        continue
                    links[fields[2]] = fields[3]
        except (IOError, OSError):
            # On Python 2 open() raises IOError, which is not an OSError
            # there; on Python 3 the two names are the same class.
            continue
    if not links:
        warnings.warn("No gene bank link templates found")
    return links
 def main():
 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
 # handle the errors. This introduces a small race condition when
 # jinja later tries to re-open the template file, but we don't
 # care too much.
 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
 help='The template file to use. Defaults to blast_html.html.jinja')
+dblink_group = parser.add_mutually_exclusive_group()
+dblink_group.add_argument('--genelink-template', default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+help="""A link template to link hits to a gene bank webpage. The template string is a
+Python format string. It can contain the following replacement elements: {id[N]}, {fullid},
+{defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be
+replaced by the Nth element of the id or defline, where '|' is the field separator.
+The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
+which is a link to the NCBI nucleotide database.""")
+dblink_group.add_argument('--db-config-dir',
+help="""The directory where databases are configured in blastdb*.loc files. These files
+are consulted for creating a gene bank link. The files should be tab-separated tables (with lines
+starting with '#' ignored), where the third field of a line should be a database path and the fourth
+a genebank link template conforming to the --genelink-template option syntax.
+This option is incompatible with --genelink-template.""")
 args = parser.parse_args()
 if args.input == None:
 args.input = args.positional_arg
 if args.input == None:
 parser.error('no input specified')
 templatedir, templatename = path.split(args.template.name)
 args.template.close()
 if not templatedir:
 templatedir = '.'
-b = BlastVisualize(args.input, templatedir, templatename)
+if args.db_config_dir is None:
+genelinks = args.genelink_template
+elif not path.isdir(args.db_config_dir):
+parser.error('db-config-dir does not exist or is not a directory')
+else:
+genelinks = read_genelinks(args.db_config_dir)
+b = BlastVisualize(args.input, templatedir, templatename, genelinks)
 b.render(args.output)
 if __name__ == '__main__':
 main()

Mercurial > repos > jankanis > blast2html

comparison blast2html.py @ 95:4378d11f0ed7