comparison blast2html.py @ 115:0c2a03f9740b

make external gene bank name configurable
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 14 Jul 2014 15:01:32 +0200
parents e17aae23cc1c
children f5066973029a
comparison
equal deleted inserted replaced
114:4f0ed3b5ae46 115:0c2a03f9740b
13 import warnings 13 import warnings
14 import six, codecs, io 14 import six, codecs, io
15 from six.moves import builtins 15 from six.moves import builtins
16 from os import path 16 from os import path
17 from itertools import repeat 17 from itertools import repeat
18 from collections import defaultdict 18 from collections import defaultdict, namedtuple
19 import glob 19 import glob
20 import argparse 20 import argparse
21 from lxml import objectify 21 from lxml import objectify
22 import jinja2 22 import jinja2
23 23
236 236
237 colors = ('black', 'blue', 'green', 'magenta', 'red') 237 colors = ('black', 'blue', 'green', 'magenta', 'red')
238 238
239 max_scale_labels = 10 239 max_scale_labels = 10
240 240
241 def __init__(self, input, templatedir, templatename, genelinks={}): 241 def __init__(self, input, templatedir, templatename, dbname, genelinks={}):
242 self.input = input 242 self.input = input
243 self.templatename = templatename 243 self.templatename = templatename
244 self.dbname = dbname
244 self.genelinks = genelinks 245 self.genelinks = genelinks
245 246
246 self.blast = objectify.parse(self.input).getroot() 247 self.blast = objectify.parse(self.input).getroot()
247 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) 248 self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
248 self.environment = jinja2.Environment(loader=self.loader, 249 self.environment = jinja2.Environment(loader=self.loader,
346 # FIXME: is this the correct formula vv? 347 # FIXME: is this the correct formula vv?
347 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6 348 # float(...) because non-flooring division doesn't work with lxml elements in python 2.6
348 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps)))) 349 ident = "{0:.0%}".format(float(min(float(hsp.Hsp_identity) / blastxml_len(hsp) for hsp in hsps))))
349 350
350 @filter 351 @filter
351 def genelink(self, hit, text=None, clas=None, display_nolink=True): 352 def genelink(self, hit, text=None, text_from='hitid', cssclass=None, display_nolink=True):
352 """Create a html link from a hit node to a configured gene bank webpage. 353 """Create a html link from a hit node to a configured gene bank webpage.
353 text: The text of the link, defaults to the hit_id 354 text: The text of the link. If not set applies text_from.
354 clas: extra css classes that will be added to the <a> element 355 text_from: string, if text is not specified, take it from specified source. Either 'hitid' (default) or 'dbname'.
356 cssclass: extra css classes that will be added to the <a> element
355 display_nolink: boolean, if false don't display anything if no link can be created. Default True. 357 display_nolink: boolean, if false don't display anything if no link can be created. Default True.
356 """ 358 """
357 359
358 if text is None:
359 text = hitid(hit)
360
361 db = hit.getroottree().getroot().BlastOutput_db 360 db = hit.getroottree().getroot().BlastOutput_db
362 361
363 if isinstance(self.genelinks, six.string_types): 362 if isinstance(self.genelinks, six.string_types):
364 template = self.genelinks 363 template = self.genelinks
365 else: 364 else:
366 template = self.genelinks.get(db) 365 template = self.genelinks[db].template
366
367 if text is None:
368 if text_from == 'hitid':
369 text = hitid(hit)
370 elif text_from == 'dbname':
371 text = self.dbname or self.genelinks[db].dbname or 'Gene Bank'
372 else:
373 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from))
374
367 if template is None: 375 if template is None:
368 return text if display_nolink else '' 376 return text if display_nolink else ''
377
369 args = dict(id=hitid(hit).split('|'), 378 args = dict(id=hitid(hit).split('|'),
370 fullid=hitid(hit), 379 fullid=hitid(hit),
371 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'), 380 defline=str(hit.Hit_def).split(' ', 1)[0].split('|'),
372 fulldefline=str(hit.Hit_def).split(' ', 1)[0], 381 fulldefline=str(hit.Hit_def).split(' ', 1)[0],
373 accession=str(hit.Hit_accession)) 382 accession=str(hit.Hit_accession))
375 link = template.format(**args) 384 link = template.format(**args)
376 except Exception as e: 385 except Exception as e:
377 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e)) 386 warnings.warn('Error in formatting gene bank link {} with {}: {}'.format(template, args, e))
378 return text if display_nolink else '' 387 return text if display_nolink else ''
379 388
380 classattr = 'class="{0}" '.format(jinja2.escape(clas)) if clas is not None else '' 389 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else ''
381 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) 390 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
382 391
383 392
393 genelinks_entry = namedtuple('genelinks_entry', 'dbname template')
384 def read_genelinks(dir): 394 def read_genelinks(dir):
385 links = {} 395 links = defaultdict(lambda: genelinks_entry(None, None))
386 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. 396 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc.
387 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) 397 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc')))
388 # reversed, so blastdb.loc will take precedence 398 # reversed, so blastdb.loc will take precedence
389 for f in reversed(files): 399 for f in reversed(files):
390 try: 400 try:
392 for l in f.readlines(): 402 for l in f.readlines():
393 if l.strip().startswith('#'): 403 if l.strip().startswith('#'):
394 continue 404 continue
395 line = l.rstrip('\n').split('\t') 405 line = l.rstrip('\n').split('\t')
396 try: 406 try:
397 links[line[2]] = line[3] 407 links[line[2]] = genelinks_entry(dbname=line[3], template=line[4])
398 except IndexError: 408 except IndexError:
399 continue 409 continue
400 f.close() 410 f.close()
401 except OSError: 411 except OSError:
402 continue 412 continue
425 # handle the errors. This introduces a small race condition when 435 # handle the errors. This introduces a small race condition when
426 # jinja later tries to re-open the template file, but we don't 436 # jinja later tries to re-open the template file, but we don't
427 # care too much. 437 # care too much.
428 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, 438 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
429 help='The template file to use. Defaults to blast_html.html.jinja') 439 help='The template file to use. Defaults to blast_html.html.jinja')
430 440
441 parser.add_argument('--dbname', type=str, default=None,
442 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
431 dblink_group = parser.add_mutually_exclusive_group() 443 dblink_group = parser.add_mutually_exclusive_group()
432 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', 444 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE',
433 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', 445 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
434 help="""A link template to link hits to a gene bank webpage. The template string is a 446 help="""A link template to link hits to a gene bank webpage. The template string is a
435 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 447 Python format string. It can contain the following replacement elements: {id[N]}, {fullid},
436 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 448 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be
437 replaced by the Nth element of the id or defline, where '|' is the field separator. 449 replaced by the Nth element of the id or defline, where '|' is the field separator.
438 450
439 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', 451 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
440 which is a link to the NCBI nucleotide database.""") 452 which is a link to the NCBI nucleotide database.""")
441 453
442 dblink_group.add_argument('--db-config-dir', 454 dblink_group.add_argument('--db-config-dir',
443 help="""The directory where databases are configured in blastdb*.loc files. These files 455 help="""The directory where databases are configured in blastdb*.loc files. These files
444 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines 456 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines
445 starting with '#' ignored), where the third field of a line should be a database path and the fourth 457 starting with '#' ignored), where the third field of a line should be a database path and the fourth
446 a genebank link template conforming to the --genelink-template option syntax. 458 a genebank link template conforming to the --genelink-template option syntax.
471 # self.write(i) 483 # self.write(i)
472 # args.output.writelines = fixed_writelines 484 # args.output.writelines = fixed_writelines
473 485
474 args.output.close() 486 args.output.close()
475 args.output = io.open(args.output.name, 'w', encoding='utf-8') 487 args.output = io.open(args.output.name, 'w', encoding='utf-8')
476 488
477 templatedir, templatename = path.split(args.template.name) 489 templatedir, templatename = path.split(args.template.name)
478 args.template.close() 490 args.template.close()
479 if not templatedir: 491 if not templatedir:
480 templatedir = '.' 492 templatedir = '.'
481 493
482 if args.db_config_dir is None: 494 if args.db_config_dir is None:
483 genelinks = args.genelink_template 495 genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None))
484 elif not path.isdir(args.db_config_dir): 496 elif not path.isdir(args.db_config_dir):
485 parser.error('db-config-dir does not exist or is not a directory') 497 parser.error('db-config-dir does not exist or is not a directory')
486 else: 498 else:
487 genelinks = read_genelinks(args.db_config_dir) 499 genelinks = read_genelinks(args.db_config_dir)
488 500
489 b = BlastVisualize(args.input, templatedir, templatename, genelinks) 501 b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks)
490 b.render(args.output) 502 b.render(args.output)
491 args.output.close() 503 args.output.close()
492 504
493 505
494 if __name__ == '__main__': 506 if __name__ == '__main__':