comparison blast2html.py @ 116:f5066973029a

refactor
author Jan Kanis <jan.code@jankanis.nl>
date Mon, 14 Jul 2014 15:47:48 +0200
parents 0c2a03f9740b
children 7f3f8c10f44b
comparison
equal deleted inserted replaced
115:0c2a03f9740b 116:f5066973029a
236 236
237 colors = ('black', 'blue', 'green', 'magenta', 'red') 237 colors = ('black', 'blue', 'green', 'magenta', 'red')
238 238
239 max_scale_labels = 10 239 max_scale_labels = 10
240 240
241 def __init__(self, input, templatedir, templatename, dbname, genelinks={}): 241 def __init__(self, input, templatedir, templatename, genelinks):
242 self.input = input 242 self.input = input
243 self.templatename = templatename 243 self.templatename = templatename
244 self.dbname = dbname
245 self.genelinks = genelinks 244 self.genelinks = genelinks
246 245
247 self.blast = objectify.parse(self.input).getroot() 246 self.blast = objectify.parse(self.input).getroot()
248 self.loader = jinja2.FileSystemLoader(searchpath=templatedir) 247 self.loader = jinja2.FileSystemLoader(searchpath=templatedir)
249 self.environment = jinja2.Environment(loader=self.loader, 248 self.environment = jinja2.Environment(loader=self.loader,
357 display_nolink: boolean, if false don't display anything if no link can be created. Default True. 356 display_nolink: boolean, if false don't display anything if no link can be created. Default True.
358 """ 357 """
359 358
360 db = hit.getroottree().getroot().BlastOutput_db 359 db = hit.getroottree().getroot().BlastOutput_db
361 360
362 if isinstance(self.genelinks, six.string_types): 361 template = self.genelinks[db].template
363 template = self.genelinks
364 else:
365 template = self.genelinks[db].template
366 362
367 if text is None: 363 if text is None:
368 if text_from == 'hitid': 364 if text_from == 'hitid':
369 text = hitid(hit) 365 text = hitid(hit)
370 elif text_from == 'dbname': 366 elif text_from == 'dbname':
371 text = self.dbname or self.genelinks[db].dbname or 'Gene Bank' 367 text = self.genelinks[db].dbname
372 else: 368 else:
373 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) 369 raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from))
374 370
375 if template is None: 371 if template is None:
376 return text if display_nolink else '' 372 return text if display_nolink else ''
389 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else '' 385 classattr = 'class="{0}" '.format(jinja2.escape(cssclass)) if cssclass is not None else ''
390 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text))) 386 return jinja2.Markup("<a {0}href=\"{1}\">{2}</a>".format(classattr, jinja2.escape(link), jinja2.escape(text)))
391 387
392 388
393 genelinks_entry = namedtuple('genelinks_entry', 'dbname template') 389 genelinks_entry = namedtuple('genelinks_entry', 'dbname template')
394 def read_genelinks(dir): 390 def read_blastdb(dir, default):
395 links = defaultdict(lambda: genelinks_entry(None, None)) 391 links = defaultdict(lambda: default)
396 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. 392 # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc.
397 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) 393 files = sorted(glob.glob(path.join(dir, 'blastdb*.loc')))
398 # reversed, so blastdb.loc will take precedence 394 # reversed, so blastdb.loc will take precedence
399 for f in reversed(files): 395 for f in reversed(files):
400 try: 396 try:
402 for l in f.readlines(): 398 for l in f.readlines():
403 if l.strip().startswith('#'): 399 if l.strip().startswith('#'):
404 continue 400 continue
405 line = l.rstrip('\n').split('\t') 401 line = l.rstrip('\n').split('\t')
406 try: 402 try:
407 links[line[2]] = genelinks_entry(dbname=line[3], template=line[4]) 403 links[line[2]] = genelinks_entry(dbname=line[3] or default.dbname, template=line[4])
408 except IndexError: 404 except IndexError:
409 continue 405 continue
410 f.close() 406 f.close()
411 except OSError: 407 except OSError:
412 continue 408 continue
420 416
421 def main(): 417 def main():
422 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') 418 default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja')
423 419
424 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", 420 parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page",
425 usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE]".format(sys.argv[0])) 421 usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0]))
426 input_group = parser.add_mutually_exclusive_group(required=True) 422 input_group = parser.add_mutually_exclusive_group(required=True)
427 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), 423 input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'),
428 help='The input Blast XML file, same as -i/--input') 424 help='The input Blast XML file, same as -i/--input')
429 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'), 425 input_group.add_argument('-i', '--input', type=argparse.FileType(mode='r'),
430 help='The input Blast XML file') 426 help='The input Blast XML file')
436 # jinja later tries to re-open the template file, but we don't 432 # jinja later tries to re-open the template file, but we don't
437 # care too much. 433 # care too much.
438 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, 434 parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template,
439 help='The template file to use. Defaults to blast_html.html.jinja') 435 help='The template file to use. Defaults to blast_html.html.jinja')
440 436
441 parser.add_argument('--dbname', type=str, default=None, 437 parser.add_argument('--dbname', type=str, default='Gene Bank',
442 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") 438 help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'")
443 dblink_group = parser.add_mutually_exclusive_group() 439 parser.add_argument('--genelink-template', metavar='URL_TEMPLATE',
444 dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', 440 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
445 default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', 441 help="""A link template to link hits to a gene bank webpage. The template string is a
446 help="""A link template to link hits to a gene bank webpage. The template string is a 442 Python format string. It can contain the following replacement elements: {id[N]}, {fullid},
447 Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, 443 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be
448 {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be 444 replaced by the Nth element of the id or defline, where '|' is the field separator.
449 replaced by the Nth element of the id or defline, where '|' is the field separator. 445
450 446 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign',
451 The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', 447 which is a link to the NCBI nucleotide database.""")
452 which is a link to the NCBI nucleotide database.""") 448
453 449 parser.add_argument('--db-config-dir',
454 dblink_group.add_argument('--db-config-dir', 450 help="""The directory where databases are configured in blastdb*.loc files. These files
455 help="""The directory where databases are configured in blastdb*.loc files. These files 451 are consulted for creating a gene bank link. The files should conform to the format that
456 are consulted for creating a gene bank link. The files should be tab-separated tables (with lines 452 Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored),
457 starting with '#' ignored), where the third field of a line should be a database path and the fourth 453 with two extra fields. The third field of a line should be a database path and the fourth
458 a genebank link template conforming to the --genelink-template option syntax. 454 a genebank link template conforming to the --genelink-template option syntax. Entries in
459 455 these config files override links specified using --genelink-template and --dbname.""")
460 This option is incompatible with --genelink-template.""")
461 456
462 args = parser.parse_args() 457 args = parser.parse_args()
463 if args.input == None: 458 if args.input == None:
464 args.input = args.positional_arg 459 args.input = args.positional_arg
465 if args.input == None: 460 if args.input == None:
489 templatedir, templatename = path.split(args.template.name) 484 templatedir, templatename = path.split(args.template.name)
490 args.template.close() 485 args.template.close()
491 if not templatedir: 486 if not templatedir:
492 templatedir = '.' 487 templatedir = '.'
493 488
489 defaultentry = genelinks_entry(args.dbname, args.genelink_template)
494 if args.db_config_dir is None: 490 if args.db_config_dir is None:
495 genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None)) 491 genelinks = defaultdict(lambda: defaultentry)
496 elif not path.isdir(args.db_config_dir): 492 elif not path.isdir(args.db_config_dir):
497 parser.error('db-config-dir does not exist or is not a directory') 493 parser.error('db-config-dir does not exist or is not a directory')
498 else: 494 else:
499 genelinks = read_genelinks(args.db_config_dir) 495 genelinks = read_blastdb(args.db_config_dir, default=defaultentry)
500 496
501 b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks) 497 b = BlastVisualize(args.input, templatedir, templatename, genelinks=genelinks)
502 b.render(args.output) 498 b.render(args.output)
503 args.output.close() 499 args.output.close()
504 500
505 501
506 if __name__ == '__main__': 502 if __name__ == '__main__':