# HG changeset patch # User Jan Kanis # Date 1405345668 -7200 # Node ID f5066973029a72fc0a24f8e163b1c02662109c30 # Parent 0c2a03f9740bb074180b854071ab1ce92b160b42 refactor diff -r 0c2a03f9740b -r f5066973029a blast2html.py --- a/blast2html.py Mon Jul 14 15:01:32 2014 +0200 +++ b/blast2html.py Mon Jul 14 15:47:48 2014 +0200 @@ -238,10 +238,9 @@ max_scale_labels = 10 - def __init__(self, input, templatedir, templatename, dbname, genelinks={}): + def __init__(self, input, templatedir, templatename, genelinks): self.input = input self.templatename = templatename - self.dbname = dbname self.genelinks = genelinks self.blast = objectify.parse(self.input).getroot() @@ -359,16 +358,13 @@ db = hit.getroottree().getroot().BlastOutput_db - if isinstance(self.genelinks, six.string_types): - template = self.genelinks - else: - template = self.genelinks[db].template + template = self.genelinks[db].template if text is None: if text_from == 'hitid': text = hitid(hit) elif text_from == 'dbname': - text = self.dbname or self.genelinks[db].dbname or 'Gene Bank' + text = self.genelinks[db].dbname else: raise ValueError("Unknown value for text_from: '{0}'. Use 'hitid' or 'dbname'.".format(text_from)) @@ -391,8 +387,8 @@ genelinks_entry = namedtuple('genelinks_entry', 'dbname template') -def read_genelinks(dir): - links = defaultdict(lambda: genelinks_entry(None, None)) +def read_blastdb(dir, default): + links = defaultdict(lambda: default) # blastdb.loc, blastdb_p.loc, blastdb_d.loc, etc. files = sorted(glob.glob(path.join(dir, 'blastdb*.loc'))) # reversed, so blastdb.loc will take precedence @@ -404,7 +400,7 @@ continue line = l.rstrip('\n').split('\t') try: - links[line[2]] = genelinks_entry(dbname=line[3], template=line[4]) + links[line[2]] = genelinks_entry(dbname=line[3] or default.dbname, template=line[4]) except IndexError: continue f.close() @@ -422,7 +418,7 @@ default_template = path.join(path.dirname(__file__), 'blast2html.html.jinja') parser = argparse.ArgumentParser(description="Convert a BLAST XML result into a nicely readable html page", - usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE]".format(sys.argv[0])) + usage="{0} [-i] INPUT [-o OUTPUT] [--genelink-template URL_TEMPLATE] [--dbname DBNAME]".format(sys.argv[0])) input_group = parser.add_mutually_exclusive_group(required=True) input_group.add_argument('positional_arg', metavar='INPUT', nargs='?', type=argparse.FileType(mode='r'), help='The input Blast XML file, same as -i/--input') @@ -438,26 +434,25 @@ parser.add_argument('--template', type=argparse.FileType(mode='r'), default=default_template, help='The template file to use. Defaults to blast_html.html.jinja') - parser.add_argument('--dbname', type=str, default=None, + parser.add_argument('--dbname', type=str, default='Gene Bank', help="The link text to use for external links to a gene bank database. Defaults to 'Gene Bank'") - dblink_group = parser.add_mutually_exclusive_group() - dblink_group.add_argument('--genelink-template', metavar='URL_TEMPLATE', - default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', - help="""A link template to link hits to a gene bank webpage. The template string is a - Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, - {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be - replaced by the Nth element of the id or defline, where '|' is the field separator. + parser.add_argument('--genelink-template', metavar='URL_TEMPLATE', + default='http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', + help="""A link template to link hits to a gene bank webpage. The template string is a + Python format string. It can contain the following replacement elements: {id[N]}, {fullid}, + {defline[N]}, {fulldefline}, {accession}, where N is a number. id[N] and defline[N] will be + replaced by the Nth element of the id or defline, where '|' is the field separator. + + The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', + which is a link to the NCBI nucleotide database.""") - The default is 'http://www.ncbi.nlm.nih.gov/nucleotide/{accession}?report=genbank&log$=nuclalign', - which is a link to the NCBI nucleotide database.""") - - dblink_group.add_argument('--db-config-dir', - help="""The directory where databases are configured in blastdb*.loc files. These files - are consulted for creating a gene bank link. The files should be tab-separated tables (with lines - starting with '#' ignored), where the third field of a line should be a database path and the fourth - a genebank link template conforming to the --genelink-template option syntax. - - This option is incompatible with --genelink-template.""") + parser.add_argument('--db-config-dir', + help="""The directory where databases are configured in blastdb*.loc files. These files + are consulted for creating a gene bank link. The files should conform to the format that + Galaxy's BLAST expect, i.e. tab-separated tables (with lines starting with '#' ignored), + with two extra fields. The third field of a line should be a database path and the fourth + a genebank link template conforming to the --genelink-template option syntax. Entries in + these config files override links specified using --genelink-template and --dbname.""") args = parser.parse_args() if args.input == None: @@ -491,14 +486,15 @@ if not templatedir: templatedir = '.' + defaultentry = genelinks_entry(args.dbname, args.genelink_template) if args.db_config_dir is None: - genelinks = defaultdict(lambda: genelinks_entry(template=args.genelink_template, dbname=None)) + genelinks = defaultdict(lambda: defaultentry) elif not path.isdir(args.db_config_dir): parser.error('db-config-dir does not exist or is not a directory') else: - genelinks = read_genelinks(args.db_config_dir) + genelinks = read_blastdb(args.db_config_dir, default=defaultentry) - b = BlastVisualize(args.input, templatedir, templatename, dbname=args.dbname, genelinks=genelinks) + b = BlastVisualize(args.input, templatedir, templatename, genelinks=genelinks) b.render(args.output) args.output.close()