Mercurial > repos > si-datascience > interps_test
view interproscan5/create_html_index.py @ 0:0da2847fc108 draft default tip
Uploaded
author | si-datascience |
---|---|
date | Thu, 24 May 2018 14:57:30 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Build an HTML index page linking to the files in a results directory.

Usage:
    create_html_index.py html_file directory
    create_html_index.py [0|1] tsv_file html_file directory

The two-argument form ("raw mode") lists every file in ``directory``.
The four-argument form ("cooked mode") groups the links by the sequence
names found in the first column of ``tsv_file`` and orders them by the
number of TSV rows (features) each name has.
"""
import os
import re
import sys
from xml.sax.saxutils import escape

START = (
    '<html> <head> <style> '
    'a:link { text-decoration: none; color: red; } '
    'a:visited { text-decoration: none; color: blue; } '
    'a:hover { text-decoration: underline; color: green; } '
    'a:active { text-decoration: underline; color: green; } '
    '</style> </head> <body> '
    '<h1>InterProScan result summary page</h1> '
)

END = ' </body> </html> '

# Trailing "_<digits>" that is removed from a name when orfed_ids is set.
_ORF_SUFFIX = re.compile(r'_\d+$')


def _text(value):
    """Escape *value* for use as HTML element text."""
    return escape(value)


def _attr(value):
    """Escape *value* for use inside a double-quoted HTML attribute."""
    return escape(value, {'"': '&quot;'})


def _list_files(directory):
    """Return the sorted names of the regular files directly in *directory*."""
    return [
        entry for entry in sorted(os.listdir(directory))
        if os.path.isfile(os.path.join(directory, entry))
    ]


def raw_mode(html_file, directory):
    """Write *html_file* as a flat list of links to the files in *directory*.

    Each link targets the file name and is labelled with the file name
    minus its extension. Sub-directories are ignored.
    """
    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ul>')
        # Listed after html_file is opened, so an index written into
        # *directory* itself is included, exactly as before.
        for filename in _list_files(directory):
            label = os.path.splitext(filename)[0]
            h.write('<li><a href="%s"> %s </a></li>'
                    % (_attr(filename), _text(label)))
        h.write('</ul>')
        h.write(END)


def fix_name(name):
    """Return *name* with every ``&`` and ``/`` replaced by ``_``."""
    return re.sub('[&/]', '_', name)


def _count_features(tsv_file, orfed_ids):
    """Count TSV rows per name, grouped by the name without its ORF suffix.

    Returns a dict mapping group name -> ``[total_rows, {name: rows}]``.
    When *orfed_ids* is false every name forms its own group.
    """
    groups = {}
    with open(tsv_file) as f:
        for line in f:
            # Drop the line terminator so a tab-less line does not yield a
            # name ending in "\n", and skip empty lines entirely.
            line = line.rstrip('\r\n')
            if not line:
                continue
            name = line.split('\t', 1)[0]
            group = _ORF_SUFFIX.sub('', name) if orfed_ids else name
            entry = groups.setdefault(group, [0, {}])
            entry[0] += 1
            entry[1][name] = entry[1].get(name, 0) + 1
    return groups


def _link(name, count, files_by_stem, linked):
    """Return the ``<a>`` element for *name*, recording the file it uses.

    The target is the file whose stem equals ``fix_name(name)``; if there
    is none, the link is labelled as broken and points at the raw name.
    """
    stem = fix_name(name)
    if stem in files_by_stem:
        linked.add(stem)
        return ('<a href="%s"> %s (%d features)</a>'
                % (_attr(files_by_stem[stem]), _text(name), count))
    return ('<a href="%s"> %s (%d features, broken link!)</a>'
            % (_attr(name), _text(name), count))


def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write *html_file* as an index grouped and ranked by feature count.

    Args:
        orfed_ids: if true, a trailing ``_<digits>`` is removed from each
            name to form the group it is listed under.
        tsv_file: tab-separated file; the first column is the name and
            each row counts as one feature.
        html_file: path of the HTML page to write.
        directory: directory holding the files to link to; a name is
            matched to the file whose stem equals ``fix_name(name)``.

    Groups are listed by descending total feature count, members within a
    group by descending count. Files in *directory* that no name linked to
    are listed afterwards under "Sequences without any features".
    """
    groups = _count_features(tsv_file, orfed_ids)
    ranked = sorted(groups.items(), key=lambda kv: kv[1][0], reverse=True)

    files_by_stem = {
        os.path.splitext(filename)[0]: filename
        for filename in _list_files(directory)
    }
    # Track used stems instead of deleting them, so two names that map to
    # the same stem both get a working link.
    linked = set()

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ol>')
        for group, (total, counts) in ranked:
            members = sorted(counts.items(), key=lambda kv: kv[1],
                             reverse=True)
            h.write('\n<li>')
            if len(members) == 1:
                name, count = members[0]
                h.write(_link(name, count, files_by_stem, linked))
            else:
                h.write('%s (%d features)' % (_text(group), total))
                h.write('\n<ul>')
                for name, count in members:
                    h.write('\n<li>%s</li>'
                            % _link(name, count, files_by_stem, linked))
                h.write('</ul>')
            h.write('</li>')
        # Close the ranked list before the heading: <ol> may only contain
        # <li> children.
        h.write('</ol>')

        unlinked = sorted(s for s in files_by_stem if s not in linked)
        if unlinked:
            h.write('<h2>Sequences without any features</h2>')
            h.write('\n<ul>')
            for stem in unlinked:
                h.write('\n<li><a href="%s">%s</a></li>'
                        % (_attr(files_by_stem[stem]), _text(stem)))
            h.write('</ul>')
        h.write(END)


def main():
    """Dispatch to raw or cooked mode based on the number of arguments."""
    if len(sys.argv) == 3:
        raw_mode(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 5:
        cooked_mode(sys.argv[1] != '0', sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        print('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"')
        sys.exit(1)


# Guarded so that importing this module does not parse sys.argv or exit.
if __name__ == '__main__':
    main()