Mercurial > repos > mkh > ips5
comparison interproscan5/create_html_index.py @ 3:74cf3f49f10c draft
Better HTML index.
| author | mkh | 
|---|---|
| date | Tue, 02 Feb 2016 12:27:19 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:d1d34608dba0 | 3:74cf3f49f10c | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import os | |
| 4 import re | |
| 5 import sys | |
| 6 | |
# Opening boilerplate written at the top of every generated index page:
# the document head (link colours for normal/visited/hover/active states)
# followed by the page heading.  Each literal below is one output line.
START = (
    '<html>\n'
    '<head>\n'
    '<style>\n'
    'a:link { text-decoration: none; color: red; }\n'
    'a:visited { text-decoration: none; color: blue; }\n'
    'a:hover { text-decoration: underline; color: green; } a:active { text-decoration: underline; color: green; }\n'
    '</style>\n'
    '</head>\n'
    '<body>\n'
    '<h1>InterProScan result summary page</h1>\n'
)

# Closing boilerplate written after the generated list(s); it starts with a
# newline so the closing tags begin on their own line.
END = (
    '\n'
    '</body>\n'
    '</html>\n'
)
| 23 | |
| 24 | |
def raw_mode(html_file, directory):
    """Write a flat HTML index linking to every regular file in a directory.

    The page consists of START, a single ``<ul>`` with one ``<li>`` per
    regular file found directly in *directory* (sorted by file name,
    sub-directories skipped), and END.  Each link's ``href`` is the bare file
    name and its label is the file name without its extension.

    Args:
        html_file: Path of the HTML file to create (overwritten if present).
        directory: Directory whose files are listed.
    """
    # Function-scope import keeps this function self-contained; saxutils is
    # available on both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ul>')
        for filename in sorted(os.listdir(directory)):
            if not os.path.isfile(os.path.join(directory, filename)):
                continue
            label = os.path.splitext(filename)[0]
            # File names are arbitrary text: escape them so characters such
            # as '&', '<' or '"' cannot break the attribute or the markup.
            h.write('<li><a href="%s"> %s </a></li>' % (
                escape(filename, {'"': '&quot;'}), escape(label)))
        h.write('</ul>')
        h.write(END)
| 33 | |
| 34 | |
def fix_name(name):
    """Return *name* with every '&' and '/' replaced by an underscore."""
    unsafe_chars = re.compile('[&/]')
    return unsafe_chars.sub('_', name)
| 37 | |
| 38 | |
def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write an HTML index of result files ranked by feature count.

    Every non-blank line of *tsv_file* counts as one feature for the sequence
    named in its first tab-separated column.  Sequences are grouped (see
    *orfed_ids*), groups are listed in an ``<ol>`` by descending total feature
    count, and each sequence is linked to the file in *directory* whose name
    without extension equals ``fix_name(sequence_name)``.  Sequences with no
    matching file are still listed but flagged "broken link!".  Files in
    *directory* that were not linked from any sequence are listed afterwards
    under "Sequences without any features".

    Args:
        orfed_ids: When true, a trailing ``_<digits>`` suffix is stripped from
            each name to form its group key (presumably an ORF index added
            upstream; verify against the caller).  When false every name is
            its own group.
        tsv_file: Path of the tab-separated input file.
        html_file: Path of the HTML file to create (overwritten if present).
        directory: Directory containing the per-sequence result files.
    """
    # Function-scope import keeps this block self-contained; saxutils is
    # available on both Python 2 and Python 3.
    from xml.sax.saxutils import escape

    summary = _count_features(orfed_ids, tsv_file)

    # Map "file name without extension" -> file name.  Entries are removed as
    # they get linked, so whatever remains at the end was never referenced.
    filename_dict = {}
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            filename_dict[os.path.splitext(filename)[0]] = filename

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ol>')
        for group, total, members in summary:
            h.write('\n<li>')
            if len(members) == 1:
                # A group with a single sequence is rendered as a direct link.
                name, count = members[0]
                h.write(_feature_link(name, count, filename_dict))
            else:
                h.write('%s (%d features)' % (escape(group), total))
                h.write('\n<ul>')
                for name, count in members:
                    h.write('\n<li>%s</li>'
                            % _feature_link(name, count, filename_dict))
                h.write('</ul>')
            h.write('</li>')
        # Close the ranked list before the leftover section: a heading and a
        # <ul> are not valid direct children of <ol>.
        h.write('</ol>')

        if filename_dict:
            h.write('<h2>Sequences without any features</h2>')
            h.write('\n<ul>')
            for stem, path in sorted(filename_dict.items()):
                h.write('\n<li><a href="%s">%s</a></li>'
                        % (escape(path, {'"': '&quot;'}), escape(stem)))
            h.write('</ul>')
        h.write(END)


def _count_features(orfed_ids, tsv_file):
    """Tally TSV rows per sequence name, grouped by (optionally de-ORFed) name.

    Returns a list of ``(group, total, members)`` tuples ordered by descending
    ``total``; ``members`` is a list of ``(name, count)`` pairs ordered by
    descending ``count``.  Both sorts are stable, so ties keep the dict's
    iteration order.
    """
    groups = {}
    with open(tsv_file) as f:
        for line in f:
            # Blank lines carry no sequence name; skip them rather than
            # counting a bogus entry.
            if not line.strip():
                continue
            name = line.rstrip('\r\n').split('\t', 1)[0]
            group = re.sub(r'_\d+$', '', name) if orfed_ids else name
            members = groups.setdefault(group, {})
            members[name] = members.get(name, 0) + 1

    summary = [
        (group,
         sum(members.values()),
         sorted(members.items(), key=lambda t: t[1], reverse=True))
        for group, members in groups.items()
    ]
    summary.sort(key=lambda t: t[1], reverse=True)
    return summary


def _feature_link(name, count, filename_dict):
    """Return the ``<a>`` element for one sequence, consuming its file entry.

    The matching entry (keyed by ``fix_name(name)``) is removed from
    *filename_dict*.  When there is none, the link points at the raw name and
    its label is flagged "broken link!".
    """
    from xml.sax.saxutils import escape

    target = filename_dict.pop(fix_name(name), None)
    if target is None:
        template = '<a href="%s"> %s (%d features, broken link!)</a>'
        target = name
    else:
        template = '<a href="%s"> %s (%d features)</a>'
    return template % (escape(target, {'"': '&quot;'}), escape(name), count)
| 100 | |
| 101 | |
def main():
    """Dispatch on the number of command-line arguments.

    * ``html_file directory`` -> ``raw_mode``
    * ``[0|1] tsv_file html_file directory`` -> ``cooked_mode``; the first
      argument enables name grouping unless it is exactly ``'0'``.

    Any other argument count writes a usage message to stderr and exits with
    status 1.
    """
    argc = len(sys.argv)
    if argc == 3:
        raw_mode(sys.argv[1], sys.argv[2])
    elif argc == 5:
        cooked_mode(sys.argv[1] != '0', sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        # Usage errors belong on stderr so they never mix with regular output.
        sys.stderr.write('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"\n')
        sys.exit(1)


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    main()
