comparison interproscan5/create_html_index.py @ 3:74cf3f49f10c draft

Better HTML index.
author mkh
date Tue, 02 Feb 2016 12:27:19 -0500
parents
children
comparison
equal deleted inserted replaced
2:d1d34608dba0 3:74cf3f49f10c
1 #!/usr/bin/env python
2
3 import os
4 import re
5 import sys
6
# Opening boilerplate written at the top of every generated index page:
# the document head (with link colouring rules) and the page title.
START = '''<html>
<head>
<style>
a:link { text-decoration: none; color: red; }
a:visited { text-decoration: none; color: blue; }
a:hover { text-decoration: underline; color: green; } a:active { text-decoration: underline; color: green; }
</style>
</head>
<body>
<h1>InterProScan result summary page</h1>
'''

# Closing boilerplate written at the bottom of every generated index page.
END = '''
</body>
</html>
'''
23
24
def raw_mode(html_file, directory):
    """Write a flat HTML index linking to every regular file in a directory.

    html_file -- path of the HTML page to create (opened for writing).
    directory -- directory whose regular files are listed, sorted by name.

    Each list item links to the bare file name and is labelled with the
    file name minus its last extension.
    """
    with open(html_file, 'w') as page:
        page.write(START)
        page.write('<ul>')
        for entry in sorted(os.listdir(directory)):
            # Only regular files are listed; sub-directories are skipped.
            if not os.path.isfile(os.path.join(directory, entry)):
                continue
            label = os.path.splitext(entry)[0]
            page.write('<li><a href="%s"> %s </a></li>' % (entry, label))
        page.write('</ul>')
        page.write(END)
33
34
def fix_name(name):
    """Return name with every '&' and '/' replaced by an underscore."""
    return name.replace('&', '_').replace('/', '_')
37
38
def _feature_link(name, count, unlinked):
    """Return the <a> element for one name, consuming its file from unlinked.

    The file is looked up under fix_name(name).  When it is found, its entry
    is removed from unlinked so that it is not reported again later; when it
    is missing, the link is flagged as broken and points at the raw name.
    """
    target = unlinked.pop(fix_name(name), None)
    if target is None:
        return ('<a href="%s"> %s (%d features, broken link!)</a>' %
                (name, name, count))
    return '<a href="%s"> %s (%d features)</a>' % (target, name, count)


def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write an HTML index of result files grouped by name and feature count.

    orfed_ids -- when true, a trailing ``_<digits>`` suffix is removed from
                 each name to obtain its group, so names differing only in
                 that suffix are listed together under one entry.
    tsv_file  -- tab-separated input; the first column of every non-blank
                 line is taken as a name and each line counts as one feature.
    html_file -- path of the HTML page to create (opened for writing).
    directory -- directory holding the files to link to; a name is matched
                 (after fix_name) against file names without their last
                 extension.

    Groups are listed by descending feature count and, inside a group, names
    by descending feature count.  Files in directory that no name referred
    to are listed afterwards under "Sequences without any features".
    """
    # group name -> {name: number of lines seen for that name}
    groups = {}
    with open(tsv_file) as tsv:
        for line in tsv:
            line = line.rstrip('\r\n')
            if not line:
                # A blank line carries no name; counting it would create a
                # bogus entry with a broken link.
                continue
            name = line.split('\t', 1)[0]
            group = re.sub(r'_\d+$', '', name) if orfed_ids else name
            members = groups.setdefault(group, {})
            members[name] = members.get(name, 0) + 1

    # (group, total features, [(name, features), ...]), most features first.
    ranked = sorted(
        ((group,
          sum(members.values()),
          sorted(members.items(), key=lambda t: t[1], reverse=True))
         for group, members in groups.items()),
        key=lambda t: t[1], reverse=True)

    # File name without extension -> file name, for every regular file.
    # Entries are removed as they get linked; what remains has no features.
    unlinked = {}
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            unlinked[os.path.splitext(filename)[0]] = filename

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ol>')
        for group, total, members in ranked:
            h.write('\n<li>')
            if len(members) == 1:
                # A group with a single name is shown as a direct link.
                name, count = members[0]
                h.write(_feature_link(name, count, unlinked))
            else:
                h.write('%s (%d features)' % (group, total))
                h.write('\n<ul>')
                for name, count in members:
                    h.write('\n<li>%s</li>' %
                            _feature_link(name, count, unlinked))
                h.write('</ul>')
            h.write('</li>')
        # Close the ranked list before the next section: a heading is not
        # allowed as a direct child of <ol>.
        h.write('</ol>')

        if unlinked:
            h.write('<h2>Sequences without any features</h2>')
            h.write('\n<ul>')
            for stem, filename in sorted(unlinked.items()):
                h.write('\n<li><a href="%s">%s</a></li>' % (filename, stem))
            h.write('</ul>')
        h.write(END)
100
101
def main():
    """Dispatch on the command-line arguments to raw_mode or cooked_mode.

    Two arguments (html_file, directory) are passed to raw_mode.  Four
    arguments (flag, tsv_file, html_file, directory) are passed to
    cooked_mode, with orfed_ids true for any flag other than '0'.  Any
    other argument count prints a usage message and exits with status 1.
    """
    args = sys.argv[1:]
    if len(args) == 2:
        raw_mode(args[0], args[1])
    elif len(args) == 4:
        cooked_mode(args[0] != '0', args[1], args[2], args[3])
    else:
        print('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"')
        sys.exit(1)


# Run only when executed as a script, so that importing this module does not
# parse sys.argv, write files or exit the interpreter.
if __name__ == '__main__':
    main()