annotate interproscan5/create_html_index.py @ 3:74cf3f49f10c draft

Better HTML index.
author mkh
date Tue, 02 Feb 2016 12:27:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
1 #!/usr/bin/env python
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
2
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
3 import os
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
4 import re
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
5 import sys
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
6
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
# Opening HTML boilerplate written first into every generated index page:
# the document head (link colour/underline styling) followed by the opening
# <body> tag and the page heading.
START = '''<html>
<head>
<style>
a:link { text-decoration: none; color: red; }
a:visited { text-decoration: none; color: blue; }
a:hover { text-decoration: underline; color: green; } a:active { text-decoration: underline; color: green; }
</style>
</head>
<body>
<h1>InterProScan result summary page</h1>
'''

# Closing tags written last into every generated index page; they close the
# <body> and <html> elements opened by START.
END = '''
</body>
</html>
'''
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
23
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
24
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
def raw_mode(html_file, directory):
    """Write a flat HTML index linking to every regular file in *directory*.

    The page consists of START, one unordered list and END.  Entries are
    listed in sorted filename order; sub-directories and other non-regular
    entries are left out.  Each link targets the bare filename and is
    labelled with the filename minus its extension.

    html_file -- path of the HTML page to create (overwritten if present).
    directory -- directory whose files are to be listed.
    """
    item_template = '<li><a href="%s"> %s </a></li>'
    with open(html_file, 'w') as page:
        page.write(START)
        page.write('<ul>')
        for entry in sorted(os.listdir(directory)):
            if not os.path.isfile(os.path.join(directory, entry)):
                continue
            label = os.path.splitext(entry)[0]
            page.write(item_template % (entry, label))
        page.write('</ul>')
        page.write(END)
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
33
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
34
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
def fix_name(name):
    """Return *name* with every '&' and '/' replaced by an underscore."""
    unsafe_chars = re.compile(r'[&/]')
    return unsafe_chars.sub('_', name)
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
37
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
38
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write an HTML index of result files, ranked by number of feature rows.

    The first tab-separated column of every row in *tsv_file* is taken as a
    sequence id, and rows are counted per id.  When *orfed_ids* is true, ids
    that differ only by a trailing ``_<digits>`` suffix are grouped under the
    id with that suffix removed.  Groups are listed in an ordered list by
    descending total row count; a group with several member ids gets a nested
    list of its members, also by descending count.

    Each id is linked to the file in *directory* whose name without extension
    equals ``fix_name(id)``.  Ids with no such file are still listed, marked
    "broken link!".  Files in *directory* that were never linked are listed
    afterwards under "Sequences without any features".

    orfed_ids -- true if ids carry a ``_<digits>`` suffix to be grouped on.
    tsv_file  -- path of the tab-separated input; only column one is read.
    html_file -- path of the HTML page to create (overwritten if present).
    directory -- directory holding the per-sequence files to link to.

    NOTE: ids and filenames are inserted into the page without HTML escaping.
    """
    # group id -> [total rows in group, {sequence id: rows for that id}]
    name_freq = {}
    with open(tsv_file) as f:
        for line in f:
            # Drop the line terminator first so that a row without any tab
            # does not keep a newline inside the id; blank lines are not rows.
            row = line.rstrip('\r\n')
            if not row:
                continue
            name = row.split('\t', 1)[0]
            group = re.sub(r'_\d+$', '', name) if orfed_ids else name

            entry = name_freq.setdefault(group, [0, {}])
            entry[0] += 1
            entry[1][name] = entry[1].get(name, 0) + 1

    # Largest groups first; sorted() is stable, so ties keep insertion order.
    groups = sorted(name_freq.items(), key=lambda t: t[1][0], reverse=True)

    # Filename without extension -> filename, for every regular file.
    # Entries are removed as they get linked; what remains has no features.
    filename_dict = {}
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            filename_dict[os.path.splitext(filename)[0]] = filename

    def link(seq_id, count):
        """Return the anchor for *seq_id*, claiming its file if one exists."""
        target = filename_dict.pop(fix_name(seq_id), None)
        if target is None:
            return ('<a href="%s"> %s (%d features, broken link!)</a>' %
                    (seq_id, seq_id, count))
        return '<a href="%s"> %s (%d features)</a>' % (target, seq_id, count)

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ol>')
        for don, (freq, counts) in groups:
            members = sorted(counts.items(), key=lambda t: t[1], reverse=True)
            h.write('\n<li>')
            if len(members) == 1:
                h.write(link(members[0][0], members[0][1]))
            else:
                h.write('%s (%d features)' % (don, freq))
                h.write('\n<ul>')
                for n, count in members:
                    h.write('\n<li>' + link(n, count) + '</li>')
                h.write('</ul>')
            h.write('</li>')
        # Close the ranked list before the trailing section: a heading is not
        # valid content directly inside <ol>.
        h.write('</ol>')

        if filename_dict:
            h.write('<h2>Sequences without any features</h2>')
            h.write('\n<ul>')
            for n, p in sorted(filename_dict.items(), key=lambda t: t[0]):
                h.write('\n<li><a href="%s">%s</a></li>' % (p, n))
            h.write('</ul>')
        h.write(END)
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
100
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
101
74cf3f49f10c Better HTML index.
mkh
parents:
diff changeset
def main():
    """Run raw or cooked mode depending on the number of command-line arguments.

    Two arguments (``html_file directory``) select raw_mode.  Four arguments
    (``[0|1] tsv_file html_file directory``) select cooked_mode, where any
    first argument other than the string '0' enables ORF-suffix grouping.
    Any other argument count prints the usage message to stderr and exits
    with status 1.
    """
    args = sys.argv[1:]
    if len(args) == 2:
        raw_mode(args[0], args[1])
    elif len(args) == 4:
        cooked_mode(args[0] != '0', args[1], args[2], args[3])
    else:
        # sys.exit() with a string writes it to stderr and exits with status 1.
        sys.exit('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"')


# Only run when executed as a script, so the module can be imported safely.
if __name__ == '__main__':
    main()