3
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import re
|
|
5 import sys
|
|
6
|
|
# Opening boilerplate shared by every generated page: the document head with
# the link colour rules, the start of the body and the page title.
START = (
    '<html>\n'
    '<head>\n'
    '<style>\n'
    'a:link { text-decoration: none; color: red; }\n'
    'a:visited { text-decoration: none; color: blue; }\n'
    'a:hover { text-decoration: underline; color: green; } a:active { text-decoration: underline; color: green; }\n'
    '</style>\n'
    '</head>\n'
    '<body>\n'
    '<h1>InterProScan result summary page</h1>\n'
)

# Closing boilerplate that terminates the body and the document.
END = (
    '\n'
    '</body>\n'
    '</html>\n'
)
|
|
23
|
|
24
|
|
def raw_mode(html_file, directory):
    """Write *html_file* as a flat list of links to the files in *directory*.

    Every regular file directly inside *directory* gets one list item, in
    sorted name order.  The link target is the bare file name and the link
    text is the file name with its last extension removed.

    Args:
        html_file: Path of the HTML page to create (overwritten if present).
        directory: Directory whose regular files are listed.
    """
    with open(html_file, 'w') as page:
        page.write(START)
        page.write('<ul>')
        for entry in sorted(os.listdir(directory)):
            # Sub-directories and other non-file entries are not linked.
            if not os.path.isfile(os.path.join(directory, entry)):
                continue
            label = os.path.splitext(entry)[0]
            page.write('<li><a href="%s"> %s </a></li>' % (entry, label))
        page.write('</ul>')
        page.write(END)
|
|
33
|
|
34
|
|
def fix_name(name):
    """Return *name* with every ``&`` and ``/`` replaced by an underscore."""
    unsafe_chars = re.compile('[&/]')
    return unsafe_chars.sub('_', name)
|
|
37
|
|
38
|
|
def _feature_link(name, count, unlinked):
    """Return the ``<a>`` element for *name* and mark its file as linked.

    *unlinked* maps file base names to file names.  When it holds an entry
    for ``fix_name(name)`` that entry is removed and used as the link target;
    otherwise the raw name is used as the target and the link is flagged as
    broken.
    """
    filename = unlinked.pop(fix_name(name), None)
    if filename is None:
        return ('<a href="%s"> %s (%d features, broken link!)</a>' %
                (name, name, count))
    return '<a href="%s"> %s (%d features)</a>' % (filename, name, count)


def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write an HTML summary page that groups result files by sequence name.

    The first tab-separated column of every non-blank line in *tsv_file* is
    taken as a sequence name, and each occurrence counts as one feature.
    Groups are listed by descending total feature count and the members of a
    group by descending count.  A group with a single member is written as
    one link; a larger group is written as a heading with a nested list.

    Each name links to the file in *directory* whose base name (last
    extension removed) equals ``fix_name(name)``; names without such a file
    are flagged as broken links.  Files in *directory* that no name linked to
    are listed after the main list under "Sequences without any features".

    Args:
        orfed_ids: When true, a trailing ``_<digits>`` suffix is stripped
            from a name to obtain its group; otherwise each name is its own
            group.
        tsv_file: Path of the tab-separated input file.
        html_file: Path of the HTML page to create (overwritten if present).
        directory: Directory holding the per-sequence result files.
    """
    # group name -> {member name -> number of TSV lines}
    groups = {}
    with open(tsv_file) as tsv:
        for line in tsv:
            if not line.strip():
                # A blank line carries no feature; do not count it under ''.
                continue
            # Drop the line terminator first so that a line without any tab
            # does not leave a newline inside the name.
            name = line.rstrip('\n').split('\t', 1)[0]
            group = re.sub(r'_\d+$', '', name) if orfed_ids else name
            members = groups.setdefault(group, {})
            members[name] = members.get(name, 0) + 1

    # sorted() is stable, so ties keep the order of first appearance.
    ranked = sorted(
        ((group,
          sum(members.values()),
          sorted(members.items(), key=lambda item: item[1], reverse=True))
         for group, members in groups.items()),
        key=lambda entry: entry[1],
        reverse=True)

    # file base name -> file name; entries are removed as they get linked.
    unlinked = {}
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            unlinked[os.path.splitext(filename)[0]] = filename

    with open(html_file, 'w') as page:
        page.write(START)
        page.write('<ol>')
        for group, total, members in ranked:
            page.write('\n<li>')
            if len(members) == 1:
                name, count = members[0]
                page.write(_feature_link(name, count, unlinked))
            else:
                page.write('%s (%d features)' % (group, total))
                page.write('\n<ul>')
                for name, count in members:
                    page.write('\n<li>%s</li>' %
                               _feature_link(name, count, unlinked))
                page.write('</ul>')
            page.write('</li>')
        # Close the ranked list before the trailing section: a heading and a
        # second list are not valid direct children of <ol>.
        page.write('</ol>')

        if unlinked:
            page.write('<h2>Sequences without any features</h2>')
            page.write('\n<ul>')
            for base, filename in sorted(unlinked.items()):
                page.write('\n<li><a href="%s">%s</a></li>' % (filename, base))
            page.write('</ul>')
        page.write(END)
|
|
100
|
|
101
|
|
def main():
    """Dispatch to raw or cooked mode based on the command-line arguments.

    Two arguments (``html_file directory``) select raw mode.  Four arguments
    (``[0|1] tsv_file html_file directory``) select cooked mode, where any
    first argument other than ``0`` enables ORF-suffix grouping.  Any other
    argument count writes a usage message to stderr and exits with status 1.
    """
    if len(sys.argv) == 3:
        raw_mode(sys.argv[1], sys.argv[2])
    elif len(sys.argv) == 5:
        cooked_mode(sys.argv[1] != '0', sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        sys.stderr.write(
            'Args must be "html_file directory" or '
            '"[0|1] tsv_file html_file directory"\n')
        sys.exit(1)


# Run only when executed as a script, so importing this module has no side
# effects.
if __name__ == '__main__':
    main()
|