Mercurial > repos > si-datascience > interps_test
comparison interproscan5/create_html_index.py @ 0:0da2847fc108 draft default tip
Uploaded
author | si-datascience |
---|---|
date | Thu, 24 May 2018 14:57:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0da2847fc108 |
---|---|
1 #!/usr/bin/env python | |
2 | |
import os
import re
import sys
from xml.sax.saxutils import escape
6 | |
# Opening boilerplate of the generated index page: stylesheet for the links
# plus the page heading.  Written verbatim at the top of every index file.
START = '''<html>
<head>
<style>
a:link { text-decoration: none; color: red; }
a:visited { text-decoration: none; color: blue; }
a:hover { text-decoration: underline; color: green; } a:active { text-decoration: underline; color: green; }
</style>
</head>
<body>
<h1>InterProScan result summary page</h1>
'''

# Closing boilerplate matching START; written verbatim at the end of every
# index file.
END = '''
</body>
</html>
'''
23 | |
24 | |
def raw_mode(html_file, directory):
    """Write a flat HTML index linking to every regular file in *directory*.

    Entries are sorted by file name; sub-directories are ignored.  Each link
    targets the bare file name and is labelled with the name minus its
    extension.

    :param html_file: path of the index page to (over)write
    :param directory: directory whose files are listed
    """
    # List the directory before opening the output so that a bad directory
    # raises without leaving a truncated index page behind.
    filenames = sorted(
        f for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f))
    )

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ul>')
        for filename in filenames:
            label = os.path.splitext(filename)[0]
            # File names are arbitrary text: escape them so '&', '<', '>'
            # and '"' cannot break the markup.
            h.write('<li><a href="%s"> %s </a></li>' % (
                escape(filename, {'"': '&quot;'}),
                escape(label),
            ))
        h.write('</ul>')
        h.write(END)
33 | |
34 | |
def fix_name(name):
    """Return *name* with every '&' and '/' replaced by '_'."""
    return re.sub(r'[&/]', '_', name)
37 | |
38 | |
def cooked_mode(orfed_ids, tsv_file, html_file, directory):
    """Write an HTML index of result files grouped by sequence name.

    The first tab-separated column of every line in *tsv_file* is taken as a
    sequence name and each occurrence counts as one feature.  When
    *orfed_ids* is true, names that differ only by a trailing ``_<digits>``
    suffix are grouped under the name with that suffix removed; otherwise
    every name forms its own group.  Groups are listed by descending feature
    count, and the members inside a group likewise.

    A name is linked to the file in *directory* whose name without extension
    equals ``fix_name(name)``; names without such a file are flagged as
    broken links.  Files that no name claimed are listed at the end under
    "Sequences without any features".

    :param orfed_ids: whether to group names by stripping a ``_<digits>`` suffix
    :param tsv_file: path of the tab-separated input file
    :param html_file: path of the index page to (over)write
    :param directory: directory holding the per-sequence result files
    """
    # groups: group name -> {member name: feature count}
    groups = {}
    with open(tsv_file) as f:
        for line in f:
            # Blank lines carry no sequence name; counting them would create
            # a bogus entry.
            if not line.strip():
                continue
            # Drop the line terminator first so that a line without any tab
            # does not keep its newline inside the name.
            name = line.rstrip('\r\n').split('\t', 1)[0]
            group = re.sub(r'_\d+$', '', name) if orfed_ids else name
            members = groups.setdefault(group, {})
            members[name] = members.get(name, 0) + 1

    # (group name, total features, [(member, count), ...]); both levels are
    # ordered by descending count.  sorted() is stable, so ties keep the
    # iteration order of the underlying dict.
    ranked = sorted(
        ((group, sum(members.values()),
          sorted(members.items(), key=lambda t: t[1], reverse=True))
         for group, members in groups.items()),
        key=lambda t: t[1], reverse=True)

    # File name without extension -> file name.  Entries are removed as they
    # get linked, so whatever remains belongs to no name in the TSV.
    unlinked = {}
    for filename in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, filename)):
            unlinked[os.path.splitext(filename)[0]] = filename

    def attr(value):
        # Escape for use inside a double-quoted HTML attribute.
        return escape(value, {'"': '&quot;'})

    def link(name, count):
        # Anchor for one sequence; claims its file so it is not listed again.
        target = unlinked.pop(fix_name(name), None)
        if target is None:
            return '<a href="%s"> %s (%d features, broken link!)</a>' % (
                attr(name), escape(name), count)
        return '<a href="%s"> %s (%d features)</a>' % (
            attr(target), escape(name), count)

    with open(html_file, 'w') as h:
        h.write(START)
        h.write('<ol>')
        for group, total, members in ranked:
            h.write('\n<li>')
            if len(members) == 1:
                # A single member is shown directly, without a nested list.
                h.write(link(*members[0]))
            else:
                h.write('%s (%d features)' % (escape(group), total))
                h.write('\n<ul>')
                for name, count in members:
                    h.write('\n<li>%s</li>' % link(name, count))
                h.write('</ul>')
            h.write('</li>')
        # Close the ranked list before the trailing section: a heading is not
        # valid content of <ol>.
        h.write('</ol>')

        if unlinked:
            h.write('<h2>Sequences without any features</h2>')
            h.write('\n<ul>')
            for stem, filename in sorted(unlinked.items()):
                h.write('\n<li><a href="%s">%s</a></li>' % (attr(filename), escape(stem)))
            h.write('</ul>')
        h.write(END)
100 | |
101 | |
def main():
    """Dispatch on the command-line argument count.

    Two arguments (``html_file directory``) build the flat index via
    ``raw_mode``; four arguments (``[0|1] tsv_file html_file directory``)
    build the grouped index via ``cooked_mode``, with name grouping enabled
    unless the first argument is exactly ``'0'``.  Any other argument count
    prints a usage message and exits with status 1.
    """
    argc = len(sys.argv)
    if argc == 3:
        raw_mode(sys.argv[1], sys.argv[2])
    elif argc == 5:
        cooked_mode(sys.argv[1] != '0', sys.argv[2], sys.argv[3], sys.argv[4])
    else:
        # Usage errors belong on stderr so they do not mix with regular output.
        sys.stderr.write('Args must be "html_file directory" or "[0|1] tsv_file html_file directory"\n')
        sys.exit(1)


# Only run when executed as a script, so the module can be imported safely.
if __name__ == '__main__':
    main()