Mercurial > repos > tyty > structurefold
comparison structurefold/predict/predict_RNAs.py @ 113:aedb21527abd draft
Uploaded
author | tyty |
---|---|
date | Tue, 14 Apr 2015 14:09:42 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
112:87ec0ecdc2af | 113:aedb21527abd |
---|---|
1 #RNA structure prediction & Output and illustrate reactivities | |
2 | |
3 import sys | |
4 import shlex | |
5 import subprocess | |
6 import tarfile | |
7 from parse_dis_pac import * | |
8 from read_file import * | |
9 from Bio import SeqIO | |
10 import os | |
11 from rtts_plot import * | |
12 import random | |
13 import string | |
14 | |
15 | |
16 id_file = sys.argv[1] | |
17 seq_file = sys.argv[2] | |
18 predict_type = sys.argv[3] | |
19 temperature = sys.argv[4] | |
20 predict_program = sys.argv[5] | |
21 output_html = sys.argv[6] | |
22 output_directory = sys.argv[7] | |
23 | |
24 | |
25 | |
26 flag = False | |
27 if predict_type!='silico': #input reactivity file if provided | |
28 if predict_program == 'rs': | |
29 react_file = sys.argv[8] | |
30 slope = sys.argv[9] | |
31 intercept = sys.argv[10] | |
32 else: | |
33 react_file = sys.argv[8] | |
34 thres_h = sys.argv[9] | |
35 thres_h = float(thres_h) | |
36 thres_l = sys.argv[10] | |
37 thres_l = float(thres_l) | |
38 gqs = sys.argv[11] | |
39 gqs = int(gqs) | |
40 | |
41 react = parse_dist(react_file) | |
42 react = react[1] | |
43 flag = True | |
44 else: | |
45 if predict_program!='rs': | |
46 gqs = sys.argv[8] | |
47 gqs = int(gqs) | |
48 | |
49 | |
50 ospath = os.path.realpath(sys.argv[0]) | |
51 ost = ospath.split('/') | |
52 syspathpt = "" | |
53 for i in range(len(ost)-1): | |
54 syspathpt = syspathpt+ost[i].strip() | |
55 syspathpt = syspathpt+'/' | |
56 | |
57 | |
58 syspath = os.getcwd() | |
59 | |
60 ids = read_t_file(id_file) | |
61 sequences = SeqIO.parse(seq_file, 'fasta') | |
62 | |
63 | |
64 seqs = {} | |
65 for seq in sequences: | |
66 seqs[seq.id] = seq.seq.tostring() | |
67 | |
68 if len(ids)>100: #setup a limit of the number of sequence to be predicted | |
69 print("Number of sequences exceeds limitation!") | |
70 sys.exit(0) | |
71 | |
72 | |
73 #predict RNA structures | |
74 | |
75 os.mkdir(output_directory) | |
76 flag3 = 0 | |
77 | |
78 id_predicted = set() | |
79 for i in range(len(ids)): | |
80 flag2 = 0 | |
81 id_s = ids[i][0] | |
82 #print(id_s) | |
83 #Put RNA sequence and reactivities into files | |
84 if id_s in seqs: | |
85 fh = file(os.path.join(syspath,"temp.txt"), 'w') | |
86 fh.write('>'+id_s) | |
87 fh.write('\n') | |
88 fh.write(seqs[id_s]) | |
89 fh.close() | |
90 if not flag: | |
91 if predict_program == 'rs': | |
92 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | |
93 subprocess.call(command) | |
94 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
95 subprocess.call(command) | |
96 else: | |
97 if gqs: | |
98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
99 | |
100 else: | |
101 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
102 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
103 subprocess.call(command) | |
104 if not gqs: | |
105 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
106 else: | |
107 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
108 subprocess.call(command) | |
109 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
110 subprocess.call(command) | |
111 else: | |
112 if id_s in react: | |
113 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | |
114 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | |
115 if predict_program == 'rs': | |
116 for j in range(0, (len(react[id_s]))): | |
117 if react[id_s][j]!='NA': | |
118 fh.write(str(j+1)) | |
119 fh.write('\t') | |
120 fh.write(str(react[id_s][j])) | |
121 fh.write('\n') | |
122 fh.close() | |
123 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | |
124 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | |
125 os.path.join(output_directory, "%s.ct" % id_s))) | |
126 subprocess.call(command) | |
127 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
128 subprocess.call(command) | |
129 else: | |
130 fh.write('>'+id_s) | |
131 fh.write('\n') | |
132 fh.write(seqs[id_s]) | |
133 fh.write('\n') | |
134 for j in range(0, (len(react[id_s]))): | |
135 if react[id_s][j]!='NA': | |
136 re = float(react[id_s][j]) | |
137 if re>thres_h: | |
138 fh.write('x') | |
139 else: | |
140 if re<thres_l: | |
141 fh.write('|') | |
142 else: | |
143 fh.write('.') | |
144 else: | |
145 fh.write('.') | |
146 fh.write('.') | |
147 fh.close() | |
148 if gqs: | |
149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
150 | |
151 else: | |
152 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
153 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
154 subprocess.call(command) | |
155 if not gqs: | |
156 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
157 else: | |
158 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
159 subprocess.call(command) | |
160 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
161 subprocess.call(command) | |
162 | |
163 else: | |
164 print(id_s+" not in the data of react!") | |
165 flag2 = 1 | |
166 if flag2 == 0: | |
167 if predict_program == 'rs': | |
168 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
169 subprocess.call(command) | |
170 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
171 subprocess.call(command) | |
172 else: | |
173 if not gqs: | |
174 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
175 subprocess.call(command) | |
176 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
177 subprocess.call(command) | |
178 flag3 = 1 | |
179 id_predicted.add(id_s) | |
180 else: | |
181 print(id_s+" not in the data of sequences!") | |
182 | |
183 #Remove the unnecessary files | |
184 if flag3 == 1: | |
185 | |
186 tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:') | |
187 for filename in os.listdir(output_directory): | |
188 filepath = os.path.join(output_directory, filename) | |
189 print filepath | |
190 tarball.add(filepath, arcname=filename) | |
191 #print os.listdir(syspath) | |
192 #print os.listdir(output_directory) | |
193 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | |
194 tarball.close() | |
195 | |
196 h = open(output_html, 'wb' ) | |
197 h.write('<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n') | |
198 | |
199 h.write('<p>\n') | |
200 h.write('Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % (('prediction_results.tar'))) | |
201 #h.write('<\p>\n') | |
202 h.write('<hr>\n') | |
203 | |
204 | |
205 for id_p in id_predicted: | |
206 h.write('<h4>'+id_p+'</h4><p><ul>\n') | |
207 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.dbn' % id_p), ('%s.dbn' % id_p))) | |
208 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.ps' % id_p), ('%s.ps' % id_p))) | |
209 if flag: | |
210 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.tif' % id_p), ('%s.tif' % id_p))) | |
211 h.write( '</ul></p>\n' ) | |
212 h.write('<hr>\n') | |
213 h.write( '</body></html>\n' ) | |
214 h.close() | |
215 | |
216 | |
217 | |
218 | |
219 |