comparison structurefold/predict/predict_RNAs.py @ 113:aedb21527abd draft

Uploaded
author tyty
date Tue, 14 Apr 2015 14:09:42 -0400
parents
children
comparison
equal deleted inserted replaced
112:87ec0ecdc2af 113:aedb21527abd
1 #RNA structure prediction & Output and illustrate reactivities
2
3 import sys
4 import shlex
5 import subprocess
6 import tarfile
7 from parse_dis_pac import *
8 from read_file import *
9 from Bio import SeqIO
10 import os
11 from rtts_plot import *
12 import random
13 import string
14
15
16 id_file = sys.argv[1]
17 seq_file = sys.argv[2]
18 predict_type = sys.argv[3]
19 temperature = sys.argv[4]
20 predict_program = sys.argv[5]
21 output_html = sys.argv[6]
22 output_directory = sys.argv[7]
23
24
25
26 flag = False
27 if predict_type!='silico': #input reactivity file if provided
28 if predict_program == 'rs':
29 react_file = sys.argv[8]
30 slope = sys.argv[9]
31 intercept = sys.argv[10]
32 else:
33 react_file = sys.argv[8]
34 thres_h = sys.argv[9]
35 thres_h = float(thres_h)
36 thres_l = sys.argv[10]
37 thres_l = float(thres_l)
38 gqs = sys.argv[11]
39 gqs = int(gqs)
40
41 react = parse_dist(react_file)
42 react = react[1]
43 flag = True
44 else:
45 if predict_program!='rs':
46 gqs = sys.argv[8]
47 gqs = int(gqs)
48
49
50 ospath = os.path.realpath(sys.argv[0])
51 ost = ospath.split('/')
52 syspathpt = ""
53 for i in range(len(ost)-1):
54 syspathpt = syspathpt+ost[i].strip()
55 syspathpt = syspathpt+'/'
56
57
58 syspath = os.getcwd()
59
60 ids = read_t_file(id_file)
61 sequences = SeqIO.parse(seq_file, 'fasta')
62
63
64 seqs = {}
65 for seq in sequences:
66 seqs[seq.id] = seq.seq.tostring()
67
68 if len(ids)>100: #setup a limit of the number of sequence to be predicted
69 print("Number of sequences exceeds limitation!")
70 sys.exit(0)
71
72
73 #predict RNA structures
74
75 os.mkdir(output_directory)
76 flag3 = 0
77
78 id_predicted = set()
79 for i in range(len(ids)):
80 flag2 = 0
81 id_s = ids[i][0]
82 #print(id_s)
83 #Put RNA sequence and reactivities into files
84 if id_s in seqs:
85 fh = file(os.path.join(syspath,"temp.txt"), 'w')
86 fh.write('>'+id_s)
87 fh.write('\n')
88 fh.write(seqs[id_s])
89 fh.close()
90 if not flag:
91 if predict_program == 'rs':
92 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
93 subprocess.call(command)
94 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
95 subprocess.call(command)
96 else:
97 if gqs:
98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
99
100 else:
101 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
102 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
103 subprocess.call(command)
104 if not gqs:
105 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
106 else:
107 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
108 subprocess.call(command)
109 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
110 subprocess.call(command)
111 else:
112 if id_s in react:
113 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
114 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
115 if predict_program == 'rs':
116 for j in range(0, (len(react[id_s]))):
117 if react[id_s][j]!='NA':
118 fh.write(str(j+1))
119 fh.write('\t')
120 fh.write(str(react[id_s][j]))
121 fh.write('\n')
122 fh.close()
123 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
124 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
125 os.path.join(output_directory, "%s.ct" % id_s)))
126 subprocess.call(command)
127 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
128 subprocess.call(command)
129 else:
130 fh.write('>'+id_s)
131 fh.write('\n')
132 fh.write(seqs[id_s])
133 fh.write('\n')
134 for j in range(0, (len(react[id_s]))):
135 if react[id_s][j]!='NA':
136 re = float(react[id_s][j])
137 if re>thres_h:
138 fh.write('x')
139 else:
140 if re<thres_l:
141 fh.write('|')
142 else:
143 fh.write('.')
144 else:
145 fh.write('.')
146 fh.write('.')
147 fh.close()
148 if gqs:
149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
150
151 else:
152 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
153 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
154 subprocess.call(command)
155 if not gqs:
156 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
157 else:
158 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
159 subprocess.call(command)
160 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
161 subprocess.call(command)
162
163 else:
164 print(id_s+" not in the data of react!")
165 flag2 = 1
166 if flag2 == 0:
167 if predict_program == 'rs':
168 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
169 subprocess.call(command)
170 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
171 subprocess.call(command)
172 else:
173 if not gqs:
174 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
175 subprocess.call(command)
176 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
177 subprocess.call(command)
178 flag3 = 1
179 id_predicted.add(id_s)
180 else:
181 print(id_s+" not in the data of sequences!")
182
183 #Remove the unnecessary files
184 if flag3 == 1:
185
186 tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:')
187 for filename in os.listdir(output_directory):
188 filepath = os.path.join(output_directory, filename)
189 print filepath
190 tarball.add(filepath, arcname=filename)
191 #print os.listdir(syspath)
192 #print os.listdir(output_directory)
193 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
194 tarball.close()
195
196 h = open(output_html, 'wb' )
197 h.write('<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n')
198
199 h.write('<p>\n')
200 h.write('Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % (('prediction_results.tar')))
201 #h.write('<\p>\n')
202 h.write('<hr>\n')
203
204
205 for id_p in id_predicted:
206 h.write('<h4>'+id_p+'</h4><p><ul>\n')
207 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.dbn' % id_p), ('%s.dbn' % id_p)))
208 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.ps' % id_p), ('%s.ps' % id_p)))
209 if flag:
210 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.tif' % id_p), ('%s.tif' % id_p)))
211 h.write( '</ul></p>\n' )
212 h.write('<hr>\n')
213 h.write( '</body></html>\n' )
214 h.close()
215
216
217
218
219