117
|
1 #RNA structure prediction & Output and illustrate reactivities
|
|
2
|
|
3 import sys
|
|
4 import shlex
|
|
5 import subprocess
|
|
6 import tarfile
|
|
7 from parse_dis_pac import *
|
|
8 from read_file import *
|
|
9 from Bio import SeqIO
|
|
10 import os
|
|
11 from rtts_plot import *
|
|
12 import random
|
|
13 import string
|
|
14
|
|
15
|
|
16 id_file = sys.argv[1]
|
|
17 seq_file = sys.argv[2]
|
|
18 predict_type = sys.argv[3]
|
|
19 temperature = sys.argv[4]
|
|
20 predict_program = sys.argv[5]
|
|
21 output_html = sys.argv[6]
|
|
22 output_directory = sys.argv[7]
|
|
23
|
|
24
|
|
25
|
|
26 flag = False
|
|
27 if predict_type!='silico': #input reactivity file if provided
|
|
28 if predict_program == 'rs':
|
|
29 react_file = sys.argv[8]
|
|
30 slope = sys.argv[9]
|
|
31 intercept = sys.argv[10]
|
|
32 else:
|
|
33 react_file = sys.argv[8]
|
|
34 thres_h = sys.argv[9]
|
|
35 thres_h = float(thres_h)
|
|
36 thres_l = sys.argv[10]
|
|
37 thres_l = float(thres_l)
|
|
38 gqs = sys.argv[11]
|
|
39 gqs = int(gqs)
|
|
40
|
|
41 react = parse_dist(react_file)
|
|
42 react = react[1]
|
|
43 flag = True
|
|
44 else:
|
|
45 if predict_program!='rs':
|
|
46 gqs = sys.argv[8]
|
|
47 gqs = int(gqs)
|
|
48
|
|
49
|
|
50 ospath = os.path.realpath(sys.argv[0])
|
|
51 ost = ospath.split('/')
|
|
52 syspathpt = ""
|
|
53 for i in range(len(ost)-1):
|
|
54 syspathpt = syspathpt+ost[i].strip()
|
|
55 syspathpt = syspathpt+'/'
|
|
56
|
|
57
|
|
58 syspath = os.getcwd()
|
|
59
|
|
60 ids = read_t_file(id_file)
|
|
61 sequences = SeqIO.parse(seq_file, 'fasta')
|
|
62
|
|
63
|
|
64 seqs = {}
|
|
65 for seq in sequences:
|
|
66 seqs[seq.id] = seq.seq.tostring()
|
|
67
|
|
68 if len(ids)>100: #setup a limit of the number of sequence to be predicted
|
|
69 print("Number of sequences exceeds limitation!")
|
|
70 sys.exit(0)
|
|
71
|
|
72
|
|
73 #predict RNA structures
|
|
74
|
|
75 os.mkdir(output_directory)
|
|
76 flag3 = 0
|
|
77
|
|
78 id_predicted = set()
|
|
79 for i in range(len(ids)):
|
|
80 flag2 = 0
|
|
81 id_s = ids[i][0]
|
|
82 #print(id_s)
|
|
83 #Put RNA sequence and reactivities into files
|
|
84 if id_s in seqs:
|
|
85 fh = file(os.path.join(syspath,"temp.txt"), 'w')
|
|
86 fh.write('>'+id_s)
|
|
87 fh.write('\n')
|
|
88 fh.write(seqs[id_s])
|
|
89 fh.close()
|
|
90 if not flag:
|
|
91 if predict_program == 'rs':
|
|
92 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
|
|
93 subprocess.call(command)
|
|
94 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
|
|
95 subprocess.call(command)
|
|
96 else:
|
|
97 if gqs:
|
|
98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
|
|
99
|
|
100 else:
|
|
101 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
|
|
102 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
|
|
103 subprocess.call(command)
|
|
104 if not gqs:
|
|
105 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
|
|
106 else:
|
|
107 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
|
|
108 subprocess.call(command)
|
|
109 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
|
|
110 subprocess.call(command)
|
|
111 else:
|
|
112 if id_s in react:
|
|
113 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
|
|
114 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
|
|
115 if predict_program == 'rs':
|
|
116 for j in range(0, (len(react[id_s]))):
|
|
117 if react[id_s][j]!='NA':
|
|
118 fh.write(str(j+1))
|
|
119 fh.write('\t')
|
|
120 fh.write(str(react[id_s][j]))
|
|
121 fh.write('\n')
|
|
122 fh.close()
|
|
123 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
|
|
124 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
|
|
125 os.path.join(output_directory, "%s.ct" % id_s)))
|
|
126 subprocess.call(command)
|
|
127 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
|
|
128 subprocess.call(command)
|
|
129 else:
|
|
130 fh.write('>'+id_s)
|
|
131 fh.write('\n')
|
|
132 fh.write(seqs[id_s])
|
|
133 fh.write('\n')
|
|
134 for j in range(0, (len(react[id_s]))):
|
|
135 if react[id_s][j]!='NA':
|
|
136 re = float(react[id_s][j])
|
|
137 if re>thres_h:
|
|
138 fh.write('x')
|
|
139 else:
|
|
140 if re<thres_l:
|
|
141 fh.write('|')
|
|
142 else:
|
|
143 fh.write('.')
|
|
144 else:
|
|
145 fh.write('.')
|
|
146 fh.write('.')
|
|
147 fh.close()
|
|
148 if gqs:
|
|
149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
|
|
150
|
|
151 else:
|
|
152 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
|
|
153 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
|
|
154 subprocess.call(command)
|
|
155 if not gqs:
|
|
156 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
|
|
157 else:
|
|
158 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
|
|
159 subprocess.call(command)
|
|
160 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
|
|
161 subprocess.call(command)
|
|
162
|
|
163 else:
|
|
164 print(id_s+" not in the data of react!")
|
|
165 flag2 = 1
|
|
166 if flag2 == 0:
|
|
167 if predict_program == 'rs':
|
|
168 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
|
|
169 subprocess.call(command)
|
|
170 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
|
|
171 subprocess.call(command)
|
|
172 else:
|
|
173 if not gqs:
|
|
174 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
|
|
175 subprocess.call(command)
|
|
176 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
|
|
177 subprocess.call(command)
|
|
178 flag3 = 1
|
|
179 id_predicted.add(id_s)
|
|
180 else:
|
|
181 print(id_s+" not in the data of sequences!")
|
|
182
|
|
183 #Remove the unnecessary files
|
|
184 if flag3 == 1:
|
|
185
|
|
186 tarball = tarfile.open(os.path.join(output_directory,'prediction_results.tar'), 'w:')
|
|
187 for filename in os.listdir(output_directory):
|
|
188 filepath = os.path.join(output_directory, filename)
|
|
189 print filepath
|
|
190 tarball.add(filepath, arcname=filename)
|
|
191 #print os.listdir(syspath)
|
|
192 #print os.listdir(output_directory)
|
|
193 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
|
|
194 tarball.close()
|
|
195
|
|
196 h = open(output_html, 'wb' )
|
|
197 h.write('<html><head><title><h1>Results of RNA structure prediction</h1></title></head><body>\n')
|
|
198
|
|
199 h.write('<p>\n')
|
|
200 h.write('Click <a href="%s">here</a> to download the compressed file containing all prediction results.\n' % (('prediction_results.tar')))
|
|
201 #h.write('<\p>\n')
|
|
202 h.write('<hr>\n')
|
|
203
|
|
204
|
|
205 for id_p in id_predicted:
|
|
206 h.write('<h4>'+id_p+'</h4><p><ul>\n')
|
|
207 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.dbn' % id_p), ('%s.dbn' % id_p)))
|
|
208 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.ps' % id_p), ('%s.ps' % id_p)))
|
|
209 if flag:
|
|
210 h.write('<li><a href="%s">%s</a></li>\n' % (('%s.tif' % id_p), ('%s.tif' % id_p)))
|
|
211 h.write( '</ul></p>\n' )
|
|
212 h.write('<hr>\n')
|
|
213 h.write( '</body></html>\n' )
|
|
214 h.close()
|
|
215
|
|
216
|
|
217
|
|
218
|
|
219
|