Mercurial > repos > tyty > structurefold
comparison predict/predict_RNAs.py @ 93:f1eb39775b93 draft
Uploaded
author | tyty |
---|---|
date | Mon, 16 Feb 2015 02:29:27 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
92:976dcf4d45b2 | 93:f1eb39775b93 |
---|---|
1 #RNA structure prediction & Output and illustrate reactivities | |
2 | |
3 import sys | |
4 import shlex | |
5 import subprocess | |
6 import tarfile | |
7 from parse_dis_pac import * | |
8 from read_file import * | |
9 from Bio import SeqIO | |
10 import os | |
11 from rtts_plot import * | |
12 import random | |
13 import string | |
14 | |
15 | |
16 id_file = sys.argv[1] | |
17 seq_file = sys.argv[2] | |
18 predict_type = sys.argv[3] | |
19 temperature = sys.argv[4] | |
20 predict_program = sys.argv[5] | |
21 output_file = sys.argv[6] | |
22 | |
23 | |
24 flag = False | |
25 if predict_type!='silico': #input reactivity file if provided | |
26 if predict_program == 'rs': | |
27 react_file = sys.argv[7] | |
28 slope = sys.argv[8] | |
29 intercept = sys.argv[9] | |
30 else: | |
31 react_file = sys.argv[7] | |
32 thres_h = sys.argv[8] | |
33 thres_h = float(thres_h) | |
34 thres_l = sys.argv[9] | |
35 thres_l = float(thres_l) | |
36 gqs = sys.argv[10] | |
37 gqs = int(gqs) | |
38 | |
39 react = parse_dist(react_file) | |
40 react = react[1] | |
41 flag = True | |
42 else: | |
43 if predict_program!='rs': | |
44 gqs = sys.argv[7] | |
45 gqs = int(gqs) | |
46 | |
47 | |
48 ospath = os.path.realpath(sys.argv[0]) | |
49 ost = ospath.split('/') | |
50 syspathpt = "" | |
51 for i in range(len(ost)-1): | |
52 syspathpt = syspathpt+ost[i].strip() | |
53 syspathpt = syspathpt+'/' | |
54 | |
55 | |
56 syspath = os.getcwd() | |
57 | |
58 ids = read_t_file(id_file) | |
59 sequences = SeqIO.parse(seq_file, 'fasta') | |
60 | |
61 | |
62 seqs = {} | |
63 for seq in sequences: | |
64 seqs[seq.id] = seq.seq.tostring() | |
65 | |
66 if len(ids)>100: #setup a limit of the number of sequence to be predicted | |
67 print("Number of sequences exceeds limitation!") | |
68 sys.exit(0) | |
69 | |
70 | |
71 #predict RNA structures | |
72 output_directory = os.path.join(syspath, "output_files") | |
73 if not os.path.exists(output_directory): | |
74 os.makedirs(output_directory) | |
75 flag3 = 0 | |
76 for i in range(len(ids)): | |
77 flag2 = 0 | |
78 id_s = ids[i][0] | |
79 #print(id_s) | |
80 #Put RNA sequence and reactivities into files | |
81 if id_s in seqs: | |
82 fh = file(os.path.join(syspath,"temp.txt"), 'w') | |
83 fh.write('>'+id_s) | |
84 fh.write('\n') | |
85 fh.write(seqs[id_s]) | |
86 fh.close() | |
87 if not flag: | |
88 if predict_program == 'rs': | |
89 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | |
90 subprocess.call(command) | |
91 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
92 subprocess.call(command) | |
93 else: | |
94 if gqs: | |
95 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
96 | |
97 else: | |
98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
99 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
100 subprocess.call(command) | |
101 if not gqs: | |
102 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
103 else: | |
104 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
105 subprocess.call(command) | |
106 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
107 subprocess.call(command) | |
108 else: | |
109 if id_s in react: | |
110 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | |
111 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | |
112 if predict_program == 'rs': | |
113 for j in range(0, (len(react[id_s]))): | |
114 if react[id_s][j]!='NA': | |
115 fh.write(str(j+1)) | |
116 fh.write('\t') | |
117 fh.write(str(react[id_s][j])) | |
118 fh.write('\n') | |
119 fh.close() | |
120 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | |
121 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | |
122 os.path.join(output_directory, "%s.ct" % id_s))) | |
123 subprocess.call(command) | |
124 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
125 subprocess.call(command) | |
126 else: | |
127 fh.write('>'+id_s) | |
128 fh.write('\n') | |
129 fh.write(seqs[id_s]) | |
130 fh.write('\n') | |
131 for j in range(0, (len(react[id_s]))): | |
132 if react[id_s][j]!='NA': | |
133 re = float(react[id_s][j]) | |
134 if re>thres_h: | |
135 fh.write('x') | |
136 else: | |
137 if re<thres_l: | |
138 fh.write('|') | |
139 else: | |
140 fh.write('.') | |
141 else: | |
142 fh.write('.') | |
143 fh.write('.') | |
144 fh.close() | |
145 if gqs: | |
146 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
147 | |
148 else: | |
149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
150 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
151 subprocess.call(command) | |
152 if not gqs: | |
153 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
154 else: | |
155 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
156 subprocess.call(command) | |
157 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
158 subprocess.call(command) | |
159 | |
160 else: | |
161 print(id_s+" not in the data of react!") | |
162 flag2 = 1 | |
163 if flag2 == 0: | |
164 if predict_program == 'rs': | |
165 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
166 subprocess.call(command) | |
167 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
168 subprocess.call(command) | |
169 else: | |
170 if not gqs: | |
171 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
172 subprocess.call(command) | |
173 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
174 subprocess.call(command) | |
175 flag3 = 1 | |
176 else: | |
177 print(id_s+" not in the data of sequences!") | |
178 | |
179 #Remove the unnecessary files | |
180 if flag3 == 1: | |
181 tarball = tarfile.open(output_file, 'w:') | |
182 for filename in os.listdir(output_directory): | |
183 filepath = os.path.join(output_directory, filename) | |
184 print filepath | |
185 tarball.add(filepath, arcname=filename) | |
186 #print os.listdir(syspath) | |
187 #print os.listdir(output_directory) | |
188 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | |
189 tarball.close() |