annotate predict/predict_RNAs.py @ 106:c1c1d38ec295 draft

Deleted selected files
author tyty
date Thu, 19 Mar 2015 17:43:55 -0400
parents f1eb39775b93
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
93
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
1 #RNA structure prediction & Output and illustrate reactivities
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
2
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
3 import sys
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
4 import shlex
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
5 import subprocess
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
6 import tarfile
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
7 from parse_dis_pac import *
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
8 from read_file import *
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
9 from Bio import SeqIO
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
10 import os
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
11 from rtts_plot import *
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
12 import random
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
13 import string
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
14
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
15
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
16 id_file = sys.argv[1]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
17 seq_file = sys.argv[2]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
18 predict_type = sys.argv[3]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
19 temperature = sys.argv[4]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
20 predict_program = sys.argv[5]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
21 output_file = sys.argv[6]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
22
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
23
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
24 flag = False
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
25 if predict_type!='silico': #input reactivity file if provided
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
26 if predict_program == 'rs':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
27 react_file = sys.argv[7]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
28 slope = sys.argv[8]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
29 intercept = sys.argv[9]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
30 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
31 react_file = sys.argv[7]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
32 thres_h = sys.argv[8]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
33 thres_h = float(thres_h)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
34 thres_l = sys.argv[9]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
35 thres_l = float(thres_l)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
36 gqs = sys.argv[10]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
37 gqs = int(gqs)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
38
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
39 react = parse_dist(react_file)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
40 react = react[1]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
41 flag = True
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
42 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
43 if predict_program!='rs':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
44 gqs = sys.argv[7]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
45 gqs = int(gqs)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
46
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
47
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
48 ospath = os.path.realpath(sys.argv[0])
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
49 ost = ospath.split('/')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
50 syspathpt = ""
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
51 for i in range(len(ost)-1):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
52 syspathpt = syspathpt+ost[i].strip()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
53 syspathpt = syspathpt+'/'
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
54
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
55
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
56 syspath = os.getcwd()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
57
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
58 ids = read_t_file(id_file)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
59 sequences = SeqIO.parse(seq_file, 'fasta')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
60
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
61
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
62 seqs = {}
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
63 for seq in sequences:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
64 seqs[seq.id] = seq.seq.tostring()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
65
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
66 if len(ids)>100: #setup a limit of the number of sequence to be predicted
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
67 print("Number of sequences exceeds limitation!")
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
68 sys.exit(0)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
69
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
70
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
71 #predict RNA structures
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
72 output_directory = os.path.join(syspath, "output_files")
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
73 if not os.path.exists(output_directory):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
74 os.makedirs(output_directory)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
75 flag3 = 0
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
76 for i in range(len(ids)):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
77 flag2 = 0
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
78 id_s = ids[i][0]
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
79 #print(id_s)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
80 #Put RNA sequence and reactivities into files
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
81 if id_s in seqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
82 fh = file(os.path.join(syspath,"temp.txt"), 'w')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
83 fh.write('>'+id_s)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
84 fh.write('\n')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
85 fh.write(seqs[id_s])
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
86 fh.close()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
87 if not flag:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
88 if predict_program == 'rs':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
89 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
90 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
91 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
92 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
93 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
94 if gqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
95 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
96
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
97 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
99 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
100 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
101 if not gqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
102 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
103 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
104 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
105 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
106 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
107 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
108 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
109 if id_s in react:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
110 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
111 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
112 if predict_program == 'rs':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
113 for j in range(0, (len(react[id_s]))):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
114 if react[id_s][j]!='NA':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
115 fh.write(str(j+1))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
116 fh.write('\t')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
117 fh.write(str(react[id_s][j]))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
118 fh.write('\n')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
119 fh.close()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
120 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
121 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
122 os.path.join(output_directory, "%s.ct" % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
123 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
124 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
125 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
126 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
127 fh.write('>'+id_s)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
128 fh.write('\n')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
129 fh.write(seqs[id_s])
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
130 fh.write('\n')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
131 for j in range(0, (len(react[id_s]))):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
132 if react[id_s][j]!='NA':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
133 re = float(react[id_s][j])
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
134 if re>thres_h:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
135 fh.write('x')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
136 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
137 if re<thres_l:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
138 fh.write('|')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
139 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
140 fh.write('.')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
141 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
142 fh.write('.')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
143 fh.write('.')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
144 fh.close()
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
145 if gqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
146 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
147
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
148 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
150 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
151 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
152 if not gqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
153 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
154 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
155 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
156 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
157 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
158 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
159
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
160 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
161 print(id_s+" not in the data of react!")
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
162 flag2 = 1
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
163 if flag2 == 0:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
164 if predict_program == 'rs':
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
165 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
166 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
167 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
168 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
169 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
170 if not gqs:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
171 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
172 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
173 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
174 subprocess.call(command)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
175 flag3 = 1
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
176 else:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
177 print(id_s+" not in the data of sequences!")
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
178
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
179 #Remove the unnecessary files
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
180 if flag3 == 1:
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
181 tarball = tarfile.open(output_file, 'w:')
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
182 for filename in os.listdir(output_directory):
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
183 filepath = os.path.join(output_directory, filename)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
184 print filepath
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
185 tarball.add(filepath, arcname=filename)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
186 #print os.listdir(syspath)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
187 #print os.listdir(output_directory)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
188 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
f1eb39775b93 Uploaded
tyty
parents:
diff changeset
189 tarball.close()