Mercurial > repos > tyty > structurefold
comparison predict/predict_RNAs.py @ 93:f1eb39775b93 draft
Uploaded
| author | tyty |
|---|---|
| date | Mon, 16 Feb 2015 02:29:27 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 92:976dcf4d45b2 | 93:f1eb39775b93 |
|---|---|
| 1 #RNA structure prediction & Output and illustrate reactivities | |
| 2 | |
| 3 import sys | |
| 4 import shlex | |
| 5 import subprocess | |
| 6 import tarfile | |
| 7 from parse_dis_pac import * | |
| 8 from read_file import * | |
| 9 from Bio import SeqIO | |
| 10 import os | |
| 11 from rtts_plot import * | |
| 12 import random | |
| 13 import string | |
| 14 | |
| 15 | |
| 16 id_file = sys.argv[1] | |
| 17 seq_file = sys.argv[2] | |
| 18 predict_type = sys.argv[3] | |
| 19 temperature = sys.argv[4] | |
| 20 predict_program = sys.argv[5] | |
| 21 output_file = sys.argv[6] | |
| 22 | |
| 23 | |
| 24 flag = False | |
| 25 if predict_type!='silico': #input reactivity file if provided | |
| 26 if predict_program == 'rs': | |
| 27 react_file = sys.argv[7] | |
| 28 slope = sys.argv[8] | |
| 29 intercept = sys.argv[9] | |
| 30 else: | |
| 31 react_file = sys.argv[7] | |
| 32 thres_h = sys.argv[8] | |
| 33 thres_h = float(thres_h) | |
| 34 thres_l = sys.argv[9] | |
| 35 thres_l = float(thres_l) | |
| 36 gqs = sys.argv[10] | |
| 37 gqs = int(gqs) | |
| 38 | |
| 39 react = parse_dist(react_file) | |
| 40 react = react[1] | |
| 41 flag = True | |
| 42 else: | |
| 43 if predict_program!='rs': | |
| 44 gqs = sys.argv[7] | |
| 45 gqs = int(gqs) | |
| 46 | |
| 47 | |
| 48 ospath = os.path.realpath(sys.argv[0]) | |
| 49 ost = ospath.split('/') | |
| 50 syspathpt = "" | |
| 51 for i in range(len(ost)-1): | |
| 52 syspathpt = syspathpt+ost[i].strip() | |
| 53 syspathpt = syspathpt+'/' | |
| 54 | |
| 55 | |
| 56 syspath = os.getcwd() | |
| 57 | |
| 58 ids = read_t_file(id_file) | |
| 59 sequences = SeqIO.parse(seq_file, 'fasta') | |
| 60 | |
| 61 | |
| 62 seqs = {} | |
| 63 for seq in sequences: | |
| 64 seqs[seq.id] = seq.seq.tostring() | |
| 65 | |
| 66 if len(ids)>100: #setup a limit of the number of sequence to be predicted | |
| 67 print("Number of sequences exceeds limitation!") | |
| 68 sys.exit(0) | |
| 69 | |
| 70 | |
| 71 #predict RNA structures | |
| 72 output_directory = os.path.join(syspath, "output_files") | |
| 73 if not os.path.exists(output_directory): | |
| 74 os.makedirs(output_directory) | |
| 75 flag3 = 0 | |
| 76 for i in range(len(ids)): | |
| 77 flag2 = 0 | |
| 78 id_s = ids[i][0] | |
| 79 #print(id_s) | |
| 80 #Put RNA sequence and reactivities into files | |
| 81 if id_s in seqs: | |
| 82 fh = file(os.path.join(syspath,"temp.txt"), 'w') | |
| 83 fh.write('>'+id_s) | |
| 84 fh.write('\n') | |
| 85 fh.write(seqs[id_s]) | |
| 86 fh.close() | |
| 87 if not flag: | |
| 88 if predict_program == 'rs': | |
| 89 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | |
| 90 subprocess.call(command) | |
| 91 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 92 subprocess.call(command) | |
| 93 else: | |
| 94 if gqs: | |
| 95 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
| 96 | |
| 97 else: | |
| 98 os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
| 99 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 100 subprocess.call(command) | |
| 101 if not gqs: | |
| 102 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
| 103 else: | |
| 104 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
| 105 subprocess.call(command) | |
| 106 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
| 107 subprocess.call(command) | |
| 108 else: | |
| 109 if id_s in react: | |
| 110 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | |
| 111 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | |
| 112 if predict_program == 'rs': | |
| 113 for j in range(0, (len(react[id_s]))): | |
| 114 if react[id_s][j]!='NA': | |
| 115 fh.write(str(j+1)) | |
| 116 fh.write('\t') | |
| 117 fh.write(str(react[id_s][j])) | |
| 118 fh.write('\n') | |
| 119 fh.close() | |
| 120 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | |
| 121 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | |
| 122 os.path.join(output_directory, "%s.ct" % id_s))) | |
| 123 subprocess.call(command) | |
| 124 command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 125 subprocess.call(command) | |
| 126 else: | |
| 127 fh.write('>'+id_s) | |
| 128 fh.write('\n') | |
| 129 fh.write(seqs[id_s]) | |
| 130 fh.write('\n') | |
| 131 for j in range(0, (len(react[id_s]))): | |
| 132 if react[id_s][j]!='NA': | |
| 133 re = float(react[id_s][j]) | |
| 134 if re>thres_h: | |
| 135 fh.write('x') | |
| 136 else: | |
| 137 if re<thres_l: | |
| 138 fh.write('|') | |
| 139 else: | |
| 140 fh.write('.') | |
| 141 else: | |
| 142 fh.write('.') | |
| 143 fh.write('.') | |
| 144 fh.close() | |
| 145 if gqs: | |
| 146 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') | |
| 147 | |
| 148 else: | |
| 149 os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') | |
| 150 command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) | |
| 151 subprocess.call(command) | |
| 152 if not gqs: | |
| 153 command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) | |
| 154 else: | |
| 155 command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) | |
| 156 subprocess.call(command) | |
| 157 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) | |
| 158 subprocess.call(command) | |
| 159 | |
| 160 else: | |
| 161 print(id_s+" not in the data of react!") | |
| 162 flag2 = 1 | |
| 163 if flag2 == 0: | |
| 164 if predict_program == 'rs': | |
| 165 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
| 166 subprocess.call(command) | |
| 167 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
| 168 subprocess.call(command) | |
| 169 else: | |
| 170 if not gqs: | |
| 171 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
| 172 subprocess.call(command) | |
| 173 command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) | |
| 174 subprocess.call(command) | |
| 175 flag3 = 1 | |
| 176 else: | |
| 177 print(id_s+" not in the data of sequences!") | |
| 178 | |
| 179 #Remove the unnecessary files | |
| 180 if flag3 == 1: | |
| 181 tarball = tarfile.open(output_file, 'w:') | |
| 182 for filename in os.listdir(output_directory): | |
| 183 filepath = os.path.join(output_directory, filename) | |
| 184 print filepath | |
| 185 tarball.add(filepath, arcname=filename) | |
| 186 #print os.listdir(syspath) | |
| 187 #print os.listdir(output_directory) | |
| 188 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | |
| 189 tarball.close() |
