Mercurial > repos > tyty > structurefold
view predict/predict_RNAs.py @ 93:f1eb39775b93 draft
Uploaded
author | tyty |
---|---|
date | Mon, 16 Feb 2015 02:29:27 -0500 |
parents | |
children |
line wrap: on
line source
#RNA structure prediction & Output and illustrate reactivities import sys import shlex import subprocess import tarfile from parse_dis_pac import * from read_file import * from Bio import SeqIO import os from rtts_plot import * import random import string id_file = sys.argv[1] seq_file = sys.argv[2] predict_type = sys.argv[3] temperature = sys.argv[4] predict_program = sys.argv[5] output_file = sys.argv[6] flag = False if predict_type!='silico': #input reactivity file if provided if predict_program == 'rs': react_file = sys.argv[7] slope = sys.argv[8] intercept = sys.argv[9] else: react_file = sys.argv[7] thres_h = sys.argv[8] thres_h = float(thres_h) thres_l = sys.argv[9] thres_l = float(thres_l) gqs = sys.argv[10] gqs = int(gqs) react = parse_dist(react_file) react = react[1] flag = True else: if predict_program!='rs': gqs = sys.argv[7] gqs = int(gqs) ospath = os.path.realpath(sys.argv[0]) ost = ospath.split('/') syspathpt = "" for i in range(len(ost)-1): syspathpt = syspathpt+ost[i].strip() syspathpt = syspathpt+'/' syspath = os.getcwd() ids = read_t_file(id_file) sequences = SeqIO.parse(seq_file, 'fasta') seqs = {} for seq in sequences: seqs[seq.id] = seq.seq.tostring() if len(ids)>100: #setup a limit of the number of sequence to be predicted print("Number of sequences exceeds limitation!") sys.exit(0) #predict RNA structures output_directory = os.path.join(syspath, "output_files") if not os.path.exists(output_directory): os.makedirs(output_directory) flag3 = 0 for i in range(len(ids)): flag2 = 0 id_s = ids[i][0] #print(id_s) #Put RNA sequence and reactivities into files if id_s in seqs: fh = file(os.path.join(syspath,"temp.txt"), 'w') fh.write('>'+id_s) fh.write('\n') fh.write(seqs[id_s]) fh.close() if not flag: if predict_program == 'rs': command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) subprocess.call(command) command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) subprocess.call(command) else: if gqs: os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb') else: os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) subprocess.call(command) if not gqs: command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) else: command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) subprocess.call(command) command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) subprocess.call(command) else: if id_s in react: fh = file(os.path.join(syspath, "constraint.txt"), 'w') make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA if predict_program == 'rs': for j in range(0, (len(react[id_s]))): if react[id_s][j]!='NA': fh.write(str(j+1)) fh.write('\t') fh.write(str(react[id_s][j])) fh.write('\n') fh.close() command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, os.path.join(output_directory, "%s.ct" % id_s))) subprocess.call(command) command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s))) subprocess.call(command) else: fh.write('>'+id_s) fh.write('\n') fh.write(seqs[id_s]) fh.write('\n') for j in range(0, (len(react[id_s]))): if react[id_s][j]!='NA': re = float(react[id_s][j]) if re>thres_h: fh.write('x') else: if re<thres_l: fh.write('|') else: fh.write('.') else: fh.write('.') fh.write('.') fh.close() if gqs: os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb') else: os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb') command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s))) subprocess.call(command) if not gqs: command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s))) else: command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s))) subprocess.call(command) command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s))) subprocess.call(command) else: print(id_s+" not in the data of react!") flag2 = 1 if flag2 == 0: if predict_program == 'rs': command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) subprocess.call(command) command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) subprocess.call(command) else: if not gqs: command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) subprocess.call(command) command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s))) subprocess.call(command) flag3 = 1 else: print(id_s+" not in the data of sequences!") #Remove the unnecessary files if flag3 == 1: tarball = tarfile.open(output_file, 'w:') for filename in os.listdir(output_directory): filepath = os.path.join(output_directory, filename) print filepath tarball.add(filepath, arcname=filename) #print os.listdir(syspath) #print os.listdir(output_directory) # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) tarball.close()