diff predict/predict_RNAs.py @ 93:f1eb39775b93 draft

Uploaded
author tyty
date Mon, 16 Feb 2015 02:29:27 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/predict/predict_RNAs.py	Mon Feb 16 02:29:27 2015 -0500
@@ -0,0 +1,189 @@
+#RNA structure prediction & Output and illustrate reactivities
+
+import sys
+import shlex
+import subprocess
+import tarfile
+from parse_dis_pac import *
+from read_file import *
+from Bio import SeqIO
+import os
+from rtts_plot import *
+import random
+import string
+
+
+id_file = sys.argv[1]
+seq_file = sys.argv[2]
+predict_type = sys.argv[3]
+temperature = sys.argv[4]
+predict_program = sys.argv[5]
+output_file = sys.argv[6]
+
+
+flag = False
+if predict_type!='silico': #input reactivity file if provided
+    if predict_program == 'rs':
+        react_file = sys.argv[7]
+        slope = sys.argv[8]
+        intercept = sys.argv[9]
+    else:
+        react_file = sys.argv[7]
+        thres_h = sys.argv[8]
+        thres_h = float(thres_h)
+        thres_l = sys.argv[9]
+        thres_l = float(thres_l)
+        gqs = sys.argv[10]
+        gqs = int(gqs)
+        
+    react = parse_dist(react_file)
+    react = react[1]
+    flag = True
+else:
+    if predict_program!='rs':
+        gqs = sys.argv[7]
+        gqs = int(gqs)
+
+
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspathpt = ""
+for i in range(len(ost)-1):
+    syspathpt = syspathpt+ost[i].strip()
+    syspathpt = syspathpt+'/'
+
+
+syspath = os.getcwd()
+
+ids = read_t_file(id_file)
+sequences = SeqIO.parse(seq_file, 'fasta')
+
+
+seqs = {}
+for seq in sequences:
+    seqs[seq.id] = seq.seq.tostring()
+
+if len(ids)>100: #setup a limit of the number of sequence to be predicted
+    print("Number of sequences exceeds limitation!")
+    sys.exit(0)
+    
+
+#predict RNA structures
+output_directory = os.path.join(syspath, "output_files")
+if not os.path.exists(output_directory):
+    os.makedirs(output_directory)
+flag3 = 0
+for i in range(len(ids)):
+    flag2 = 0
+    id_s = ids[i][0]
+    #print(id_s)
+    #Put RNA sequence and reactivities into files
+    if id_s in seqs:
+        fh = file(os.path.join(syspath,"temp.txt"), 'w')        
+        fh.write('>'+id_s)
+        fh.write('\n')
+        fh.write(seqs[id_s])
+        fh.close()
+        if not flag:
+            if predict_program == 'rs':
+                command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
+                subprocess.call(command)
+                command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
+                subprocess.call(command)               
+            else:
+                if gqs:
+                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
+                    
+                else:
+                    os.system('RNAfold < '+syspath+'/temp.txt -T '+str(float(temperature)-273.15)+' --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
+                command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
+                subprocess.call(command)
+                if not gqs:
+                    command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
+                else:
+                    command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
+                subprocess.call(command)
+                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
+                subprocess.call(command)
+        else:
+            if id_s in react:
+                fh = file(os.path.join(syspath, "constraint.txt"), 'w')
+                make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
+                if predict_program == 'rs': 
+                    for j in range(0, (len(react[id_s]))):
+                        if react[id_s][j]!='NA':
+                            fh.write(str(j+1))
+                            fh.write('\t')
+                            fh.write(str(react[id_s][j]))
+                            fh.write('\n')
+                    fh.close()
+                    command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), 
+                                                                 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, 
+                                                                 os.path.join(output_directory, "%s.ct" % id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('python %s %s %s %s %s' % (os.path.join(syspathpt, 'ct_to_dot.py'), os.path.join(output_directory, '%s.ct' % id_s), output_directory, id_s, os.path.join(output_directory, '%s.dbn' % id_s)))
+                    subprocess.call(command)
+                else:
+                    fh.write('>'+id_s)
+                    fh.write('\n')
+                    fh.write(seqs[id_s])
+                    fh.write('\n')
+                    for j in range(0, (len(react[id_s]))):
+                        if react[id_s][j]!='NA':
+                            re = float(react[id_s][j])
+                            if re>thres_h:
+                                fh.write('x')
+                            else:
+                                if re<thres_l:
+                                    fh.write('|')
+                                else:
+                                    fh.write('.')
+                        else:
+                            fh.write('.')
+                    fh.write('.')
+                    fh.close()
+                    if gqs:
+                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv -g > '+output_directory+'/'+id_s+'.dbnb')
+                        
+                    else:
+                        os.system('RNAfold < '+syspath+'/constraint.txt -T '+str(float(temperature)-273.15)+' -C --noconv --noPS > '+output_directory+'/'+id_s+'.dbnb')
+                    command = shlex.split('python %s %s %s' % (os.path.join(syspathpt, 'dot_convert.py'), os.path.join(output_directory, '%s.dbnb' % id_s), os.path.join(output_directory, '%s.dbn' % id_s)))
+                    subprocess.call(command)
+                    if not gqs:
+                        command = shlex.split('dot2ct %s %s' % (os.path.join(output_directory, '%s.dbn' % id_s), os.path.join(output_directory, '%s.ct' % id_s)))
+                    else:
+                        command = shlex.split('mv -f %s %s' % (os.path.join(syspath, '%s_ss.ps' % id_s), os.path.join(output_directory, '%s.ps' % id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.dbnb' % id_s)))
+                    subprocess.call(command)                  
+
+            else:
+                print(id_s+" not in the data of react!")
+                flag2 = 1
+        if flag2 == 0:
+            if predict_program == 'rs':
+                command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
+                subprocess.call(command)
+                command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
+                subprocess.call(command)
+            else:
+                if not gqs:
+                    command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
+                    subprocess.call(command)
+                    command = shlex.split('rm %s' % (os.path.join(output_directory, '%s.ct' % id_s)))
+                    subprocess.call(command)
+            flag3 = 1
+    else:
+        print(id_s+" not in the data of sequences!")
+
+#Remove the unnecessary files
+if flag3 == 1:
+    tarball = tarfile.open(output_file, 'w:')
+    for filename in os.listdir(output_directory):
+        filepath = os.path.join(output_directory, filename)
+        print filepath
+        tarball.add(filepath, arcname=filename)
+    #print os.listdir(syspath)
+    #print os.listdir(output_directory)
+    # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
+    tarball.close()