annotate predict/predict_RNAs.py @ 86:e581e89e3318 draft

Uploaded
author tyty
date Fri, 19 Dec 2014 13:22:36 -0500
parents 63c41304b221
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
74
63c41304b221 Uploaded
tyty
parents:
diff changeset
1 #RNA structure prediction & Output and illustrate reactivities
63c41304b221 Uploaded
tyty
parents:
diff changeset
2
63c41304b221 Uploaded
tyty
parents:
diff changeset
3 import sys
63c41304b221 Uploaded
tyty
parents:
diff changeset
4 import shlex
63c41304b221 Uploaded
tyty
parents:
diff changeset
5 import subprocess
63c41304b221 Uploaded
tyty
parents:
diff changeset
6 import tarfile
63c41304b221 Uploaded
tyty
parents:
diff changeset
7 from parse_dis_pac import *
63c41304b221 Uploaded
tyty
parents:
diff changeset
8 from read_file import *
63c41304b221 Uploaded
tyty
parents:
diff changeset
9 from Bio import SeqIO
63c41304b221 Uploaded
tyty
parents:
diff changeset
10 import os
63c41304b221 Uploaded
tyty
parents:
diff changeset
11 from rtts_plot import *
63c41304b221 Uploaded
tyty
parents:
diff changeset
12 import random
63c41304b221 Uploaded
tyty
parents:
diff changeset
13 import string
63c41304b221 Uploaded
tyty
parents:
diff changeset
14
63c41304b221 Uploaded
tyty
parents:
diff changeset
15
63c41304b221 Uploaded
tyty
parents:
diff changeset
16 id_file = sys.argv[1]
63c41304b221 Uploaded
tyty
parents:
diff changeset
17 seq_file = sys.argv[2]
63c41304b221 Uploaded
tyty
parents:
diff changeset
18 predict_type = sys.argv[3]
63c41304b221 Uploaded
tyty
parents:
diff changeset
19 temperature = sys.argv[4]
63c41304b221 Uploaded
tyty
parents:
diff changeset
20 output_file = sys.argv[5]
63c41304b221 Uploaded
tyty
parents:
diff changeset
21
63c41304b221 Uploaded
tyty
parents:
diff changeset
22
63c41304b221 Uploaded
tyty
parents:
diff changeset
23 flag = False
63c41304b221 Uploaded
tyty
parents:
diff changeset
24 if predict_type!='silico': #input reactivity file if provided
63c41304b221 Uploaded
tyty
parents:
diff changeset
25 react_file = sys.argv[6]
63c41304b221 Uploaded
tyty
parents:
diff changeset
26 slope = sys.argv[7]
63c41304b221 Uploaded
tyty
parents:
diff changeset
27 intercept = sys.argv[8]
63c41304b221 Uploaded
tyty
parents:
diff changeset
28 react = parse_dist(react_file)
63c41304b221 Uploaded
tyty
parents:
diff changeset
29 react = react[1]
63c41304b221 Uploaded
tyty
parents:
diff changeset
30 flag = True
63c41304b221 Uploaded
tyty
parents:
diff changeset
31
63c41304b221 Uploaded
tyty
parents:
diff changeset
32 syspath = os.getcwd()
63c41304b221 Uploaded
tyty
parents:
diff changeset
33
63c41304b221 Uploaded
tyty
parents:
diff changeset
34 ids = read_t_file(id_file)
63c41304b221 Uploaded
tyty
parents:
diff changeset
35 sequences = SeqIO.parse(seq_file, 'fasta')
63c41304b221 Uploaded
tyty
parents:
diff changeset
36
63c41304b221 Uploaded
tyty
parents:
diff changeset
37
63c41304b221 Uploaded
tyty
parents:
diff changeset
38 seqs = {}
63c41304b221 Uploaded
tyty
parents:
diff changeset
39 for seq in sequences:
63c41304b221 Uploaded
tyty
parents:
diff changeset
40 seqs[seq.id] = seq.seq.tostring()
63c41304b221 Uploaded
tyty
parents:
diff changeset
41
63c41304b221 Uploaded
tyty
parents:
diff changeset
42 if len(ids)>100: #setup a limit of the number of sequence to be predicted
63c41304b221 Uploaded
tyty
parents:
diff changeset
43 print("Number of sequences exceeds limitation!")
63c41304b221 Uploaded
tyty
parents:
diff changeset
44 sys.exit(0)
63c41304b221 Uploaded
tyty
parents:
diff changeset
45
63c41304b221 Uploaded
tyty
parents:
diff changeset
46
63c41304b221 Uploaded
tyty
parents:
diff changeset
47 #predict RNA structures
63c41304b221 Uploaded
tyty
parents:
diff changeset
48 output_directory = os.path.join(syspath, "output_files")
63c41304b221 Uploaded
tyty
parents:
diff changeset
49 if not os.path.exists(output_directory):
63c41304b221 Uploaded
tyty
parents:
diff changeset
50 os.makedirs(output_directory)
63c41304b221 Uploaded
tyty
parents:
diff changeset
51 for i in range(len(ids)):
63c41304b221 Uploaded
tyty
parents:
diff changeset
52 flag2 = 0
63c41304b221 Uploaded
tyty
parents:
diff changeset
53 id_s = ids[i][0]
63c41304b221 Uploaded
tyty
parents:
diff changeset
54 #print(id_s)
63c41304b221 Uploaded
tyty
parents:
diff changeset
55 #Put RNA sequence and reactivities into files
63c41304b221 Uploaded
tyty
parents:
diff changeset
56 if id_s in seqs:
63c41304b221 Uploaded
tyty
parents:
diff changeset
57 fh = file(os.path.join(syspath,"temp.txt"), 'w')
63c41304b221 Uploaded
tyty
parents:
diff changeset
58 fh.write('>'+id_s)
63c41304b221 Uploaded
tyty
parents:
diff changeset
59 fh.write('\n')
63c41304b221 Uploaded
tyty
parents:
diff changeset
60 fh.write(seqs[id_s])
63c41304b221 Uploaded
tyty
parents:
diff changeset
61 fh.close()
63c41304b221 Uploaded
tyty
parents:
diff changeset
62 if not flag:
63c41304b221 Uploaded
tyty
parents:
diff changeset
63 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
63c41304b221 Uploaded
tyty
parents:
diff changeset
64 subprocess.call(command)
63c41304b221 Uploaded
tyty
parents:
diff changeset
65 else:
63c41304b221 Uploaded
tyty
parents:
diff changeset
66 if id_s in react:
63c41304b221 Uploaded
tyty
parents:
diff changeset
67 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
63c41304b221 Uploaded
tyty
parents:
diff changeset
68 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
63c41304b221 Uploaded
tyty
parents:
diff changeset
69 for j in range(0, (len(react[id_s]))):
63c41304b221 Uploaded
tyty
parents:
diff changeset
70 if react[id_s][j]!='NA':
63c41304b221 Uploaded
tyty
parents:
diff changeset
71 fh.write(str(j+1))
63c41304b221 Uploaded
tyty
parents:
diff changeset
72 fh.write('\t')
63c41304b221 Uploaded
tyty
parents:
diff changeset
73 fh.write(str(react[id_s][j]))
63c41304b221 Uploaded
tyty
parents:
diff changeset
74 fh.write('\n')
63c41304b221 Uploaded
tyty
parents:
diff changeset
75 #h.write(str(react[id_s][j])) #Output the reactivities
63c41304b221 Uploaded
tyty
parents:
diff changeset
76 #h.write('\t')
63c41304b221 Uploaded
tyty
parents:
diff changeset
77 fh.close()
63c41304b221 Uploaded
tyty
parents:
diff changeset
78 #h.write('\n')
63c41304b221 Uploaded
tyty
parents:
diff changeset
79 #h.write('\n')
63c41304b221 Uploaded
tyty
parents:
diff changeset
80 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
63c41304b221 Uploaded
tyty
parents:
diff changeset
81 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
63c41304b221 Uploaded
tyty
parents:
diff changeset
82 os.path.join(output_directory, "%s.ct" % id_s)))
63c41304b221 Uploaded
tyty
parents:
diff changeset
83 subprocess.call(command)
63c41304b221 Uploaded
tyty
parents:
diff changeset
84 else:
63c41304b221 Uploaded
tyty
parents:
diff changeset
85 print(id_s+" not in the data of react!")
63c41304b221 Uploaded
tyty
parents:
diff changeset
86 flag2 = 1
63c41304b221 Uploaded
tyty
parents:
diff changeset
87 if flag2 == 0:
63c41304b221 Uploaded
tyty
parents:
diff changeset
88 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
63c41304b221 Uploaded
tyty
parents:
diff changeset
89 subprocess.call(command)
63c41304b221 Uploaded
tyty
parents:
diff changeset
90 else:
63c41304b221 Uploaded
tyty
parents:
diff changeset
91 print(id_s+" not in the data of sequences!")
63c41304b221 Uploaded
tyty
parents:
diff changeset
92
63c41304b221 Uploaded
tyty
parents:
diff changeset
93 #Remove the unnecessary files
63c41304b221 Uploaded
tyty
parents:
diff changeset
94 tarball = tarfile.open(output_file, 'w:')
63c41304b221 Uploaded
tyty
parents:
diff changeset
95 for filename in os.listdir(output_directory):
63c41304b221 Uploaded
tyty
parents:
diff changeset
96 filepath = os.path.join(output_directory, filename)
63c41304b221 Uploaded
tyty
parents:
diff changeset
97 print filepath
63c41304b221 Uploaded
tyty
parents:
diff changeset
98 tarball.add(filepath, arcname=filename)
63c41304b221 Uploaded
tyty
parents:
diff changeset
99 #print os.listdir(syspath)
63c41304b221 Uploaded
tyty
parents:
diff changeset
100 #print os.listdir(output_directory)
63c41304b221 Uploaded
tyty
parents:
diff changeset
101 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
63c41304b221 Uploaded
tyty
parents:
diff changeset
102 tarball.close()