74
|
1 #RNA structure prediction & Output and illustrate reactivities
|
|
2
|
|
3 import sys
|
|
4 import shlex
|
|
5 import subprocess
|
|
6 import tarfile
|
|
7 from parse_dis_pac import *
|
|
8 from read_file import *
|
|
9 from Bio import SeqIO
|
|
10 import os
|
|
11 from rtts_plot import *
|
|
12 import random
|
|
13 import string
|
|
14
|
|
15
|
|
16 id_file = sys.argv[1]
|
|
17 seq_file = sys.argv[2]
|
|
18 predict_type = sys.argv[3]
|
|
19 temperature = sys.argv[4]
|
|
20 output_file = sys.argv[5]
|
|
21
|
|
22
|
|
23 flag = False
|
|
24 if predict_type!='silico': #input reactivity file if provided
|
|
25 react_file = sys.argv[6]
|
|
26 slope = sys.argv[7]
|
|
27 intercept = sys.argv[8]
|
|
28 react = parse_dist(react_file)
|
|
29 react = react[1]
|
|
30 flag = True
|
|
31
|
|
32 syspath = os.getcwd()
|
|
33
|
|
34 ids = read_t_file(id_file)
|
|
35 sequences = SeqIO.parse(seq_file, 'fasta')
|
|
36
|
|
37
|
|
38 seqs = {}
|
|
39 for seq in sequences:
|
|
40 seqs[seq.id] = seq.seq.tostring()
|
|
41
|
|
42 if len(ids)>100: #setup a limit of the number of sequence to be predicted
|
|
43 print("Number of sequences exceeds limitation!")
|
|
44 sys.exit(0)
|
|
45
|
|
46
|
|
47 #predict RNA structures
|
|
48 output_directory = os.path.join(syspath, "output_files")
|
|
49 if not os.path.exists(output_directory):
|
|
50 os.makedirs(output_directory)
|
|
51 for i in range(len(ids)):
|
|
52 flag2 = 0
|
|
53 id_s = ids[i][0]
|
|
54 #print(id_s)
|
|
55 #Put RNA sequence and reactivities into files
|
|
56 if id_s in seqs:
|
|
57 fh = file(os.path.join(syspath,"temp.txt"), 'w')
|
|
58 fh.write('>'+id_s)
|
|
59 fh.write('\n')
|
|
60 fh.write(seqs[id_s])
|
|
61 fh.close()
|
|
62 if not flag:
|
|
63 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
|
|
64 subprocess.call(command)
|
|
65 else:
|
|
66 if id_s in react:
|
|
67 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
|
|
68 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
|
|
69 for j in range(0, (len(react[id_s]))):
|
|
70 if react[id_s][j]!='NA':
|
|
71 fh.write(str(j+1))
|
|
72 fh.write('\t')
|
|
73 fh.write(str(react[id_s][j]))
|
|
74 fh.write('\n')
|
|
75 #h.write(str(react[id_s][j])) #Output the reactivities
|
|
76 #h.write('\t')
|
|
77 fh.close()
|
|
78 #h.write('\n')
|
|
79 #h.write('\n')
|
|
80 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
|
|
81 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
|
|
82 os.path.join(output_directory, "%s.ct" % id_s)))
|
|
83 subprocess.call(command)
|
|
84 else:
|
|
85 print(id_s+" not in the data of react!")
|
|
86 flag2 = 1
|
|
87 if flag2 == 0:
|
|
88 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
|
|
89 subprocess.call(command)
|
|
90 else:
|
|
91 print(id_s+" not in the data of sequences!")
|
|
92
|
|
93 #Remove the unnecessary files
|
|
94 tarball = tarfile.open(output_file, 'w:')
|
|
95 for filename in os.listdir(output_directory):
|
|
96 filepath = os.path.join(output_directory, filename)
|
|
97 print filepath
|
|
98 tarball.add(filepath, arcname=filename)
|
|
99 #print os.listdir(syspath)
|
|
100 #print os.listdir(output_directory)
|
|
101 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
|
|
102 tarball.close()
|