comparison predict/predict_RNAs.py @ 74:63c41304b221 draft

Uploaded
author tyty
date Tue, 09 Dec 2014 03:03:30 -0500
parents
children
comparison
equal deleted inserted replaced
73:1c325ff557d9 74:63c41304b221
1 #RNA structure prediction & Output and illustrate reactivities
2
3 import sys
4 import shlex
5 import subprocess
6 import tarfile
7 from parse_dis_pac import *
8 from read_file import *
9 from Bio import SeqIO
10 import os
11 from rtts_plot import *
12 import random
13 import string
14
15
16 id_file = sys.argv[1]
17 seq_file = sys.argv[2]
18 predict_type = sys.argv[3]
19 temperature = sys.argv[4]
20 output_file = sys.argv[5]
21
22
23 flag = False
24 if predict_type!='silico': #input reactivity file if provided
25 react_file = sys.argv[6]
26 slope = sys.argv[7]
27 intercept = sys.argv[8]
28 react = parse_dist(react_file)
29 react = react[1]
30 flag = True
31
32 syspath = os.getcwd()
33
34 ids = read_t_file(id_file)
35 sequences = SeqIO.parse(seq_file, 'fasta')
36
37
38 seqs = {}
39 for seq in sequences:
40 seqs[seq.id] = seq.seq.tostring()
41
42 if len(ids)>100: #setup a limit of the number of sequence to be predicted
43 print("Number of sequences exceeds limitation!")
44 sys.exit(0)
45
46
47 #predict RNA structures
48 output_directory = os.path.join(syspath, "output_files")
49 if not os.path.exists(output_directory):
50 os.makedirs(output_directory)
51 for i in range(len(ids)):
52 flag2 = 0
53 id_s = ids[i][0]
54 #print(id_s)
55 #Put RNA sequence and reactivities into files
56 if id_s in seqs:
57 fh = file(os.path.join(syspath,"temp.txt"), 'w')
58 fh.write('>'+id_s)
59 fh.write('\n')
60 fh.write(seqs[id_s])
61 fh.close()
62 if not flag:
63 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s)))
64 subprocess.call(command)
65 else:
66 if id_s in react:
67 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
68 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
69 for j in range(0, (len(react[id_s]))):
70 if react[id_s][j]!='NA':
71 fh.write(str(j+1))
72 fh.write('\t')
73 fh.write(str(react[id_s][j]))
74 fh.write('\n')
75 #h.write(str(react[id_s][j])) #Output the reactivities
76 #h.write('\t')
77 fh.close()
78 #h.write('\n')
79 #h.write('\n')
80 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"),
81 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature,
82 os.path.join(output_directory, "%s.ct" % id_s)))
83 subprocess.call(command)
84 else:
85 print(id_s+" not in the data of react!")
86 flag2 = 1
87 if flag2 == 0:
88 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
89 subprocess.call(command)
90 else:
91 print(id_s+" not in the data of sequences!")
92
93 #Remove the unnecessary files
94 tarball = tarfile.open(output_file, 'w:')
95 for filename in os.listdir(output_directory):
96 filepath = os.path.join(output_directory, filename)
97 print filepath
98 tarball.add(filepath, arcname=filename)
99 #print os.listdir(syspath)
100 #print os.listdir(output_directory)
101 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
102 tarball.close()