Mercurial > repos > tyty > structurefold
comparison predict/predict_RNAs.py @ 74:63c41304b221 draft
Uploaded
author | tyty |
---|---|
date | Tue, 09 Dec 2014 03:03:30 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
73:1c325ff557d9 | 74:63c41304b221 |
---|---|
1 #RNA structure prediction & Output and illustrate reactivities | |
2 | |
3 import sys | |
4 import shlex | |
5 import subprocess | |
6 import tarfile | |
7 from parse_dis_pac import * | |
8 from read_file import * | |
9 from Bio import SeqIO | |
10 import os | |
11 from rtts_plot import * | |
12 import random | |
13 import string | |
14 | |
15 | |
16 id_file = sys.argv[1] | |
17 seq_file = sys.argv[2] | |
18 predict_type = sys.argv[3] | |
19 temperature = sys.argv[4] | |
20 output_file = sys.argv[5] | |
21 | |
22 | |
23 flag = False | |
24 if predict_type!='silico': #input reactivity file if provided | |
25 react_file = sys.argv[6] | |
26 slope = sys.argv[7] | |
27 intercept = sys.argv[8] | |
28 react = parse_dist(react_file) | |
29 react = react[1] | |
30 flag = True | |
31 | |
32 syspath = os.getcwd() | |
33 | |
34 ids = read_t_file(id_file) | |
35 sequences = SeqIO.parse(seq_file, 'fasta') | |
36 | |
37 | |
38 seqs = {} | |
39 for seq in sequences: | |
40 seqs[seq.id] = seq.seq.tostring() | |
41 | |
42 if len(ids)>100: #setup a limit of the number of sequence to be predicted | |
43 print("Number of sequences exceeds limitation!") | |
44 sys.exit(0) | |
45 | |
46 | |
47 #predict RNA structures | |
48 output_directory = os.path.join(syspath, "output_files") | |
49 if not os.path.exists(output_directory): | |
50 os.makedirs(output_directory) | |
51 for i in range(len(ids)): | |
52 flag2 = 0 | |
53 id_s = ids[i][0] | |
54 #print(id_s) | |
55 #Put RNA sequence and reactivities into files | |
56 if id_s in seqs: | |
57 fh = file(os.path.join(syspath,"temp.txt"), 'w') | |
58 fh.write('>'+id_s) | |
59 fh.write('\n') | |
60 fh.write(seqs[id_s]) | |
61 fh.close() | |
62 if not flag: | |
63 command = shlex.split('Fold %s -T %s %s' % (os.path.join(syspath, 'temp.txt'), temperature, os.path.join(output_directory, '%s.ct' % id_s))) | |
64 subprocess.call(command) | |
65 else: | |
66 if id_s in react: | |
67 fh = file(os.path.join(syspath, "constraint.txt"), 'w') | |
68 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA | |
69 for j in range(0, (len(react[id_s]))): | |
70 if react[id_s][j]!='NA': | |
71 fh.write(str(j+1)) | |
72 fh.write('\t') | |
73 fh.write(str(react[id_s][j])) | |
74 fh.write('\n') | |
75 #h.write(str(react[id_s][j])) #Output the reactivities | |
76 #h.write('\t') | |
77 fh.close() | |
78 #h.write('\n') | |
79 #h.write('\n') | |
80 command = shlex.split("Fold %s -sh %s -si %s -sm %s -T %s %s" % (os.path.join(syspath, "temp.txt"), | |
81 os.path.join(syspath, "constraint.txt"), intercept, slope, temperature, | |
82 os.path.join(output_directory, "%s.ct" % id_s))) | |
83 subprocess.call(command) | |
84 else: | |
85 print(id_s+" not in the data of react!") | |
86 flag2 = 1 | |
87 if flag2 == 0: | |
88 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s))) | |
89 subprocess.call(command) | |
90 else: | |
91 print(id_s+" not in the data of sequences!") | |
92 | |
93 #Remove the unnecessary files | |
94 tarball = tarfile.open(output_file, 'w:') | |
95 for filename in os.listdir(output_directory): | |
96 filepath = os.path.join(output_directory, filename) | |
97 print filepath | |
98 tarball.add(filepath, arcname=filename) | |
99 #print os.listdir(syspath) | |
100 #print os.listdir(output_directory) | |
101 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s) | |
102 tarball.close() |