annotate predict/predict_RNAs.py @ 69:bd02fcf26f7a draft

Uploaded wrapper that correctly handles structure prediction without constraints.
author devteam
date Mon, 24 Nov 2014 18:21:30 -0500
parents fb80870002a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
1 #RNA structure prediction & Output and illustrate reactivities
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
2
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
3 import sys
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
4 import shlex
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
5 import subprocess
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
6 import tarfile
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
7 from parse_dis_pac import *
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
8 from read_file import *
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
9 from Bio import SeqIO
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
10 import os
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
11 from rtts_plot import *
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
12 import random
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
13 import string
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
14
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
15
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
16 id_file = sys.argv[1]
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
17 seq_file = sys.argv[2]
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
18 output_file = sys.argv[4]
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
19
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
20
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
21 flag = False
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
22 if sys.argv[3]!='None': #input reactivity file if provided
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
23 react_file = sys.argv[3]
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
24 react = parse_dist(react_file)
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
25 react = react[1]
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
26 flag = True
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
27
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
28 syspath = os.getcwd()
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
29
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
30 ids = read_t_file(id_file)
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
31 sequences = SeqIO.parse(seq_file, 'fasta')
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
32
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
33
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
34 seqs = {}
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
35 for seq in sequences:
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
36 seqs[seq.id] = seq.seq.tostring()
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
37
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
38 if len(ids)>100: #setup a limit of the number of sequence to be predicted
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
39 print("Number of sequences exceeds limitation!")
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
40 sys.exit(0)
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
41
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
42
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
43 #predict RNA structures
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
44 output_directory = os.path.join(syspath, "output_files")
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
45 if not os.path.exists(output_directory):
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
46 os.makedirs(output_directory)
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
47 for i in range(len(ids)):
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
48 id_s = ids[i][0]
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
49 print(id_s)
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
50 #Put RNA sequence and reactivities into files
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
51 if id_s in seqs:
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
52 fh = file(os.path.join(syspath,"temp.txt"), 'w')
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
53 fh.write('>'+id_s)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
54 fh.write('\n')
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
55 fh.write(seqs[id_s])
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
56 fh.close()
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
57 if not flag:
69
bd02fcf26f7a Uploaded wrapper that correctly handles structure prediction without constraints.
devteam
parents: 68
diff changeset
58 command = shlex.split('Fold %s %s' % (os.path.join(syspath, 'temp.txt'), os.path.join(output_directory, '%s.ct' % id_s)))
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
59 subprocess.call(command)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
60 else:
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
61 if id_s in react:
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
62 fh = file(os.path.join(syspath, "constraint.txt"), 'w')
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
63 make_plot(react[id_s], id_s, output_directory) #make a plot of the distribution of the reactivites of the input RNA
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
64 for j in range(0, (len(react[id_s]))):
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
65 if react[id_s][j]!='NA':
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
66 fh.write(str(j+1))
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
67 fh.write('\t')
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
68 fh.write(str(react[id_s][j]))
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
69 fh.write('\n')
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
70 #h.write(str(react[id_s][j])) #Output the reactivities
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
71 #h.write('\t')
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
72 fh.close()
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
73 #h.write('\n')
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
74 #h.write('\n')
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
75 command = shlex.split("Fold %s -sh %s %s" % (os.path.join(syspath, "temp.txt"),
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
76 os.path.join(syspath, "constraint.txt"),
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
77 os.path.join(output_directory, "%s.ct" % id_s)))
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
78 subprocess.call(command)
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
79 else:
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
80 print(id_s+" not in the data of react!")
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
81 command = shlex.split('draw %s.ct %s.ps' % (os.path.join(output_directory, id_s), os.path.join(output_directory, id_s)))
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
82 subprocess.call(command)
64
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
83 else:
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
84 print(id_s+" not in the data of sequences!")
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
85
a1ce42d5258d Uploaded
tyty
parents:
diff changeset
86 #Remove the unnecessary files
68
fb80870002a3 Uploaded
devteam
parents: 67
diff changeset
87 tarball = tarfile.open(output_file, 'w:')
67
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
88 for filename in os.listdir(output_directory):
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
89 filepath = os.path.join(output_directory, filename)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
90 print filepath
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
91 tarball.add(filepath, arcname=filename)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
92 print os.listdir(syspath)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
93 print os.listdir(output_directory)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
94 # tarball.add('%s.tif' % os.path.join(syspath, id_s), arcname='%s.tif' % id_s)
96a827962750 Uploaded updated scripts, removed *.pyc and .DS_Store
devteam
parents: 64
diff changeset
95 tarball.close()