annotate reactivity_cal/react_cal.py @ 43:9fbbe00a2de2 draft

Uploaded
author tyty
date Mon, 20 Oct 2014 16:03:47 -0400
parents 13cf6f40ad1e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
1 #!/usr/bin/env python
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
2 # -*- coding: utf-8 -*-
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
3 import sys
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
4 from Bio import SeqIO
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
5 import math
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
6 from parse_dis_react import *
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
7 from react_norm_function import *
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
8 import os
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
9
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
10
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
11 dist_file1 = sys.argv[1] #plus library
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
12 dist_file2 = sys.argv[2] #minus library
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
13 seq_file = sys.argv[3] #Reference library(genome/cDNA)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
14 nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
15 flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
16 threshold = sys.argv[6] #Threshold to cap the reactivities
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
17 output_file = sys.argv[7]
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
18
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
19
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
20 distri_p = parse_dist(dist_file1)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
21 distri_m = parse_dist(dist_file2)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
22 threshold = float(threshold)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
23
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
24
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
25 ospath = os.path.realpath(sys.argv[0])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
26 ost = ospath.split('/')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
27 syspath = ""
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
28 for i in range(len(ost)-1):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
29 syspath = syspath+ost[i].strip()
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
30 syspath = syspath+'/'
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
31
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
32 h = file(syspath+"react.txt",'w')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
33 flag_in = int(flag_in)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
34
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
35 seqs = SeqIO.parse(open(seq_file),'fasta');
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
36 nt_s = set()
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
37 for i in range(len(nt_spec)):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
38 nt_s.add(nt_spec[i])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
39
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
40 flag = 0
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
41 trans = []
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
42 distri_p = distri_p[1]
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
43 distri_m = distri_m[1]
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
44
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
45 #thres = int(threshold)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
46
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
47
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
48 transcripts = {}
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
49 for seq in seqs:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
50 n = seq.id
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
51 trans.append(n)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
52 transcripts[n] = seq.seq.tostring()
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
53
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
54
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
55 #print(distri_p)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
56
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
57
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
58 for i in range(0, len(trans)):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
59 h.write(trans[i])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
60 h.write('\n')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
61 for j in range(len(distri_p[trans[i]])):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
62 distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
63 for j in range(len(distri_m[trans[i]])):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
64 distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
65 s_p = sum(distri_p[trans[i]])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
66 s_m = sum(distri_m[trans[i]])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
67 length = len(distri_p[trans[i]])
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
68 if s_p!= 0 and s_m!= 0:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
69 r = []
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
70 for j in range(0, len(distri_p[trans[i]])):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
71 f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
72 f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
73 raw_react = f_p-f_m
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
74 r.append(max(0, raw_react))
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
75
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
76 if s_p!= 0 and s_m!= 0:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
77 for k in range(1,(len(r)-1)):
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
78 if transcripts[trans[i]][k-1] in nt_s:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
79 h.write(str(r[k]))
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
80 h.write('\t')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
81 else:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
82 h.write('NA')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
83 h.write('\t')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
84 k = k+1
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
85 if transcripts[trans[i]][k-1] in nt_s:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
86 h.write(str(r[k]))
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
87 h.write('\n')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
88 else:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
89 h.write('NA')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
90 h.write('\n')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
91
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
92
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
93 h.close()
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
94
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
95 if flag_in:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
96 react_norm((syspath+"react.txt"),output_file, threshold)
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
97 else:
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
98 h_o = file(output_file, 'w')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
99 f_i = open(syspath+"react.txt")
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
100 for aline in f_i.readlines():
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
101 h_o.write(aline.strip())
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
102 h_o.write('\n')
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
103 os.system("rm -f "+syspath+"react.txt")
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
104
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
105
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
106
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
107
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
108
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
109
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
110
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
111
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
112
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
113
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
114
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
115
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
116
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
117
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
118
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
119
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
120
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
121
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
122
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
123
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
124
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
125
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
126
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
127
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
128
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
129
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
130
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
131
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
132
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
133
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
134
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
135
13cf6f40ad1e Uploaded
tyty
parents:
diff changeset
136