annotate reactivity_cal/react_cal.py @ 112:87ec0ecdc2af draft

Uploaded
author tyty
date Sun, 12 Apr 2015 14:28:30 -0400
parents 8bcc5cbbdf91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
100
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
1 #!/usr/bin/env python
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
2 # -*- coding: utf-8 -*-
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
3 import sys
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
4 from Bio import SeqIO
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
5 import math
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
6 from parse_dis_react import *
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
7 from react_norm_function import *
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
8 import os
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
9 import random
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
10 import string
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
11
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
12
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
13 dist_file1 = sys.argv[1] #plus library
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
14 dist_file2 = sys.argv[2] #minus library
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
15 seq_file = sys.argv[3] #Reference library(genome/cDNA)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
16 nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
17 flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
18 threshold = sys.argv[6] #Threshold to cap the reactivities
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
19 output_file = sys.argv[7]
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
20
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
21
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
22 distri_p = parse_dist(dist_file1)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
23 distri_m = parse_dist(dist_file2)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
24 threshold = float(threshold)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
25
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
26
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
27 syspathrs = os.getcwd()
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
28
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
29 h = file(syspathrs+"react.txt",'w')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
30 flag_in = int(flag_in)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
31
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
32 seqs = SeqIO.parse(open(seq_file),'fasta');
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
33 nt_s = set()
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
34 for i in range(len(nt_spec)):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
35 nt_s.add(nt_spec[i])
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
36
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
37 flag = 0
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
38 trans = []
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
39 distri_p = distri_p[1]
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
40 distri_m = distri_m[1]
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
41
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
42 #thres = int(threshold)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
43
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
44
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
45 transcripts = {}
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
46 for seq in seqs:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
47 n = seq.id
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
48 trans.append(n)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
49 transcripts[n] = seq.seq.tostring()
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
50
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
51
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
52 #print(distri_p)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
53
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
54
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
55 for i in range(0, len(trans)):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
56 h.write(trans[i])
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
57 h.write('\n')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
58 for j in range(len(distri_p[trans[i]])):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
59 distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
60 for j in range(len(distri_m[trans[i]])):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
61 distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
62 s_p = sum(distri_p[trans[i]])
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
63 s_m = sum(distri_m[trans[i]])
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
64 length = len(distri_p[trans[i]])
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
65 if s_p!= 0 and s_m!= 0:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
66 r = []
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
67 for j in range(0, len(distri_p[trans[i]])):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
68 f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
69 f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
70 raw_react = f_p-f_m
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
71 r.append(max(0, raw_react))
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
72
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
73 if s_p!= 0 and s_m!= 0:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
74 for k in range(1,(len(r)-1)):
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
75 if transcripts[trans[i]][k-1] in nt_s:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
76 h.write(str(float('%.3f'%r[k])))
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
77 h.write('\t')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
78 else:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
79 h.write('NA')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
80 h.write('\t')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
81 k = k+1
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
82 if transcripts[trans[i]][k-1] in nt_s:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
83 h.write(str(float('%.3f'%r[k])))
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
84 h.write('\n')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
85 else:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
86 h.write('NA')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
87 h.write('\n')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
88
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
89
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
90 h.close()
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
91
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
92 if flag_in:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
93 react_norm((syspathrs+"react.txt"),output_file, threshold)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
94 else:
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
95 h_o = file(output_file, 'w')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
96 f_i = open(syspathrs+"react.txt")
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
97 for aline in f_i.readlines():
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
98 h_o.write(aline.strip())
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
99 h_o.write('\n')
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
100 os.system("rm -f "+syspathrs+"react.txt")
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
101
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
102 #os.system("rm -r "+syspathrs)
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
103
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
104
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
105
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
106
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
107
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
108
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
109
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
110
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
111
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
112
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
113
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
114
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
115
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
116
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
117
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
118
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
119
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
120
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
121
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
122
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
123
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
124
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
125
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
126
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
127
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
128
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
129
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
130
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
131
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
132
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
133
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
134
8bcc5cbbdf91 Uploaded
tyty
parents:
diff changeset
135