comparison reactivity_cal/react_cal.py @ 10:13cf6f40ad1e draft

Uploaded
author tyty
date Mon, 20 Oct 2014 01:43:30 -0400
parents
children
comparison
equal deleted inserted replaced
9:b0243634fe6b 10:13cf6f40ad1e
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 import sys
4 from Bio import SeqIO
5 import math
6 from parse_dis_react import *
7 from react_norm_function import *
8 import os
9
10
11 dist_file1 = sys.argv[1] #plus library
12 dist_file2 = sys.argv[2] #minus library
13 seq_file = sys.argv[3] #Reference library(genome/cDNA)
14 nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
15 flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
16 threshold = sys.argv[6] #Threshold to cap the reactivities
17 output_file = sys.argv[7]
18
19
20 distri_p = parse_dist(dist_file1)
21 distri_m = parse_dist(dist_file2)
22 threshold = float(threshold)
23
24
25 ospath = os.path.realpath(sys.argv[0])
26 ost = ospath.split('/')
27 syspath = ""
28 for i in range(len(ost)-1):
29 syspath = syspath+ost[i].strip()
30 syspath = syspath+'/'
31
32 h = file(syspath+"react.txt",'w')
33 flag_in = int(flag_in)
34
35 seqs = SeqIO.parse(open(seq_file),'fasta');
36 nt_s = set()
37 for i in range(len(nt_spec)):
38 nt_s.add(nt_spec[i])
39
40 flag = 0
41 trans = []
42 distri_p = distri_p[1]
43 distri_m = distri_m[1]
44
45 #thres = int(threshold)
46
47
48 transcripts = {}
49 for seq in seqs:
50 n = seq.id
51 trans.append(n)
52 transcripts[n] = seq.seq.tostring()
53
54
55 #print(distri_p)
56
57
58 for i in range(0, len(trans)):
59 h.write(trans[i])
60 h.write('\n')
61 for j in range(len(distri_p[trans[i]])):
62 distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
63 for j in range(len(distri_m[trans[i]])):
64 distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
65 s_p = sum(distri_p[trans[i]])
66 s_m = sum(distri_m[trans[i]])
67 length = len(distri_p[trans[i]])
68 if s_p!= 0 and s_m!= 0:
69 r = []
70 for j in range(0, len(distri_p[trans[i]])):
71 f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
72 f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
73 raw_react = f_p-f_m
74 r.append(max(0, raw_react))
75
76 if s_p!= 0 and s_m!= 0:
77 for k in range(1,(len(r)-1)):
78 if transcripts[trans[i]][k-1] in nt_s:
79 h.write(str(r[k]))
80 h.write('\t')
81 else:
82 h.write('NA')
83 h.write('\t')
84 k = k+1
85 if transcripts[trans[i]][k-1] in nt_s:
86 h.write(str(r[k]))
87 h.write('\n')
88 else:
89 h.write('NA')
90 h.write('\n')
91
92
93 h.close()
94
95 if flag_in:
96 react_norm((syspath+"react.txt"),output_file, threshold)
97 else:
98 h_o = file(output_file, 'w')
99 f_i = open(syspath+"react.txt")
100 for aline in f_i.readlines():
101 h_o.write(aline.strip())
102 h_o.write('\n')
103 os.system("rm -f "+syspath+"react.txt")
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136