Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/misc/HmmGlocal.java @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 import java.io.*; | |
2 import java.lang.*; | |
3 | |
4 public class HmmGlocal | |
5 { | |
6 private double[] qual2prob; | |
7 private double cd, ce; // gap open probility [1e-3], gap extension probability [0.1] | |
8 private int cb; // band width [7] | |
9 | |
10 public HmmGlocal(final double d, final double e, final int b) { | |
11 cd = d; ce = e; cb = b; | |
12 qual2prob = new double[256]; | |
13 for (int i = 0; i < 256; ++i) | |
14 qual2prob[i] = Math.pow(10, -i/10.); | |
15 } | |
16 private static int set_u(final int b, final int i, final int k) { | |
17 int x = i - b; | |
18 x = x > 0? x : 0; | |
19 return (k + 1 - x) * 3; | |
20 } | |
21 public int hmm_glocal(final byte[] _ref, final byte[] _query, final byte[] _iqual, int[] state, byte[] q) { | |
22 int i, k; | |
23 /*** initialization ***/ | |
24 // change coordinates | |
25 int l_ref = _ref.length; | |
26 byte[] ref = new byte[l_ref+1]; | |
27 for (i = 0; i < l_ref; ++i) ref[i+1] = _ref[i]; // FIXME: this is silly... | |
28 int l_query = _query.length; | |
29 byte[] query = new byte[l_query+1]; | |
30 double[] qual = new double[l_query+1]; | |
31 for (i = 0; i < l_query; ++i) { | |
32 query[i+1] = _query[i]; | |
33 qual[i+1] = qual2prob[_iqual[i]]; | |
34 } | |
35 // set band width | |
36 int bw2, bw = l_ref > l_query? l_ref : l_query; | |
37 if (bw > cb) bw = cb; | |
38 if (bw < Math.abs(l_ref - l_query)) bw = Math.abs(l_ref - l_query); | |
39 bw2 = bw * 2 + 1; | |
40 // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[] | |
41 double[][] f = new double[l_query+1][bw2*3 + 6]; | |
42 double[][] b = new double[l_query+1][bw2*3 + 6]; | |
43 double[] s = new double[l_query+2]; | |
44 // initialize transition probabilities | |
45 double sM, sI, bM, bI; | |
46 sM = sI = 1. / (2 * l_query + 2); | |
47 bM = (1 - cd) / l_query; bI = cd / l_query; // (bM+bI)*l_query==1 | |
48 double[] m = new double[9]; | |
49 m[0*3+0] = (1 - cd - cd) * (1 - sM); m[0*3+1] = m[0*3+2] = cd * (1 - sM); | |
50 m[1*3+0] = (1 - ce) * (1 - sI); m[1*3+1] = ce * (1 - sI); m[1*3+2] = 0.; | |
51 m[2*3+0] = 1 - ce; m[2*3+1] = 0.; m[2*3+2] = ce; | |
52 /*** forward ***/ | |
53 // f[0] | |
54 f[0][set_u(bw, 0, 0)] = s[0] = 1.; | |
55 { // f[1] | |
56 double[] fi = f[1]; | |
57 double sum; | |
58 int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end; | |
59 for (k = beg, sum = 0.; k <= end; ++k) { | |
60 int u; | |
61 double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] / 3.; | |
62 u = set_u(bw, 1, k); | |
63 fi[u+0] = e * bM; fi[u+1] = .25 * bI; | |
64 sum += fi[u] + fi[u+1]; | |
65 } | |
66 // rescale | |
67 s[1] = sum; | |
68 _beg = set_u(bw, 1, beg); _end = set_u(bw, 1, end); _end += 2; | |
69 for (k = _beg; k <= _end; ++k) fi[k] /= sum; | |
70 } | |
71 // f[2..l_query] | |
72 for (i = 2; i <= l_query; ++i) { | |
73 double[] fi = f[i], fi1 = f[i-1]; | |
74 double sum, qli = qual[i]; | |
75 int beg = 1, end = l_ref, x, _beg, _end; | |
76 byte qyi = query[i]; | |
77 x = i - bw; beg = beg > x? beg : x; // band start | |
78 x = i + bw; end = end < x? end : x; // band end | |
79 for (k = beg, sum = 0.; k <= end; ++k) { | |
80 int u, v11, v01, v10; | |
81 double e; | |
82 e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli / 3.; | |
83 u = set_u(bw, i, k); v11 = set_u(bw, i-1, k-1); v10 = set_u(bw, i-1, k); v01 = set_u(bw, i, k-1); | |
84 fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]); | |
85 fi[u+1] = .25 * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]); | |
86 fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2]; | |
87 sum += fi[u] + fi[u+1] + fi[u+2]; | |
88 //System.out.println("("+i+","+k+";"+u+"): "+fi[u]+","+fi[u+1]+","+fi[u+2]); | |
89 } | |
90 // rescale | |
91 s[i] = sum; | |
92 _beg = set_u(bw, i, beg); _end = set_u(bw, i, end); _end += 2; | |
93 for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum; | |
94 } | |
95 { // f[l_query+1] | |
96 double sum; | |
97 for (k = 1, sum = 0.; k <= l_ref; ++k) { | |
98 int u = set_u(bw, l_query, k); | |
99 if (u < 3 || u >= bw2*3+3) continue; | |
100 sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI; | |
101 } | |
102 s[l_query+1] = sum; // the last scaling factor | |
103 } | |
104 /*** backward ***/ | |
105 // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from) | |
106 for (k = 1; k <= l_ref; ++k) { | |
107 int u = set_u(bw, l_query, k); | |
108 double[] bi = b[l_query]; | |
109 if (u < 3 || u >= bw2*3+3) continue; | |
110 bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1]; | |
111 } | |
112 // b[l_query-1..1] | |
113 for (i = l_query - 1; i >= 1; --i) { | |
114 int beg = 1, end = l_ref, x, _beg, _end; | |
115 double[] bi = b[i], bi1 = b[i+1]; | |
116 double y = (i > 1)? 1. : 0., qli1 = qual[i+1]; | |
117 byte qyi1 = query[i+1]; | |
118 x = i - bw; beg = beg > x? beg : x; | |
119 x = i + bw; end = end < x? end : x; | |
120 for (k = end; k >= beg; --k) { | |
121 int u, v11, v01, v10; | |
122 double e; | |
123 u = set_u(bw, i, k); v11 = set_u(bw, i+1, k+1); v10 = set_u(bw, i+1, k); v01 = set_u(bw, i, k+1); | |
124 e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 / 3.) * bi1[v11]; | |
125 bi[u+0] = e * m[0] + .25 * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e. | |
126 bi[u+1] = e * m[3] + .25 * m[4] * bi1[v10+1]; | |
127 bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y; | |
128 } | |
129 // rescale | |
130 _beg = set_u(bw, i, beg); _end = set_u(bw, i, end); _end += 2; | |
131 for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y; | |
132 } | |
133 double pb; | |
134 { // b[0] | |
135 int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1; | |
136 double sum = 0.; | |
137 for (k = end; k >= beg; --k) { | |
138 int u = set_u(bw, 1, k); | |
139 double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] / 3.; | |
140 if (u < 3 || u >= bw2*3+3) continue; | |
141 sum += e * b[1][u+0] * bM + .25 * b[1][u+1] * bI; | |
142 } | |
143 pb = b[0][set_u(bw, 0, 0)] = sum / s[0]; // if everything works as is expected, pb == 1.0 | |
144 } | |
145 int is_diff = Math.abs(pb - 1.) > 1e-7? 1 : 0; | |
146 /*** MAP ***/ | |
147 for (i = 1; i <= l_query; ++i) { | |
148 double sum = 0., max = 0.; | |
149 double[] fi = f[i], bi = b[i]; | |
150 int beg = 1, end = l_ref, x, max_k = -1; | |
151 x = i - bw; beg = beg > x? beg : x; | |
152 x = i + bw; end = end < x? end : x; | |
153 for (k = beg; k <= end; ++k) { | |
154 int u = set_u(bw, i, k); | |
155 double z; | |
156 sum += (z = fi[u+0] * bi[u+0]); if (z > max) { max = z; max_k = (k-1)<<2 | 0; } | |
157 sum += (z = fi[u+1] * bi[u+1]); if (z > max) { max = z; max_k = (k-1)<<2 | 1; } | |
158 } | |
159 max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0 | |
160 if (state != null) state[i-1] = max_k; | |
161 if (q != null) { | |
162 k = (int)(-4.343 * Math.log(1. - max) + .499); | |
163 q[i-1] = (byte)(k > 100? 99 : k); | |
164 } | |
165 //System.out.println("("+pb+","+sum+")"+" ("+(i-1)+","+(max_k>>2)+","+(max_k&3)+","+max+")"); | |
166 } | |
167 return 0; | |
168 } | |
169 | |
170 public static void main(String[] args) { | |
171 byte[] ref = {'\0', '\1', '\3', '\3', '\1'}; | |
172 byte[] query = {'\0', '\3', '\3', '\1'}; | |
173 byte[] qual = new byte[4]; | |
174 qual[0] = qual[1] = qual[2] = qual[3] = (byte)20; | |
175 HmmGlocal hg = new HmmGlocal(1e-3, 0.1, 7); | |
176 hg.hmm_glocal(ref, query, qual, null, null); | |
177 } | |
178 } |