Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/bwtsw2_core.c @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
comparison
equal
deleted
inserted
replaced
1:a9636dc1e99a | 2:a294fbfcb1db |
---|---|
1 #include <stdlib.h> | |
2 #include <string.h> | |
3 #include <stdio.h> | |
4 #include <sys/resource.h> | |
5 #include <assert.h> | |
6 #include "bwt_lite.h" | |
7 #include "bwtsw2.h" | |
8 #include "bwt.h" | |
9 #include "kvec.h" | |
10 | |
11 typedef struct { | |
12 bwtint_t k, l; | |
13 } qintv_t; | |
14 | |
15 #define qintv_eq(a, b) ((a).k == (b).k && (a).l == (b).l) | |
16 #define qintv_hash(a) ((a).k>>7^(a).l<<17) | |
17 | |
18 #include "khash.h" | |
/* Hash keyed by a query SA interval (qintv_t) with a uint64_t value;
 * remove_duplicate() stores (cell index << 32 | score) in the value. */
KHASH_INIT(qintv, qintv_t, uint64_t, 1, qintv_hash, qintv_eq)
/* Hash keyed by a 64-bit integer with a uint64_t value; bsw2_connectivity()
 * keys it with (k << 32 | l) of a target SA interval. */
KHASH_MAP_INIT_INT64(64, uint64_t)

/* Sentinel score used for DP states that hold no valid alignment. */
#define MINUS_INF -0x3fffffff
/* Overlap fraction above which bsw2_resolve_duphits() treats two hits as
 * duplicates of each other. */
#define MASK_LEVEL 0.90f
24 | |
25 struct __mempool_t; | |
26 static void mp_destroy(struct __mempool_t*); | |
27 typedef struct { | |
28 bwtint_t qk, ql; | |
29 int I, D, G; | |
30 uint32_t pj:2, qlen:30; | |
31 int tlen; | |
32 int ppos, upos; | |
33 int cpos[4]; | |
34 } bsw2cell_t; | |
35 | |
36 #include "ksort.h" | |
/* Instantiate the generic ksort routines for plain int; this file uses
 * ks_ksmall and ks_heapadjust on int arrays. */
KSORT_INIT_GENERIC(int)
/* Ordering predicate for hits: a hit with a larger G + 4*n_seeds sorts first. */
#define __hitG_lt(a, b) (((a).G + ((int)(a).n_seeds<<2)) > (b).G + ((int)(b).n_seeds<<2))
KSORT_INIT(hitG, bsw2hit_t, __hitG_lt)

/* Template for a fresh cell: empty query interval, I/D/G at MINUS_INF, zero
 * pj/qlen/tlen, no parent (ppos = -1), no position in the child entry
 * (upos = -1) and no children (cpos[] = -1). */
static const bsw2cell_t g_default_cell = { 0, 0, MINUS_INF, MINUS_INF, MINUS_INF, 0, 0, 0, -1, -1, {-1, -1, -1, -1} };
42 | |
43 typedef struct { | |
44 int n, max; | |
45 uint32_t tk, tl; // this is fine | |
46 bsw2cell_t *array; | |
47 } bsw2entry_t, *bsw2entry_p; | |
48 | |
49 /* --- BEGIN: Stack operations --- */ | |
50 typedef struct { | |
51 int n_pending; | |
52 kvec_t(bsw2entry_p) stack0, pending; | |
53 struct __mempool_t *pool; | |
54 } bsw2stack_t; | |
55 | |
/* True when nothing is ready on stack0 and nothing is parked in pending.
 * The argument is parenthesised so any pointer-valued expression can be
 * passed without operator-precedence surprises. */
#define stack_isempty(s) (kv_size((s)->stack0) == 0 && (s)->n_pending == 0)
57 static void stack_destroy(bsw2stack_t *s) { mp_destroy(s->pool); kv_destroy(s->stack0); kv_destroy(s->pending); free(s); } | |
58 inline static void stack_push0(bsw2stack_t *s, bsw2entry_p e) { kv_push(bsw2entry_p, s->stack0, e); } | |
59 inline static bsw2entry_p stack_pop(bsw2stack_t *s) | |
60 { | |
61 assert(!(kv_size(s->stack0) == 0 && s->n_pending != 0)); | |
62 return kv_pop(s->stack0); | |
63 } | |
64 /* --- END: Stack operations --- */ | |
65 | |
66 /* --- BEGIN: memory pool --- */ | |
67 typedef struct __mempool_t { | |
68 int cnt; // if cnt!=0, then there must be memory leak | |
69 kvec_t(bsw2entry_p) pool; | |
70 } mempool_t; | |
71 inline static bsw2entry_p mp_alloc(mempool_t *mp) | |
72 { | |
73 ++mp->cnt; | |
74 if (kv_size(mp->pool) == 0) return (bsw2entry_t*)calloc(1, sizeof(bsw2entry_t)); | |
75 else return kv_pop(mp->pool); | |
76 } | |
77 inline static void mp_free(mempool_t *mp, bsw2entry_p e) | |
78 { | |
79 --mp->cnt; e->n = 0; | |
80 kv_push(bsw2entry_p, mp->pool, e); | |
81 } | |
82 static void mp_destroy(struct __mempool_t *mp) | |
83 { | |
84 int i; | |
85 for (i = 0; i != kv_size(mp->pool); ++i) { | |
86 free(kv_A(mp->pool, i)->array); | |
87 free(kv_A(mp->pool, i)); | |
88 } | |
89 kv_destroy(mp->pool); | |
90 free(mp); | |
91 } | |
92 /* --- END: memory pool --- */ | |
93 | |
94 /* --- BEGIN: utilities --- */ | |
95 static khash_t(64) *bsw2_connectivity(const bwtl_t *b) | |
96 { | |
97 khash_t(64) *h; | |
98 uint32_t k, l, cntk[4], cntl[4]; // this is fine | |
99 uint64_t x; | |
100 khiter_t iter; | |
101 int j, ret; | |
102 kvec_t(uint64_t) stack; | |
103 | |
104 kv_init(stack); | |
105 h = kh_init(64); | |
106 kh_resize(64, h, b->seq_len * 4); | |
107 x = b->seq_len; | |
108 kv_push(uint64_t, stack, x); | |
109 while (kv_size(stack)) { | |
110 x = kv_pop(stack); | |
111 k = x>>32; l = (uint32_t)x; | |
112 bwtl_2occ4(b, k-1, l, cntk, cntl); | |
113 for (j = 0; j != 4; ++j) { | |
114 k = b->L2[j] + cntk[j] + 1; | |
115 l = b->L2[j] + cntl[j]; | |
116 if (k > l) continue; | |
117 x = (uint64_t)k << 32 | l; | |
118 iter = kh_put(64, h, x, &ret); | |
119 if (ret) { // if not present | |
120 kh_value(h, iter) = 1; | |
121 kv_push(uint64_t, stack, x); | |
122 } else ++kh_value(h, iter); | |
123 } | |
124 } | |
125 kv_destroy(stack); | |
126 //fprintf(stderr, "[bsw2_connectivity] %u nodes in the DAG\n", kh_size(h)); | |
127 return h; | |
128 } | |
129 // pick up top T matches at a node | |
130 static void cut_tail(bsw2entry_t *u, int T, bsw2entry_t *aux) | |
131 { | |
132 int i, *a, n, x; | |
133 if (u->n <= T) return; | |
134 if (aux->max < u->n) { | |
135 aux->max = u->n; | |
136 aux->array = (bsw2cell_t*)realloc(aux->array, aux->max * sizeof(bsw2cell_t)); | |
137 } | |
138 a = (int*)aux->array; | |
139 for (i = n = 0; i != u->n; ++i) | |
140 if (u->array[i].ql && u->array[i].G > 0) | |
141 a[n++] = -u->array[i].G; | |
142 if (n <= T) return; | |
143 x = -ks_ksmall(int, n, a, T); | |
144 n = 0; | |
145 for (i = 0; i < u->n; ++i) { | |
146 bsw2cell_t *p = u->array + i; | |
147 if (p->G == x) ++n; | |
148 if (p->G < x || (p->G == x && n >= T)) { | |
149 p->qk = p->ql = 0; p->G = 0; | |
150 if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -1; | |
151 } | |
152 } | |
153 } | |
// Remove duplicated cells: among the live cells of u that share the same
// query interval (qk, ql) only one survives -- the one with the highest G, or
// the earliest one on a tie. `hash` is scratch space and is cleared on entry.
// Each hash value packs (index of the surviving cell << 32 | its G).
static inline void remove_duplicate(bsw2entry_t *u, khash_t(qintv) *hash)
{
	int i, ret, j;
	khiter_t k;
	qintv_t key;
	kh_clear(qintv, hash);
	for (i = 0; i != u->n; ++i) {
		bsw2cell_t *p = u->array + i;
		if (p->ql == 0) continue; // already deleted
		key.k = p->qk; key.l = p->ql;
		k = kh_put(qintv, hash, key, &ret);
		j = -1; // index of the cell to delete in this round, if any
		if (ret == 0) { // interval already present in the hash
			if ((uint32_t)kh_value(hash, k) >= p->G) j = i; // stored score is no worse: drop the current cell
			else { // current cell scores higher: drop the stored cell and record this one
				j = kh_value(hash, k)>>32;
				kh_value(hash, k) = (uint64_t)i<<32 | p->G;
			}
		} else kh_value(hash, k) = (uint64_t)i<<32 | p->G; // first occurrence of this interval
		if (j >= 0) {
			p = u->array + j;
			p->qk = p->ql = 0; p->G = 0; // mark as deleted
			if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -3; // unlink from the parent (status code -3)
		}
	}
}
181 // merge two entries | |
182 static void merge_entry(const bsw2opt_t * __restrict opt, bsw2entry_t *u, bsw2entry_t *v, bwtsw2_t *b) | |
183 { | |
184 int i; | |
185 if (u->n + v->n >= u->max) { | |
186 u->max = u->n + v->n; | |
187 u->array = (bsw2cell_t*)realloc(u->array, u->max * sizeof(bsw2cell_t)); | |
188 } | |
189 for (i = 0; i != v->n; ++i) { | |
190 bsw2cell_t *p = v->array + i; | |
191 if (p->ppos >= 0) p->ppos += u->n; | |
192 if (p->cpos[0] >= 0) p->cpos[0] += u->n; | |
193 if (p->cpos[1] >= 0) p->cpos[1] += u->n; | |
194 if (p->cpos[2] >= 0) p->cpos[2] += u->n; | |
195 if (p->cpos[3] >= 0) p->cpos[3] += u->n; | |
196 } | |
197 memcpy(u->array + u->n, v->array, v->n * sizeof(bsw2cell_t)); | |
198 u->n += v->n; | |
199 } | |
200 | |
201 static inline bsw2cell_t *push_array_p(bsw2entry_t *e) | |
202 { | |
203 if (e->n == e->max) { | |
204 e->max = e->max? e->max<<1 : 256; | |
205 e->array = (bsw2cell_t*)realloc(e->array, sizeof(bsw2cell_t) * e->max); | |
206 } | |
207 return e->array + e->n; | |
208 } | |
209 | |
/* CPU time (user + system) consumed between two getrusage() snapshots,
 * in seconds. */
static inline double time_elapse(const struct rusage *curr, const struct rusage *last)
{
	const struct timeval *cu = &curr->ru_utime, *cs = &curr->ru_stime;
	const struct timeval *lu = &last->ru_utime, *ls = &last->ru_stime;
	long whole = (cu->tv_sec - lu->tv_sec) + (cs->tv_sec - ls->tv_sec);
	long micro = (cu->tv_usec - lu->tv_usec) + (cs->tv_usec - ls->tv_usec);
	return (double)whole + micro * 1e-6;
}
216 /* --- END: utilities --- */ | |
217 | |
218 /* --- BEGIN: processing partial hits --- */ | |
219 static void save_hits(const bwtl_t *bwt, int thres, bsw2hit_t *hits, bsw2entry_t *u) | |
220 { | |
221 int i; | |
222 uint32_t k; // this is fine | |
223 for (i = 0; i < u->n; ++i) { | |
224 bsw2cell_t *p = u->array + i; | |
225 if (p->G < thres) continue; | |
226 for (k = u->tk; k <= u->tl; ++k) { | |
227 int beg, end; | |
228 bsw2hit_t *q = 0; | |
229 beg = bwt->sa[k]; end = beg + p->tlen; | |
230 if (p->G > hits[beg*2].G) { | |
231 hits[beg*2+1] = hits[beg*2]; | |
232 q = hits + beg * 2; | |
233 } else if (p->G > hits[beg*2+1].G) q = hits + beg * 2 + 1; | |
234 if (q) { | |
235 q->k = p->qk; q->l = p->ql; q->len = p->qlen; q->G = p->G; | |
236 q->beg = beg; q->end = end; q->G2 = q->k == q->l? 0 : q->G; | |
237 q->flag = q->n_seeds = 0; | |
238 } | |
239 } | |
240 } | |
241 } | |
242 /* "narrow hits" are node-to-node hits that have a high score and | |
243 * are not so repetitive (|SA interval|<=IS). */ | |
244 static void save_narrow_hits(const bwtl_t *bwtl, bsw2entry_t *u, bwtsw2_t *b1, int t, int IS) | |
245 { | |
246 int i; | |
247 for (i = 0; i < u->n; ++i) { | |
248 bsw2hit_t *q; | |
249 bsw2cell_t *p = u->array + i; | |
250 if (p->G >= t && p->ql - p->qk + 1 <= IS) { // good narrow hit | |
251 if (b1->max == b1->n) { | |
252 b1->max = b1->max? b1->max<<1 : 4; | |
253 b1->hits = realloc(b1->hits, b1->max * sizeof(bsw2hit_t)); | |
254 } | |
255 q = &b1->hits[b1->n++]; | |
256 q->k = p->qk; q->l = p->ql; | |
257 q->len = p->qlen; | |
258 q->G = p->G; q->G2 = 0; | |
259 q->beg = bwtl->sa[u->tk]; q->end = q->beg + p->tlen; | |
260 q->flag = 0; | |
261 // delete p | |
262 p->qk = p->ql = 0; p->G = 0; | |
263 if (p->ppos >= 0) u->array[p->ppos].cpos[p->pj] = -3; | |
264 } | |
265 } | |
266 } | |
267 /* after this, "narrow SA hits" will be expanded and the coordinates | |
268 * will be obtained and stored in b->hits[*].k. */ | |
269 int bsw2_resolve_duphits(const bntseq_t *bns, const bwt_t *bwt, bwtsw2_t *b, int IS) | |
270 { | |
271 int i, j, n, is_rev; | |
272 if (b->n == 0) return 0; | |
273 if (bwt && bns) { // convert to chromosomal coordinates if requested | |
274 int old_n = b->n; | |
275 bsw2hit_t *old_hits = b->hits; | |
276 for (i = n = 0; i < b->n; ++i) { // compute the memory to allocated | |
277 bsw2hit_t *p = old_hits + i; | |
278 if (p->l - p->k + 1 <= IS) n += p->l - p->k + 1; | |
279 else if (p->G > 0) ++n; | |
280 } | |
281 b->n = b->max = n; | |
282 b->hits = calloc(b->max, sizeof(bsw2hit_t)); | |
283 for (i = j = 0; i < old_n; ++i) { | |
284 bsw2hit_t *p = old_hits + i; | |
285 if (p->l - p->k + 1 <= IS) { // the hit is no so repetitive | |
286 bwtint_t k; | |
287 if (p->G == 0 && p->k == 0 && p->l == 0 && p->len == 0) continue; | |
288 for (k = p->k; k <= p->l; ++k) { | |
289 b->hits[j] = *p; | |
290 b->hits[j].k = bns_depos(bns, bwt_sa(bwt, k), &is_rev); | |
291 b->hits[j].l = 0; | |
292 b->hits[j].is_rev = is_rev; | |
293 if (is_rev) b->hits[j].k -= p->len - 1; | |
294 ++j; | |
295 } | |
296 } else if (p->G > 0) { | |
297 b->hits[j] = *p; | |
298 b->hits[j].k = bns_depos(bns, bwt_sa(bwt, p->k), &is_rev); | |
299 b->hits[j].l = 0; | |
300 b->hits[j].flag |= 1; | |
301 b->hits[j].is_rev = is_rev; | |
302 if (is_rev) b->hits[j].k -= p->len - 1; | |
303 ++j; | |
304 } | |
305 } | |
306 free(old_hits); | |
307 } | |
308 for (i = j = 0; i < b->n; ++i) // squeeze out empty elements | |
309 if (b->hits[i].G) b->hits[j++] = b->hits[i]; | |
310 b->n = j; | |
311 ks_introsort(hitG, b->n, b->hits); | |
312 for (i = 1; i < b->n; ++i) { | |
313 bsw2hit_t *p = b->hits + i; | |
314 for (j = 0; j < i; ++j) { | |
315 bsw2hit_t *q = b->hits + j; | |
316 int compatible = 1; | |
317 if (p->is_rev != q->is_rev) continue; // hits from opposite strands are not duplicates | |
318 if (p->l == 0 && q->l == 0) { | |
319 int qol = (p->end < q->end? p->end : q->end) - (p->beg > q->beg? p->beg : q->beg); // length of query overlap | |
320 if (qol < 0) qol = 0; | |
321 if ((float)qol / (p->end - p->beg) > MASK_LEVEL || (float)qol / (q->end - q->beg) > MASK_LEVEL) { | |
322 int64_t tol = (int64_t)(p->k + p->len < q->k + q->len? p->k + p->len : q->k + q->len) | |
323 - (int64_t)(p->k > q->k? p->k : q->k); // length of target overlap | |
324 if ((double)tol / p->len > MASK_LEVEL || (double)tol / q->len > MASK_LEVEL) | |
325 compatible = 0; | |
326 } | |
327 } | |
328 if (!compatible) { | |
329 p->G = 0; | |
330 if (q->G2 < p->G2) q->G2 = p->G2; | |
331 break; | |
332 } | |
333 } | |
334 } | |
335 n = i; | |
336 for (i = j = 0; i < n; ++i) { | |
337 if (b->hits[i].G == 0) continue; | |
338 if (i != j) b->hits[j++] = b->hits[i]; | |
339 else ++j; | |
340 } | |
341 b->n = j; | |
342 return b->n; | |
343 } | |
344 | |
345 int bsw2_resolve_query_overlaps(bwtsw2_t *b, float mask_level) | |
346 { | |
347 int i, j, n; | |
348 if (b->n == 0) return 0; | |
349 ks_introsort(hitG, b->n, b->hits); | |
350 { // choose a random one | |
351 int G0 = b->hits[0].G; | |
352 for (i = 1; i < b->n; ++i) | |
353 if (b->hits[i].G != G0) break; | |
354 j = (int)(i * drand48()); | |
355 if (j) { | |
356 bsw2hit_t tmp; | |
357 tmp = b->hits[0]; b->hits[0] = b->hits[j]; b->hits[j] = tmp; | |
358 } | |
359 } | |
360 for (i = 1; i < b->n; ++i) { | |
361 bsw2hit_t *p = b->hits + i; | |
362 int all_compatible = 1; | |
363 if (p->G == 0) break; | |
364 for (j = 0; j < i; ++j) { | |
365 bsw2hit_t *q = b->hits + j; | |
366 int64_t tol = 0; | |
367 int qol, compatible = 0; | |
368 float fol; | |
369 if (q->G == 0) continue; | |
370 qol = (p->end < q->end? p->end : q->end) - (p->beg > q->beg? p->beg : q->beg); | |
371 if (qol < 0) qol = 0; | |
372 if (p->l == 0 && q->l == 0) { | |
373 tol = (int64_t)(p->k + p->len < q->k + q->len? p->k + p->len : q->k + q->len) | |
374 - (p->k > q->k? p->k : q->k); | |
375 if (tol < 0) tol = 0; | |
376 } | |
377 fol = (float)qol / (p->end - p->beg < q->end - q->beg? p->end - p->beg : q->end - q->beg); | |
378 if (fol < mask_level || (tol > 0 && qol < p->end - p->beg && qol < q->end - q->beg)) compatible = 1; | |
379 if (!compatible) { | |
380 if (q->G2 < p->G) q->G2 = p->G; | |
381 all_compatible = 0; | |
382 } | |
383 } | |
384 if (!all_compatible) p->G = 0; | |
385 } | |
386 n = i; | |
387 for (i = j = 0; i < n; ++i) { | |
388 if (b->hits[i].G == 0) continue; | |
389 if (i != j) b->hits[j++] = b->hits[i]; | |
390 else ++j; | |
391 } | |
392 b->n = j; | |
393 return j; | |
394 } | |
395 /* --- END: processing partial hits --- */ | |
396 | |
397 /* --- BEGIN: global mem pool --- */ | |
398 bsw2global_t *bsw2_global_init() | |
399 { | |
400 bsw2global_t *pool; | |
401 bsw2stack_t *stack; | |
402 pool = calloc(1, sizeof(bsw2global_t)); | |
403 stack = calloc(1, sizeof(bsw2stack_t)); | |
404 stack->pool = (mempool_t*)calloc(1, sizeof(mempool_t)); | |
405 pool->stack = (void*)stack; | |
406 return pool; | |
407 } | |
408 | |
409 void bsw2_global_destroy(bsw2global_t *pool) | |
410 { | |
411 stack_destroy((bsw2stack_t*)pool->stack); | |
412 free(pool->aln_mem); | |
413 free(pool); | |
414 } | |
415 /* --- END: global mem pool --- */ | |
416 | |
417 static inline int fill_cell(const bsw2opt_t *o, int match_score, bsw2cell_t *c[4]) | |
418 { | |
419 int G = c[3]? c[3]->G + match_score : MINUS_INF; | |
420 if (c[1]) { | |
421 c[0]->I = c[1]->I > c[1]->G - o->q? c[1]->I - o->r : c[1]->G - o->qr; | |
422 if (c[0]->I > G) G = c[0]->I; | |
423 } else c[0]->I = MINUS_INF; | |
424 if (c[2]) { | |
425 c[0]->D = c[2]->D > c[2]->G - o->q? c[2]->D - o->r : c[2]->G - o->qr; | |
426 if (c[0]->D > G) G = c[0]->D; | |
427 } else c[0]->D = MINUS_INF; | |
428 return(c[0]->G = G); | |
429 } | |
430 | |
431 static void init_bwtsw2(const bwtl_t *target, const bwt_t *query, bsw2stack_t *s) | |
432 { | |
433 bsw2entry_t *u; | |
434 bsw2cell_t *x; | |
435 | |
436 u = mp_alloc(s->pool); | |
437 u->tk = 0; u->tl = target->seq_len; | |
438 x = push_array_p(u); | |
439 *x = g_default_cell; | |
440 x->G = 0; x->qk = 0; x->ql = query->seq_len; | |
441 u->n++; | |
442 stack_push0(s, u); | |
443 } | |
/* Core alignment routine: traverse the DAG of the target (walked through its
 * light-weight BWT) while descending the prefix trie of the query BWT, and
 * fill one DP cell per (target node, query interval) pair.
 *
 * Returns a calloc'ed array of two bwtsw2_t pointers. ret[0] holds the hits
 * recorded by save_hits() (two slots per target position); ret[1] holds the
 * not-so-repetitive "narrow SA hits" recorded by save_narrow_hits(). Both
 * have been passed through bsw2_resolve_duphits() before returning.
 *
 * pool->stack is reused across calls; its vectors are reset to empty on exit. */
bwtsw2_t **bsw2_core(const bntseq_t *bns, const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *query, bsw2global_t *pool)
{
	bsw2stack_t *stack = (bsw2stack_t*)pool->stack;
	bwtsw2_t *b, *b1, **b_ret;
	int i, j, score_mat[16], *heap, heap_size, n_tot = 0;
	struct rusage curr, last;
	khash_t(qintv) *rhash;
	khash_t(64) *chash;

	// initialize connectivity hash (chash): per target node, the number of
	// parents that still have to be processed (low 32 bits of the value)
	chash = bsw2_connectivity(target);
	// calculate score matrix: opt->a on the diagonal, -opt->b elsewhere
	for (i = 0; i != 4; ++i)
		for (j = 0; j != 4; ++j)
			score_mat[i<<2|j] = (i == j)? opt->a : -opt->b;
	// initialize other variables
	rhash = kh_init(qintv); // scratch hash for remove_duplicate()
	init_bwtsw2(target, query, stack);
	heap_size = opt->z;
	heap = calloc(heap_size, sizeof(int)); // holds negated scores; reset for every child node
	// initialize the return struct
	b = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t));
	b->n = b->max = target->seq_len * 2; // two hit slots per target position
	b->hits = calloc(b->max, sizeof(bsw2hit_t));
	b1 = (bwtsw2_t*)calloc(1, sizeof(bwtsw2_t));
	b_ret = calloc(2, sizeof(void*));
	b_ret[0] = b; b_ret[1] = b1;
	// initialize timer (only consumed by the disabled statistics output below)
	getrusage(0, &last);
	// the main loop: traversal of the DAG
	while (!stack_isempty(stack)) {
		int old_n, tj;
		bsw2entry_t *v;
		uint32_t tcntk[4], tcntl[4];
		bwtint_t k, l;

		v = stack_pop(stack); old_n = v->n; // old_n: cells present before this node is expanded
		n_tot += v->n;

		for (i = 0; i < v->n; ++i) { // test max depth and band width
			bsw2cell_t *p = v->array + i;
			if (p->ql == 0) continue;
			if (p->tlen - (int)p->qlen > opt->bw || (int)p->qlen - p->tlen > opt->bw) {
				// outside the band: delete the cell and mark it in the parent with code -5
				p->qk = p->ql = 0;
				if (p->ppos >= 0) v->array[p->ppos].cpos[p->pj] = -5;
			}
		}

		// get Occ for the DAG
		bwtl_2occ4(target, v->tk - 1, v->tl, tcntk, tcntl);
		for (tj = 0; tj != 4; ++tj) { // descend to the children
			bwtint_t qcntk[4], qcntl[4];
			int qj, *curr_score_mat = score_mat + tj * 4; // score row for target base tj
			khiter_t iter;
			bsw2entry_t *u;

			k = target->L2[tj] + tcntk[tj] + 1;
			l = target->L2[tj] + tcntl[tj];
			if (k > l) continue; // no child for this base
			// update counter: one more parent of this child has been processed
			iter = kh_get(64, chash, (uint64_t)k<<32 | l);
			--kh_value(chash, iter);
			// initialization
			u = mp_alloc(stack->pool);
			u->tk = k; u->tl = l;
			memset(heap, 0, sizeof(int) * opt->z);
			// loop through all the nodes in v (v may grow inside the loop)
			for (i = 0; i < v->n; ++i) {
				bsw2cell_t *p = v->array + i, *x, *c[4]; // c[0]=>current, c[1]=>I, c[2]=>D, c[3]=>G
				int is_added = 0;
				if (p->ql == 0) continue; // deleted node
				c[0] = x = push_array_p(u); // tentative slot; kept only if u->n is incremented below
				x->G = MINUS_INF;
				p->upos = x->upos = -1;
				if (p->ppos >= 0) { // parent has been visited
					c[1] = (v->array[p->ppos].upos >= 0)? u->array + v->array[p->ppos].upos : 0;
					c[3] = v->array + p->ppos; c[2] = p;
					if (fill_cell(opt, curr_score_mat[p->pj], c) > 0) { // then update topology at p and x
						x->ppos = v->array[p->ppos].upos; // the parent pos in u
						p->upos = u->n++; // the current pos in u
						if (x->ppos >= 0) u->array[x->ppos].cpos[p->pj] = p->upos; // the child pos of its parent in u
						is_added = 1;
					}
				} else { // cell without a parent: only the deletion state can be extended
					x->D = p->D > p->G - opt->q? p->D - opt->r : p->G - opt->qr;
					if (x->D > 0) {
						x->G = x->D;
						x->I = MINUS_INF; x->ppos = -1;
						p->upos = u->n++;
						is_added = 1;
					}
				}
				if (is_added) { // x has been added to u->array. fill the remaining variables
					x->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;
					x->pj = p->pj; x->qk = p->qk; x->ql = p->ql; x->qlen = p->qlen; x->tlen = p->tlen + 1;
					if (x->G > -heap[0]) { // better than the heap root: replace it
						heap[0] = -x->G;
						ks_heapadjust(int, 0, heap_size, heap);
					}
				}
				if ((x->G > opt->qr && x->G >= -heap[0]) || i < old_n) { // good node in u, or in v
					if (p->cpos[0] == -1 || p->cpos[1] == -1 || p->cpos[2] == -1 || p->cpos[3] == -1) {
						bwt_2occ4(query, p->qk - 1, p->ql, qcntk, qcntl);
						for (qj = 0; qj != 4; ++qj) { // descend to the prefix trie
							if (p->cpos[qj] != -1) continue; // this node will be visited later
							k = query->L2[qj] + qcntk[qj] + 1;
							l = query->L2[qj] + qcntl[qj];
							if (k > l) { p->cpos[qj] = -2; continue; } // -2: no such child in the query
							x = push_array_p(v);
							p = v->array + i; // p may not point to the correct position after realloc
							x->G = x->I = x->D = MINUS_INF;
							x->qk = k; x->ql = l; x->pj = qj; x->qlen = p->qlen + 1; x->ppos = i; x->tlen = p->tlen;
							x->cpos[0] = x->cpos[1] = x->cpos[2] = x->cpos[3] = -1;
							p->cpos[qj] = v->n++;
						} // ~for(qj)
					} // ~if(p->cpos[])
				} // ~if
			} // ~for(i)
			if (u->n) save_hits(target, opt->t, b->hits, u);
			{ // push u to the stack (or to the pending array)
				uint32_t cnt, pos;
				cnt = (uint32_t)kh_value(chash, iter); // parents of this child still to be processed
				pos = kh_value(chash, iter)>>32; // 1-based slot in stack->pending, 0 if none
				if (pos) { // something in the pending array, then merge
					bsw2entry_t *w = kv_A(stack->pending, pos-1);
					if (u->n) {
						if (w->n < u->n) { // swap so that the smaller entry is merged into the larger one
							w = u; u = kv_A(stack->pending, pos-1); kv_A(stack->pending, pos-1) = w;
						}
						merge_entry(opt, w, u, b);
					}
					if (cnt == 0) { // move from pending to stack0
						remove_duplicate(w, rhash);
						save_narrow_hits(target, w, b1, opt->t, opt->is);
						cut_tail(w, opt->z, u); // u only serves as scratch memory here
						stack_push0(stack, w);
						kv_A(stack->pending, pos-1) = 0;
						--stack->n_pending;
					}
					mp_free(stack->pool, u);
				} else if (cnt) { // the first time
					if (u->n) { // push to the pending queue
						++stack->n_pending;
						kv_push(bsw2entry_p, stack->pending, u);
						kh_value(chash, iter) = (uint64_t)kv_size(stack->pending)<<32 | cnt;
					} else mp_free(stack->pool, u);
				} else { // cnt == 0, then push to the stack
					bsw2entry_t *w = mp_alloc(stack->pool); // scratch entry for cut_tail()
					save_narrow_hits(target, u, b1, opt->t, opt->is);
					cut_tail(u, opt->z, w);
					mp_free(stack->pool, w);
					stack_push0(stack, u);
				}
			}
		} // ~for(tj)
		mp_free(stack->pool, v);
	} // while(top)
	getrusage(0, &curr);
	// clear n_seeds in both hit lists before the hits are sorted and resolved
	for (i = 0; i < 2; ++i)
		for (j = 0; j < b_ret[i]->n; ++j)
			b_ret[i]->hits[j].n_seeds = 0;
	bsw2_resolve_duphits(bns, query, b, opt->is);
	bsw2_resolve_duphits(bns, query, b1, opt->is);
	//fprintf(stderr, "stats: %.3lf sec; %d elems\n", time_elapse(&curr, &last), n_tot);
	// free
	free(heap);
	kh_destroy(qintv, rhash);
	kh_destroy(64, chash);
	stack->pending.n = stack->stack0.n = 0; // keep the vectors' storage for the next call
	return b_ret;
}