0
|
1 #include <stdio.h>
|
|
2 #include "bwtsw2.h"
|
|
3
|
|
4 typedef struct {
|
|
5 uint32_t tbeg, tend;
|
|
6 int qbeg, qend;
|
|
7 uint32_t flag:1, idx:31;
|
|
8 int chain; // also reuse as a counter
|
|
9 } hsaip_t;
|
|
10
|
|
11 #define _hsaip_lt(a, b) ((a).qbeg < (b).qbeg)
|
|
12
|
|
13 #include "ksort.h"
|
|
14 KSORT_INIT(hsaip, hsaip_t, _hsaip_lt)
|
|
15
|
|
16 static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t *chain)
|
|
17 {
|
|
18 int j, k, m = 0;
|
|
19 ks_introsort(hsaip, n, z);
|
|
20 for (j = 0; j < n; ++j) {
|
|
21 hsaip_t *p = z + j;
|
|
22 for (k = m - 1; k >= 0; --k) {
|
|
23 hsaip_t *q = chain + k;
|
|
24 int x = p->qbeg - q->qbeg; // always positive
|
|
25 int y = p->tbeg - q->tbeg;
|
|
26 if (y > 0 && x - y <= opt->bw && y - x <= opt->bw) {
|
|
27 if (p->qend > q->qend) q->qend = p->qend;
|
|
28 if (p->tend > q->tend) q->tend = p->tend;
|
|
29 ++q->chain;
|
|
30 p->chain = shift + k;
|
|
31 break;
|
|
32 }
|
|
33 }
|
|
34 if (k < 0) {
|
|
35 chain[m] = *p;
|
|
36 chain[m].chain = 1;
|
|
37 chain[m].idx = p->chain = shift + m;
|
|
38 ++m;
|
|
39 }
|
|
40 }
|
|
41 return m;
|
|
42 }
|
|
43
|
|
44 void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2])
|
|
45 {
|
|
46 hsaip_t *z[2], *chain[2];
|
|
47 int i, j, k, n[2], m[2];
|
|
48 char *flag;
|
|
49 // initialization
|
|
50 n[0] = b[0]->n; n[1] = b[1]->n;
|
|
51 z[0] = calloc(n[0] + n[1], sizeof(hsaip_t));
|
|
52 z[1] = z[0] + n[0];
|
|
53 chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t));
|
|
54 for (k = j = 0; k < 2; ++k) {
|
|
55 for (i = 0; i < b[k]->n; ++i) {
|
|
56 bsw2hit_t *p = b[k]->hits + i;
|
|
57 hsaip_t *q = z[k] + i;
|
|
58 q->flag = k; q->idx = i;
|
|
59 q->tbeg = p->k; q->tend = p->k + p->len;
|
|
60 q->chain = -1;
|
|
61 q->qbeg = p->beg; q->qend = p->end;
|
|
62 }
|
|
63 }
|
|
64 // chaining
|
|
65 m[0] = chaining(opt, 0, n[0], z[0], chain[0]);
|
|
66 chain[1] = chain[0] + m[0];
|
|
67 m[1] = chaining(opt, m[0], n[1], z[1], chain[1]);
|
|
68 // change query coordinate on the reverse strand
|
|
69 for (k = 0; k < m[1]; ++k) {
|
|
70 hsaip_t *p = chain[1] + k;
|
|
71 int tmp = p->qbeg;
|
|
72 p->qbeg = len - p->qend; p->qend = len - tmp;
|
|
73 }
|
|
74 // filtering
|
|
75 flag = calloc(m[0] + m[1], 1);
|
|
76 ks_introsort(hsaip, m[0] + m[1], chain[0]);
|
|
77 for (k = 1; k < m[0] + m[1]; ++k) {
|
|
78 hsaip_t *p = chain[0] + k;
|
|
79 for (j = 0; j < k; ++j) {
|
|
80 hsaip_t *q = chain[0] + j;
|
|
81 if (flag[q->idx]) continue;
|
|
82 if (q->qend >= p->qend && q->chain > p->chain * opt->t_seeds * 2) {
|
|
83 flag[p->idx] = 1;
|
|
84 break;
|
|
85 }
|
|
86 }
|
|
87 }
|
|
88 for (k = 0; k < n[0] + n[1]; ++k) {
|
|
89 hsaip_t *p = z[0] + k;
|
|
90 if (flag[p->chain])
|
|
91 b[p->flag]->hits[p->idx].G = 0;
|
|
92 }
|
|
93 free(flag);
|
|
94 // squeeze out filtered elements in b[2]
|
|
95 for (k = 0; k < 2; ++k) {
|
|
96 for (j = i = 0; j < n[k]; ++j) {
|
|
97 bsw2hit_t *p = b[k]->hits + j;
|
|
98 if (p->G) {
|
|
99 if (i != j) b[k]->hits[i++] = *p;
|
|
100 else ++i;
|
|
101 }
|
|
102 }
|
|
103 b[k]->n = i;
|
|
104 }
|
|
105 // free
|
|
106 free(z[0]); free(chain[0]);
|
|
107 }
|