Mercurial > repos > ashvark > qiime_1_8_0
diff bwa-0.6.2/bwtsw2_chain.c @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bwa-0.6.2/bwtsw2_chain.c Fri Jul 18 07:55:59 2014 -0400 @@ -0,0 +1,107 @@ +#include <stdio.h> +#include "bwtsw2.h" + +typedef struct { + uint32_t tbeg, tend; + int qbeg, qend; + uint32_t flag:1, idx:31; + int chain; // also reuse as a counter +} hsaip_t; + +#define _hsaip_lt(a, b) ((a).qbeg < (b).qbeg) + +#include "ksort.h" +KSORT_INIT(hsaip, hsaip_t, _hsaip_lt) + +static int chaining(const bsw2opt_t *opt, int shift, int n, hsaip_t *z, hsaip_t *chain) +{ + int j, k, m = 0; + ks_introsort(hsaip, n, z); + for (j = 0; j < n; ++j) { + hsaip_t *p = z + j; + for (k = m - 1; k >= 0; --k) { + hsaip_t *q = chain + k; + int x = p->qbeg - q->qbeg; // always positive + int y = p->tbeg - q->tbeg; + if (y > 0 && x - y <= opt->bw && y - x <= opt->bw) { + if (p->qend > q->qend) q->qend = p->qend; + if (p->tend > q->tend) q->tend = p->tend; + ++q->chain; + p->chain = shift + k; + break; + } + } + if (k < 0) { + chain[m] = *p; + chain[m].chain = 1; + chain[m].idx = p->chain = shift + m; + ++m; + } + } + return m; +} + +void bsw2_chain_filter(const bsw2opt_t *opt, int len, bwtsw2_t *b[2]) +{ + hsaip_t *z[2], *chain[2]; + int i, j, k, n[2], m[2]; + char *flag; + // initialization + n[0] = b[0]->n; n[1] = b[1]->n; + z[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); + z[1] = z[0] + n[0]; + chain[0] = calloc(n[0] + n[1], sizeof(hsaip_t)); + for (k = j = 0; k < 2; ++k) { + for (i = 0; i < b[k]->n; ++i) { + bsw2hit_t *p = b[k]->hits + i; + hsaip_t *q = z[k] + i; + q->flag = k; q->idx = i; + q->tbeg = p->k; q->tend = p->k + p->len; + q->chain = -1; + q->qbeg = p->beg; q->qend = p->end; + } + } + // chaining + m[0] = chaining(opt, 0, n[0], z[0], chain[0]); + chain[1] = chain[0] + m[0]; + m[1] = chaining(opt, m[0], n[1], z[1], chain[1]); + // change query coordinate on the reverse strand + for (k = 0; k < m[1]; ++k) { + hsaip_t *p = chain[1] + k; + int tmp = p->qbeg; + p->qbeg = len - p->qend; p->qend = len - tmp; + } + // filtering + flag = calloc(m[0] + m[1], 1); + ks_introsort(hsaip, m[0] + m[1], chain[0]); + for (k = 1; k < m[0] + m[1]; ++k) { + hsaip_t *p = chain[0] + k; + for (j = 0; j < k; ++j) { + hsaip_t *q = chain[0] + j; + if (flag[q->idx]) continue; + if (q->qend >= p->qend && q->chain > p->chain * opt->t_seeds * 2) { + flag[p->idx] = 1; + break; + } + } + } + for (k = 0; k < n[0] + n[1]; ++k) { + hsaip_t *p = z[0] + k; + if (flag[p->chain]) + b[p->flag]->hits[p->idx].G = 0; + } + free(flag); + // squeeze out filtered elements in b[2] + for (k = 0; k < 2; ++k) { + for (j = i = 0; j < n[k]; ++j) { + bsw2hit_t *p = b[k]->hits + j; + if (p->G) { + if (i != j) b[k]->hits[i++] = *p; + else ++i; + } + } + b[k]->n = i; + } + // free + free(z[0]); free(chain[0]); +}