annotate bwa-0.6.2/bwa.c @ 2:a294fbfcb1db draft default tip

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:59 -0400
parents dd1186b11b3b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 #include <stdlib.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2 #include <string.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3 #include <stdio.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4 #include <math.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 #include "bwa.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6 #include "bwt.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 #include "bwtgap.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8 #include "bntseq.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 #ifndef kroundup32
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 extern unsigned char nst_nt4_table[256];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 extern void seq_reverse(int len, uint8_t *seq, int is_comp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 bwa_opt_t bwa_def_opt = { 11, 4, -1, 1, 6, 32, 2, 0.04 };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 struct bwa_idx_t {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 bntseq_t *bns;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 uint8_t *pac;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 struct bwa_buf_t {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 int max_buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27 bwa_pestat_t pes;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 gap_stack_t *stack;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 gap_opt_t *opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 int *diff_tab;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 uint8_t *buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 int *logn;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 bwa_idx_t *bwa_idx_load(const char *prefix)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 bwa_idx_t *p;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 int l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 char *str;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40 l = strlen(prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 p = calloc(1, sizeof(bwa_idx_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 str = malloc(l + 10);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43 strcpy(str, prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 p->bns = bns_restore(str);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 strcpy(str + l, ".bwt");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 p->bwt = bwt_restore_bwt(str);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 str[l] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 strcpy(str + l, ".sa");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 bwt_restore_sa(str, p->bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 free(str);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 p->pac = calloc(p->bns->l_pac/4+1, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 fread(p->pac, 1, p->bns->l_pac/4+1, p->bns->fp_pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 fclose(p->bns->fp_pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 p->bns->fp_pac = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 return p;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58 void bwa_idx_destroy(bwa_idx_t *p)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 bns_destroy(p->bns);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 bwt_destroy(p->bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62 free(p->pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 free(p);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 extern gap_opt_t *gap_init_opt(void);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 extern int bwa_cal_maxdiff(int l, double err, double thres);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 int i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 bwa_buf_t *p;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 p = malloc(sizeof(bwa_buf_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 p->stack = gap_init_stack2(max_score);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 p->opt = gap_init_opt();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 p->opt->s_gapo = opt->s_gapo;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 p->opt->s_gape = opt->s_gape;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 p->opt->max_diff = opt->max_diff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 p->opt->max_gapo = opt->max_gapo;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 p->opt->max_gape = opt->max_gape;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 p->opt->seed_len = opt->seed_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 p->opt->max_seed_diff = opt->max_seed_diff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 p->opt->fnr = opt->fnr;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 p->diff_tab = calloc(BWA_MAX_QUERY_LEN, sizeof(int));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 for (i = 1; i < BWA_MAX_QUERY_LEN; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85 p->diff_tab[i] = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 p->logn = calloc(256, sizeof(int));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 for (i = 1; i != 256; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 p->logn[i] = (int)(4.343 * log(i) + 0.499);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 return p;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 void bwa_buf_destroy(bwa_buf_t *p)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 gap_destroy_stack(p->stack);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 free(p->diff_tab); free(p->logn); free(p->opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96 free(p);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99 bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101 extern int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102 int i, seq_len, buf_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 bwt_width_t *w, *seed_w;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 uint8_t *s;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 gap_opt_t opt2 = *buf->opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106 bwa_sai_t sai;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
108 seq_len = strlen(seq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
109 // estimate the buffer length
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
110 buf_len = (buf->opt->seed_len + seq_len + 1) * sizeof(bwt_width_t) + seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
111 if (buf_len > buf->max_buf) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
112 buf->max_buf = buf_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
113 kroundup32(buf->max_buf);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
114 buf->buf = realloc(buf->buf, buf->max_buf);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
115 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
116 memset(buf->buf, 0, buf_len);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
117 seed_w = (bwt_width_t*)buf->buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
118 w = seed_w + buf->opt->seed_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
119 s = (uint8_t*)(w + seq_len + 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
120 if (opt2.fnr > 0.) opt2.max_diff = buf->diff_tab[seq_len];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
121 // copy the sequence
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
122 for (i = 0; i < seq_len; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
123 s[i] = nst_nt4_table[(int)seq[i]];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
124 seq_reverse(seq_len, s, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
125 // mapping
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
126 bwt_cal_width(idx->bwt, seq_len, s, w);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
127 if (opt2.seed_len >= seq_len) opt2.seed_len = 0x7fffffff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
128 if (seq_len > buf->opt->seed_len)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
129 bwt_cal_width(idx->bwt, buf->opt->seed_len, s + (seq_len - buf->opt->seed_len), seed_w);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
130 for (i = 0; i < seq_len; ++i) // complement; I forgot why...
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
131 s[i] = s[i] > 3? 4 : 3 - s[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
132 sai.sai = (bwa_sai1_t*)bwt_match_gap(idx->bwt, seq_len, s, w, seq_len <= buf->opt->seed_len? 0 : seed_w, &opt2, &sai.n, buf->stack);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
133 return sai;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
134 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
135
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
136 static void compute_NM(const uint8_t *pac, uint64_t l_pac, uint8_t *seq, int64_t pos, int n_cigar, uint32_t *cigar, int *n_mm, int *n_gaps)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
137 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
138 uint64_t x = pos, z;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
139 int k, y = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
140 *n_mm = *n_gaps = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
141 for (k = 0; k < n_cigar; ++k) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
142 int l = cigar[k]>>4;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
143 int op = cigar[k]&0xf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
144 if (op == 0) { // match/mismatch
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
145 for (z = 0; z < l && x + z < l_pac; ++z) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
146 int c = pac[(x+z)>>2] >> ((~(x+z)&3)<<1) & 3;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
147 if (c > 3 || seq[y+z] > 3 || c != seq[y+z]) ++(*n_mm);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
148 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
149 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
150 if (op == 1 || op == 2) (*n_gaps) += l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
151 if (op == 0 || op == 2) x += l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
152 if (op == 0 || op == 1 || op == 4) y += l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
153 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
154 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
155
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
156 void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
157 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
158 extern bwtint_t bwa_sa2pos(const bntseq_t *bns, const bwt_t *bwt, bwtint_t sapos, int len, int *strand);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
159 extern bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const uint8_t *seq, bwtint_t *_pos, int ext, int *n_cigar, int is_end_correct);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
160 int strand, seq_len, i, n_gap, n_mm;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
161 uint64_t pos3, pac_pos;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
162 uint8_t *s[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
163
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
164 memset(aln, 0, sizeof(bwa_aln_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
165 seq_len = strlen(seq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
166 if (seq_len<<1 > buf->max_buf) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
167 buf->max_buf = seq_len<<1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
168 kroundup32(buf->max_buf);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
169 buf->buf = realloc(buf->buf, buf->max_buf);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
170 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
171 s[0] = buf->buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
172 s[1] = s[0] + seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
173 for (i = 0; i < seq_len; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
174 s[0][i] = s[1][i] = nst_nt4_table[(int)seq[i]];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
175 seq_reverse(seq_len, s[1], 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
176 pac_pos = bwa_sa2pos(idx->bns, idx->bwt, sa, seq_len, &strand);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
177 if (strand) aln->flag |= 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
178 if (n_gaps) { // only for gapped alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
179 int n_cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
180 bwa_cigar_t *cigar16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
181 cigar16 = bwa_refine_gapped_core(idx->bns->l_pac, idx->pac, seq_len, s[strand], &pac_pos, strand? n_gaps : -n_gaps, &n_cigar, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
182 aln->n_cigar = n_cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
183 aln->cigar = malloc(n_cigar * 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
184 for (i = 0, pos3 = pac_pos; i < n_cigar; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
185 int op = cigar16[i]>>14;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
186 int len = cigar16[i]&0x3fff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
187 if (op == 3) op = 4; // the 16-bit CIGAR is different from the 32-bit CIGAR
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
188 aln->cigar[i] = len<<4 | op;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
189 if (op == 0 || op == 2) pos3 += len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
190 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
191 free(cigar16);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
192 } else { // ungapped
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
193 aln->n_cigar = 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
194 aln->cigar = malloc(4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
195 aln->cigar[0] = seq_len<<4 | 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
196 pos3 = pac_pos + seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
197 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
198 aln->n_n = bns_cnt_ambi(idx->bns, pac_pos, pos3 - pac_pos, &aln->ref_id);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
199 aln->offset = pac_pos - idx->bns->anns[aln->ref_id].offset;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
200 if (pos3 - idx->bns->anns[aln->ref_id].offset > idx->bns->anns[aln->ref_id].len) // read mapped beyond the end of a sequence
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
201 aln->flag |= 4; // read unmapped
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
202 compute_NM(idx->pac, idx->bns->l_pac, s[strand], pac_pos, aln->n_cigar, aln->cigar, &n_mm, &n_gap);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
203 aln->n_mm = n_mm;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
204 aln->n_gap = n_gap;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
205 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
206
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
207 /************************
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
208 * Single-end alignment *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
209 ************************/
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
210
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
211 bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
212 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
213 bwa_one_t *one;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
214 int best, cnt, i, seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
215
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
216 seq_len = strlen(seq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
217 one = calloc(1, sizeof(bwa_one_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
218 one->sai = bwa_sai(idx, buf, seq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
219 if (one->sai.n == 0) return one;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
220 // count number of hits; randomly select one alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
221 best = one->sai.sai[0].score;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
222 for (i = cnt = 0; i < one->sai.n; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
223 bwa_sai1_t *p = &one->sai.sai[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
224 if (p->score > best) break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
225 if (drand48() * (p->l - p->k + 1 + cnt) > (double)cnt) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
226 one->which = p;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
227 one->sa = p->k + (bwtint_t)((p->l - p->k + 1) * drand48());
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
228 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
229 cnt += p->l - p->k + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
230 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
231 one->c1 = cnt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
232 for (; i < one->sai.n; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
233 cnt += one->sai.sai[i].l - one->sai.sai[i].k + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
234 one->c2 = cnt - one->c1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
235 // estimate single-end mapping quality
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
236 one->mapQs = -1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
237 if (one->c1 == 0) one->mapQs = 23; // FIXME: is it possible?
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
238 else if (one->c1 > 1) one->mapQs = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
239 else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
240 int diff = one->which->n_mm + one->which->n_gapo + one->which->n_gape;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
241 if (diff >= buf->diff_tab[seq_len]) one->mapQs = 25;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
242 else if (one->c2 == 0) one->mapQs = 37;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
243 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
244 if (one->mapQs < 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
245 cnt = (one->c2 >= 255)? 255 : one->c2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
246 one->mapQs = 23 < buf->logn[cnt]? 0 : 23 - buf->logn[cnt];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
247 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
248 one->mapQ = one->mapQs;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
249 // compute CIGAR on request
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
250 one->one.ref_id = -1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
251 if (gen_cigar) bwa_sa2aln(idx, buf, seq, one->sa, one->which->n_gapo + one->which->n_gape, &one->one);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
252 return one;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
253 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
254
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
255 void bwa_one_destroy(bwa_one_t *one)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
256 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
257 free(one->sai.sai);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
258 free(one->one.cigar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
259 free(one);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
260 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
261
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
262 /************************
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
263 * Paired-end alignment *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
264 ************************/
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
265
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
266 void bwa_pestat(bwa_buf_t *buf, int n, bwa_one_t **o[2])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
267 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
268 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
269
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
270 void bwa_pe(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq[2], bwa_one_t *o[2])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
271 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
272 }