annotate bwa-0.6.2/bwtaln.c @ 0:dd1186b11b3b draft

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 #include <stdio.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2 #include <unistd.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3 #include <math.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4 #include <stdlib.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 #include <string.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6 #include <time.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 #include <stdint.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8 #ifdef HAVE_CONFIG_H
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9 #include "config.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 #include "bwtaln.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12 #include "bwtgap.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13 #include "utils.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 #ifdef HAVE_PTHREAD
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16 #include <pthread.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 gap_opt_t *gap_init_opt()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 gap_opt_t *o;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 o = (gap_opt_t*)calloc(1, sizeof(gap_opt_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 /* IMPORTANT: s_mm*10 should be about the average base error
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24 rate. Voilating this requirement will break pairing! */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 o->s_mm = 3; o->s_gapo = 11; o->s_gape = 4;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 o->max_diff = -1; o->max_gapo = 1; o->max_gape = 6;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27 o->indel_end_skip = 5; o->max_del_occ = 10; o->max_entries = 2000000;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 o->mode = BWA_MODE_GAPE | BWA_MODE_COMPREAD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 o->seed_len = 32; o->max_seed_diff = 2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 o->fnr = 0.04;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 o->n_threads = 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 o->max_top2 = 30;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 o->trim_qual = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34 return o;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 int bwa_cal_maxdiff(int l, double err, double thres)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 double elambda = exp(-l * err);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40 double sum, y = 1.0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 int k, x = 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 for (k = 1, sum = elambda; k < 1000; ++k) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43 y *= l * err;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 x *= k;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 sum += elambda * y / x;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 if (1.0 - sum < thres) return k;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 return 2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 // width must be filled as zero
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 int bwt_cal_width(const bwt_t *bwt, int len, const ubyte_t *str, bwt_width_t *width)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 bwtint_t k, l, ok, ol;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 int i, bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 bid = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57 k = 0; l = bwt->seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58 for (i = 0; i < len; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 ubyte_t c = str[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 if (c < 4) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 bwt_2occ(bwt, k - 1, l, c, &ok, &ol);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62 k = bwt->L2[c] + ok + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 l = bwt->L2[c] + ol;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 if (k > l || c > 3) { // then restart
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 k = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 l = bwt->seq_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 ++bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 width[i].w = l - k + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 width[i].bid = bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 width[len].w = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 width[len].bid = ++bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 return bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 int i, j, max_l = 0, max_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 gap_stack_t *stack;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 bwt_width_t *w, *seed_w;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 gap_opt_t local_opt = *opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85 // initiate priority stack
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 for (i = max_len = 0; i != n_seqs; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 if (seqs[i].len > max_len) max_len = seqs[i].len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 if (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(max_len, BWA_AVG_ERR, opt->fnr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 if (local_opt.max_diff < local_opt.max_gapo) local_opt.max_gapo = local_opt.max_diff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 stack = gap_init_stack(local_opt.max_diff, local_opt.max_gapo, local_opt.max_gape, &local_opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 seed_w = (bwt_width_t*)calloc(opt->seed_len+1, sizeof(bwt_width_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 w = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 for (i = 0; i != n_seqs; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 bwa_seq_t *p = seqs + i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96 #ifdef HAVE_PTHREAD
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 if (i % opt->n_threads != tid) continue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99 p->sa = 0; p->type = BWA_TYPE_NO_MATCH; p->c1 = p->c2 = 0; p->n_aln = 0; p->aln = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100 if (max_l < p->len) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101 max_l = p->len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102 w = (bwt_width_t*)realloc(w, (max_l + 1) * sizeof(bwt_width_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 memset(w, 0, (max_l + 1) * sizeof(bwt_width_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 bwt_cal_width(bwt, p->len, p->seq, w);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106 if (opt->fnr > 0.0) local_opt.max_diff = bwa_cal_maxdiff(p->len, BWA_AVG_ERR, opt->fnr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107 local_opt.seed_len = opt->seed_len < p->len? opt->seed_len : 0x7fffffff;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
108 if (p->len > opt->seed_len)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
109 bwt_cal_width(bwt, opt->seed_len, p->seq + (p->len - opt->seed_len), seed_w);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
110 // core function
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
111 for (j = 0; j < p->len; ++j) // we need to complement
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
112 p->seq[j] = p->seq[j] > 3? 4 : 3 - p->seq[j];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
113 p->aln = bwt_match_gap(bwt, p->len, p->seq, w, p->len <= opt->seed_len? 0 : seed_w, &local_opt, &p->n_aln, stack);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
114 // clean up the unused data in the record
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
115 free(p->name); free(p->seq); free(p->rseq); free(p->qual);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
116 p->name = 0; p->seq = p->rseq = p->qual = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
117 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
118 free(seed_w); free(w);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
119 gap_destroy_stack(stack);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
120 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
121
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
122 #ifdef HAVE_PTHREAD
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
123 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
124 int tid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
125 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
126 int n_seqs;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
127 bwa_seq_t *seqs;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
128 const gap_opt_t *opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
129 } thread_aux_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
130
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
131 static void *worker(void *data)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
132 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
133 thread_aux_t *d = (thread_aux_t*)data;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
134 bwa_cal_sa_reg_gap(d->tid, d->bwt, d->n_seqs, d->seqs, d->opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
135 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
136 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
137 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
138
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
139 bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
140 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
141 bwa_seqio_t *ks;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
142 if (mode & BWA_MODE_BAM) { // open BAM
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
143 int which = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
144 if (mode & BWA_MODE_BAM_SE) which |= 4;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
145 if (mode & BWA_MODE_BAM_READ1) which |= 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
146 if (mode & BWA_MODE_BAM_READ2) which |= 2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
147 if (which == 0) which = 7; // then read all reads
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
148 ks = bwa_bam_open(fn_fa, which);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
149 } else ks = bwa_seq_open(fn_fa);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
150 return ks;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
151 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
152
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
153 void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
154 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
155 int i, n_seqs, tot_seqs = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
156 bwa_seq_t *seqs;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
157 bwa_seqio_t *ks;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
158 clock_t t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
159 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
160
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
161 // initialization
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
162 ks = bwa_open_reads(opt->mode, fn_fa);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
163
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
164 { // load BWT
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
165 char *str = (char*)calloc(strlen(prefix) + 10, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
166 strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
167 free(str);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
168 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
169
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
170 // core loop
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
171 err_fwrite(opt, sizeof(gap_opt_t), 1, stdout);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
172 while ((seqs = bwa_read_seq(ks, 0x40000, &n_seqs, opt->mode, opt->trim_qual)) != 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
173 tot_seqs += n_seqs;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
174 t = clock();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
175
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
176 fprintf(stderr, "[bwa_aln_core] calculate SA coordinate... ");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
177
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
178 #ifdef HAVE_PTHREAD
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
179 if (opt->n_threads <= 1) { // no multi-threading at all
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
180 bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
181 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
182 pthread_t *tid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
183 pthread_attr_t attr;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
184 thread_aux_t *data;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
185 int j;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
186 pthread_attr_init(&attr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
187 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
188 data = (thread_aux_t*)calloc(opt->n_threads, sizeof(thread_aux_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
189 tid = (pthread_t*)calloc(opt->n_threads, sizeof(pthread_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
190 for (j = 0; j < opt->n_threads; ++j) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
191 data[j].tid = j; data[j].bwt = bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
192 data[j].n_seqs = n_seqs; data[j].seqs = seqs; data[j].opt = opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
193 pthread_create(&tid[j], &attr, worker, data + j);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
194 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
195 for (j = 0; j < opt->n_threads; ++j) pthread_join(tid[j], 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
196 free(data); free(tid);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
197 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
198 #else
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
199 bwa_cal_sa_reg_gap(0, bwt, n_seqs, seqs, opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
200 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
201
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
202 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
203
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
204 t = clock();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
205 fprintf(stderr, "[bwa_aln_core] write to the disk... ");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
206 for (i = 0; i < n_seqs; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
207 bwa_seq_t *p = seqs + i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
208 err_fwrite(&p->n_aln, 4, 1, stdout);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
209 if (p->n_aln) err_fwrite(p->aln, sizeof(bwt_aln1_t), p->n_aln, stdout);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
210 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
211 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
212
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
213 bwa_free_read_seq(n_seqs, seqs);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
214 fprintf(stderr, "[bwa_aln_core] %d sequences have been processed.\n", tot_seqs);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
215 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
216
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
217 // destroy
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
218 bwt_destroy(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
219 bwa_seq_close(ks);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
220 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
221
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
222 char *bwa_infer_prefix(const char *hint)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
223 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
224 char *prefix;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
225 int l_hint;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
226 FILE *fp;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
227 l_hint = strlen(hint);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
228 prefix = malloc(l_hint + 3 + 4 + 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
229 strcpy(prefix, hint);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
230 strcpy(prefix + l_hint, ".64.bwt");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
231 if ((fp = fopen(prefix, "rb")) != 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
232 fclose(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
233 prefix[l_hint + 3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
234 return prefix;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
235 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
236 strcpy(prefix + l_hint, ".bwt");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
237 if ((fp = fopen(prefix, "rb")) == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
238 free(prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
239 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
240 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
241 fclose(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
242 prefix[l_hint] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
243 return prefix;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
244 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
245 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
246 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
247
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
248 int bwa_aln(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
249 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
250 int c, opte = -1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
251 gap_opt_t *opt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
252 char *prefix;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
253
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
254 opt = gap_init_opt();
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
255 while ((c = getopt(argc, argv, "n:o:e:i:d:l:k:cLR:m:t:NM:O:E:q:f:b012IYB:")) >= 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
256 switch (c) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
257 case 'n':
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
258 if (strstr(optarg, ".")) opt->fnr = atof(optarg), opt->max_diff = -1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
259 else opt->max_diff = atoi(optarg), opt->fnr = -1.0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
260 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
261 case 'o': opt->max_gapo = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
262 case 'e': opte = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
263 case 'M': opt->s_mm = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
264 case 'O': opt->s_gapo = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
265 case 'E': opt->s_gape = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
266 case 'd': opt->max_del_occ = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
267 case 'i': opt->indel_end_skip = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
268 case 'l': opt->seed_len = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
269 case 'k': opt->max_seed_diff = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
270 case 'm': opt->max_entries = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
271 case 't': opt->n_threads = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
272 case 'L': opt->mode |= BWA_MODE_LOGGAP; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
273 case 'R': opt->max_top2 = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
274 case 'q': opt->trim_qual = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
275 case 'c': opt->mode &= ~BWA_MODE_COMPREAD; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
276 case 'N': opt->mode |= BWA_MODE_NONSTOP; opt->max_top2 = 0x7fffffff; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
277 case 'f': xreopen(optarg, "wb", stdout); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
278 case 'b': opt->mode |= BWA_MODE_BAM; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
279 case '0': opt->mode |= BWA_MODE_BAM_SE; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
280 case '1': opt->mode |= BWA_MODE_BAM_READ1; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
281 case '2': opt->mode |= BWA_MODE_BAM_READ2; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
282 case 'I': opt->mode |= BWA_MODE_IL13; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
283 case 'Y': opt->mode |= BWA_MODE_CFY; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
284 case 'B': opt->mode |= atoi(optarg) << 24; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
285 default: return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
286 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
287 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
288 if (opte > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
289 opt->max_gape = opte;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
290 opt->mode &= ~BWA_MODE_GAPE;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
291 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
292
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
293 if (optind + 2 > argc) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
294 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
295 fprintf(stderr, "Usage: bwa aln [options] <prefix> <in.fq>\n\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
296 fprintf(stderr, "Options: -n NUM max #diff (int) or missing prob under %.2f err rate (float) [%.2f]\n",
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
297 BWA_AVG_ERR, opt->fnr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
298 fprintf(stderr, " -o INT maximum number or fraction of gap opens [%d]\n", opt->max_gapo);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
299 fprintf(stderr, " -e INT maximum number of gap extensions, -1 for disabling long gaps [-1]\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
300 fprintf(stderr, " -i INT do not put an indel within INT bp towards the ends [%d]\n", opt->indel_end_skip);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
301 fprintf(stderr, " -d INT maximum occurrences for extending a long deletion [%d]\n", opt->max_del_occ);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
302 fprintf(stderr, " -l INT seed length [%d]\n", opt->seed_len);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
303 fprintf(stderr, " -k INT maximum differences in the seed [%d]\n", opt->max_seed_diff);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
304 fprintf(stderr, " -m INT maximum entries in the queue [%d]\n", opt->max_entries);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
305 fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
306 fprintf(stderr, " -M INT mismatch penalty [%d]\n", opt->s_mm);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
307 fprintf(stderr, " -O INT gap open penalty [%d]\n", opt->s_gapo);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
308 fprintf(stderr, " -E INT gap extension penalty [%d]\n", opt->s_gape);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
309 fprintf(stderr, " -R INT stop searching when there are >INT equally best hits [%d]\n", opt->max_top2);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
310 fprintf(stderr, " -q INT quality threshold for read trimming down to %dbp [%d]\n", BWA_MIN_RDLEN, opt->trim_qual);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
311 fprintf(stderr, " -f FILE file to write output to instead of stdout\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
312 fprintf(stderr, " -B INT length of barcode\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
313 // fprintf(stderr, " -c input sequences are in the color space\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
314 fprintf(stderr, " -L log-scaled gap penalty for long deletions\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
315 fprintf(stderr, " -N non-iterative mode: search for all n-difference hits (slooow)\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
316 fprintf(stderr, " -I the input is in the Illumina 1.3+ FASTQ-like format\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
317 fprintf(stderr, " -b the input read file is in the BAM format\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
318 fprintf(stderr, " -0 use single-end reads only (effective with -b)\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
319 fprintf(stderr, " -1 use the 1st read in a pair (effective with -b)\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
320 fprintf(stderr, " -2 use the 2nd read in a pair (effective with -b)\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
321 fprintf(stderr, " -Y filter Casava-filtered sequences\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
322 fprintf(stderr, "\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
323 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
324 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
325 if (opt->fnr > 0.0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
326 int i, k;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
327 for (i = 17, k = 0; i <= 250; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
328 int l = bwa_cal_maxdiff(i, BWA_AVG_ERR, opt->fnr);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
329 if (l != k) fprintf(stderr, "[bwa_aln] %dbp reads: max_diff = %d\n", i, l);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
330 k = l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
331 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
332 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
333 if ((prefix = bwa_infer_prefix(argv[optind])) == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
334 fprintf(stderr, "[%s] fail to locate the index\n", __func__);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
335 free(opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
336 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
337 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
338 bwa_aln_core(prefix, argv[optind+1], opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
339 free(opt); free(prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
340 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
341 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
342
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
343 /* rgoya: Temporary clone of aln_path2cigar to accomodate for bwa_cigar_t,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
344 __cigar_op and __cigar_len while keeping stdaln stand alone */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
345 bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
346 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
347 uint32_t *cigar32;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
348 bwa_cigar_t *cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
349 int i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
350 cigar32 = aln_path2cigar32((path_t*) path, path_len, n_cigar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
351 cigar = (bwa_cigar_t*)cigar32;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
352 for (i = 0; i < *n_cigar; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
353 cigar[i] = __cigar_create( (cigar32[i]&0xf), (cigar32[i]>>4) );
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
354 return cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
355 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
356