Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/bwape.c @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
comparison
equal
deleted
inserted
replaced
1:a9636dc1e99a | 2:a294fbfcb1db |
---|---|
1 #include <unistd.h> | |
2 #include <math.h> | |
3 #include <stdlib.h> | |
4 #include <time.h> | |
5 #include <stdio.h> | |
6 #include <string.h> | |
7 #include "bwtaln.h" | |
8 #include "kvec.h" | |
9 #include "bntseq.h" | |
10 #include "utils.h" | |
11 #include "stdaln.h" | |
12 #include "bwase.h" | |
13 | |
14 typedef struct { | |
15 int n; | |
16 bwtint_t *a; | |
17 } poslist_t; | |
18 | |
19 typedef struct { | |
20 double avg, std, ap_prior; | |
21 bwtint_t low, high, high_bayesian; | |
22 } isize_info_t; | |
23 | |
24 typedef struct { | |
25 uint64_t x, y; | |
26 } b128_t; | |
27 | |
28 #define b128_lt(a, b) ((a).x < (b).x) | |
29 #define b128_eq(a, b) ((a).x == (b).x && (a).y == (b).y) | |
30 #define b128_hash(a) ((uint32_t)(a).x) | |
31 | |
32 #include "khash.h" | |
33 KHASH_INIT(b128, b128_t, poslist_t, 1, b128_hash, b128_eq) | |
34 | |
35 #include "ksort.h" | |
36 KSORT_INIT(b128, b128_t, b128_lt) | |
37 KSORT_INIT_GENERIC(uint64_t) | |
38 | |
39 typedef struct { | |
40 kvec_t(b128_t) arr; | |
41 kvec_t(b128_t) pos[2]; | |
42 kvec_t(bwt_aln1_t) aln[2]; | |
43 } pe_data_t; | |
44 | |
45 #define MIN_HASH_WIDTH 1000 | |
46 | |
47 extern int g_log_n[256]; // in bwase.c | |
48 static kh_b128_t *g_hash; | |
49 | |
50 void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi); | |
51 void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s); | |
52 int bwa_approx_mapQ(const bwa_seq_t *p, int mm); | |
53 void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2); | |
54 bntseq_t *bwa_open_nt(const char *prefix); | |
55 void bwa_print_sam_SQ(const bntseq_t *bns); | |
56 void bwa_print_sam_PG(); | |
57 | |
58 pe_opt_t *bwa_init_pe_opt() | |
59 { | |
60 pe_opt_t *po; | |
61 po = (pe_opt_t*)calloc(1, sizeof(pe_opt_t)); | |
62 po->max_isize = 500; | |
63 po->force_isize = 0; | |
64 po->max_occ = 100000; | |
65 po->n_multi = 3; | |
66 po->N_multi = 10; | |
67 po->type = BWA_PET_STD; | |
68 po->is_sw = 1; | |
69 po->ap_prior = 1e-5; | |
70 return po; | |
71 } | |
72 | |
73 static inline uint64_t hash_64(uint64_t key) | |
74 { | |
75 key += ~(key << 32); | |
76 key ^= (key >> 22); | |
77 key += ~(key << 13); | |
78 key ^= (key >> 8); | |
79 key += (key << 3); | |
80 key ^= (key >> 15); | |
81 key += ~(key << 27); | |
82 key ^= (key >> 31); | |
83 return key; | |
84 } | |
85 /* | |
86 static double ierfc(double x) // inverse erfc(); iphi(x) = M_SQRT2 *ierfc(2 * x); | |
87 { | |
88 const double a = 0.140012; | |
89 double b, c; | |
90 b = log(x * (2 - x)); | |
91 c = 2./M_PI/a + b / 2.; | |
92 return sqrt(sqrt(c * c - b / a) - c); | |
93 } | |
94 */ | |
95 | |
96 // for normal distribution, this is about 3std | |
97 #define OUTLIER_BOUND 2.0 | |
98 | |
99 static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double ap_prior, int64_t L) | |
100 { | |
101 uint64_t x, *isizes, n_ap = 0; | |
102 int n, i, tot, p25, p75, p50, max_len = 1, tmp; | |
103 double skewness = 0.0, kurtosis = 0.0, y; | |
104 | |
105 ii->avg = ii->std = -1.0; | |
106 ii->low = ii->high = ii->high_bayesian = 0; | |
107 isizes = (uint64_t*)calloc(n_seqs, 8); | |
108 for (i = 0, tot = 0; i != n_seqs; ++i) { | |
109 bwa_seq_t *p[2]; | |
110 p[0] = seqs[0] + i; p[1] = seqs[1] + i; | |
111 if (p[0]->mapQ >= 20 && p[1]->mapQ >= 20) { | |
112 x = (p[0]->pos < p[1]->pos)? p[1]->pos + p[1]->len - p[0]->pos : p[0]->pos + p[0]->len - p[1]->pos; | |
113 if (x < 100000) isizes[tot++] = x; | |
114 } | |
115 if (p[0]->len > max_len) max_len = p[0]->len; | |
116 if (p[1]->len > max_len) max_len = p[1]->len; | |
117 } | |
118 if (tot < 20) { | |
119 fprintf(stderr, "[infer_isize] fail to infer insert size: too few good pairs\n"); | |
120 free(isizes); | |
121 return -1; | |
122 } | |
123 ks_introsort(uint64_t, tot, isizes); | |
124 p25 = isizes[(int)(tot*0.25 + 0.5)]; | |
125 p50 = isizes[(int)(tot*0.50 + 0.5)]; | |
126 p75 = isizes[(int)(tot*0.75 + 0.5)]; | |
127 tmp = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499); | |
128 ii->low = tmp > max_len? tmp : max_len; // ii->low is unsigned | |
129 ii->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499); | |
130 for (i = 0, x = n = 0; i < tot; ++i) | |
131 if (isizes[i] >= ii->low && isizes[i] <= ii->high) | |
132 ++n, x += isizes[i]; | |
133 ii->avg = (double)x / n; | |
134 for (i = 0; i < tot; ++i) { | |
135 if (isizes[i] >= ii->low && isizes[i] <= ii->high) { | |
136 double tmp = (isizes[i] - ii->avg) * (isizes[i] - ii->avg); | |
137 ii->std += tmp; | |
138 skewness += tmp * (isizes[i] - ii->avg); | |
139 kurtosis += tmp * tmp; | |
140 } | |
141 } | |
142 kurtosis = kurtosis/n / (ii->std / n * ii->std / n) - 3; | |
143 ii->std = sqrt(ii->std / n); // it would be better as n-1, but n is usually very large | |
144 skewness = skewness / n / (ii->std * ii->std * ii->std); | |
145 for (y = 1.0; y < 10.0; y += 0.01) | |
146 if (.5 * erfc(y / M_SQRT2) < ap_prior / L * (y * ii->std + ii->avg)) break; | |
147 ii->high_bayesian = (bwtint_t)(y * ii->std + ii->avg + .499); | |
148 for (i = 0; i < tot; ++i) | |
149 if (isizes[i] > ii->high_bayesian) ++n_ap; | |
150 ii->ap_prior = .01 * (n_ap + .01) / tot; | |
151 if (ii->ap_prior < ap_prior) ii->ap_prior = ap_prior; | |
152 free(isizes); | |
153 fprintf(stderr, "[infer_isize] (25, 50, 75) percentile: (%d, %d, %d)\n", p25, p50, p75); | |
154 if (isnan(ii->std) || p75 > 100000) { | |
155 ii->low = ii->high = ii->high_bayesian = 0; ii->avg = ii->std = -1.0; | |
156 fprintf(stderr, "[infer_isize] fail to infer insert size: weird pairing\n"); | |
157 return -1; | |
158 } | |
159 for (y = 1.0; y < 10.0; y += 0.01) | |
160 if (.5 * erfc(y / M_SQRT2) < ap_prior / L * (y * ii->std + ii->avg)) break; | |
161 ii->high_bayesian = (bwtint_t)(y * ii->std + ii->avg + .499); | |
162 fprintf(stderr, "[infer_isize] low and high boundaries: %ld and %ld for estimating avg and std\n", (long)ii->low, (long)ii->high); | |
163 fprintf(stderr, "[infer_isize] inferred external isize from %d pairs: %.3lf +/- %.3lf\n", n, ii->avg, ii->std); | |
164 fprintf(stderr, "[infer_isize] skewness: %.3lf; kurtosis: %.3lf; ap_prior: %.2e\n", skewness, kurtosis, ii->ap_prior); | |
165 fprintf(stderr, "[infer_isize] inferred maximum insert size: %ld (%.2lf sigma)\n", (long)ii->high_bayesian, y); | |
166 return 0; | |
167 } | |
168 | |
169 static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, const isize_info_t *ii) | |
170 { | |
171 int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len; | |
172 uint64_t o_score, subo_score; | |
173 b128_t last_pos[2][2], o_pos[2]; | |
174 max_len = p[0]->full_len; | |
175 if (max_len < p[1]->full_len) max_len = p[1]->full_len; | |
176 if (low_bound < max_len) low_bound = max_len; | |
177 | |
178 // here v>=u. When ii is set, we check insert size with ii; otherwise with opt->max_isize | |
179 #define __pairing_aux(u,v) do { \ | |
180 bwtint_t l = (v).x + p[(v).y&1]->len - ((u).x); \ | |
181 if ((u).x != (uint64_t)-1 && (v).x > (u).x && l >= max_len \ | |
182 && ((ii->high && l <= ii->high_bayesian) || (ii->high == 0 && l <= opt->max_isize))) \ | |
183 { \ | |
184 uint64_t s = d->aln[(v).y&1].a[(v).y>>2].score + d->aln[(u).y&1].a[(u).y>>2].score; \ | |
185 s *= 10; \ | |
186 if (ii->high) s += (int)(-4.343 * log(.5 * erfc(M_SQRT1_2 * fabs(l - ii->avg) / ii->std)) + .499); \ | |
187 s = s<<32 | (uint32_t)hash_64((u).x<<32 | (v).x); \ | |
188 if (s>>32 == o_score>>32) ++o_n; \ | |
189 else if (s>>32 < o_score>>32) { subo_n += o_n; o_n = 1; } \ | |
190 else ++subo_n; \ | |
191 if (s < o_score) subo_score = o_score, o_score = s, o_pos[(u).y&1] = (u), o_pos[(v).y&1] = (v); \ | |
192 else if (s < subo_score) subo_score = s; \ | |
193 } \ | |
194 } while (0) | |
195 | |
196 #define __pairing_aux2(q, w) do { \ | |
197 const bwt_aln1_t *r = d->aln[(w).y&1].a + ((w).y>>2); \ | |
198 (q)->extra_flag |= SAM_FPP; \ | |
199 if ((q)->pos != (w).x || (q)->strand != ((w).y>>1&1)) { \ | |
200 (q)->n_mm = r->n_mm; (q)->n_gapo = r->n_gapo; (q)->n_gape = r->n_gape; (q)->strand = (w).y>>1&1; \ | |
201 (q)->score = r->score; \ | |
202 (q)->pos = (w).x; \ | |
203 if ((q)->mapQ > 0) ++cnt_chg; \ | |
204 } \ | |
205 } while (0) | |
206 | |
207 o_score = subo_score = (uint64_t)-1; | |
208 o_n = subo_n = 0; | |
209 ks_introsort(b128, d->arr.n, d->arr.a); | |
210 for (j = 0; j < 2; ++j) last_pos[j][0].x = last_pos[j][0].y = last_pos[j][1].x = last_pos[j][1].y = (uint64_t)-1; | |
211 if (opt->type == BWA_PET_STD) { | |
212 for (i = 0; i < d->arr.n; ++i) { | |
213 b128_t x = d->arr.a[i]; | |
214 int strand = x.y>>1&1; | |
215 if (strand == 1) { // reverse strand, then check | |
216 int y = 1 - (x.y&1); | |
217 __pairing_aux(last_pos[y][1], x); | |
218 __pairing_aux(last_pos[y][0], x); | |
219 } else { // forward strand, then push | |
220 last_pos[x.y&1][0] = last_pos[x.y&1][1]; | |
221 last_pos[x.y&1][1] = x; | |
222 } | |
223 } | |
224 } else if (opt->type == BWA_PET_SOLID) { | |
225 for (i = 0; i < d->arr.n; ++i) { | |
226 b128_t x = d->arr.a[i]; | |
227 int strand = x.y>>1&1; | |
228 if ((strand^x.y)&1) { // push | |
229 int y = 1 - (x.y&1); | |
230 __pairing_aux(last_pos[y][1], x); | |
231 __pairing_aux(last_pos[y][0], x); | |
232 } else { // check | |
233 last_pos[x.y&1][0] = last_pos[x.y&1][1]; | |
234 last_pos[x.y&1][1] = x; | |
235 } | |
236 } | |
237 } else { | |
238 fprintf(stderr, "[paring] not implemented yet!\n"); | |
239 exit(1); | |
240 } | |
241 // set pairing | |
242 //fprintf(stderr, "[%ld, %d, %d, %d]\n", d->arr.n, (int)(o_score>>32), (int)(subo_score>>32), o_n); | |
243 if (o_score != (uint64_t)-1) { | |
244 int mapQ_p = 0; // this is the maximum mapping quality when one end is moved | |
245 //fprintf(stderr, "%d, %d\n", o_n, subo_n); | |
246 if (o_n == 1) { | |
247 if (subo_score == (uint64_t)-1) mapQ_p = 29; // no sub-optimal pair | |
248 else if ((subo_score>>32) - (o_score>>32) > s_mm * 10) mapQ_p = 23; // poor sub-optimal pair | |
249 else { | |
250 int n = subo_n > 255? 255 : subo_n; | |
251 mapQ_p = ((subo_score>>32) - (o_score>>32)) / 2 - g_log_n[n]; | |
252 if (mapQ_p < 0) mapQ_p = 0; | |
253 } | |
254 } | |
255 if ((p[0]->pos == o_pos[0].x && p[0]->strand == (o_pos[0].y>>1&1)) && (p[1]->pos == o_pos[1].x && p[1]->strand == (o_pos[1].y>>1&1))) { // both ends not moved | |
256 if (p[0]->mapQ > 0 && p[1]->mapQ > 0) { | |
257 int mapQ = p[0]->mapQ + p[1]->mapQ; | |
258 if (mapQ > 60) mapQ = 60; | |
259 p[0]->mapQ = p[1]->mapQ = mapQ; | |
260 } else { | |
261 if (p[0]->mapQ == 0) p[0]->mapQ = (mapQ_p + 7 < p[1]->mapQ)? mapQ_p + 7 : p[1]->mapQ; | |
262 if (p[1]->mapQ == 0) p[1]->mapQ = (mapQ_p + 7 < p[0]->mapQ)? mapQ_p + 7 : p[0]->mapQ; | |
263 } | |
264 } else if (p[0]->pos == o_pos[0].x && p[0]->strand == (o_pos[0].y>>1&1)) { // [1] moved | |
265 p[1]->seQ = 0; p[1]->mapQ = p[0]->mapQ; | |
266 if (p[1]->mapQ > mapQ_p) p[1]->mapQ = mapQ_p; | |
267 } else if (p[1]->pos == o_pos[1].x && p[1]->strand == (o_pos[1].y>>1&1)) { // [0] moved | |
268 p[0]->seQ = 0; p[0]->mapQ = p[1]->mapQ; | |
269 if (p[0]->mapQ > mapQ_p) p[0]->mapQ = mapQ_p; | |
270 } else { // both ends moved | |
271 p[0]->seQ = p[1]->seQ = 0; | |
272 mapQ_p -= 20; | |
273 if (mapQ_p < 0) mapQ_p = 0; | |
274 p[0]->mapQ = p[1]->mapQ = mapQ_p; | |
275 } | |
276 __pairing_aux2(p[0], o_pos[0]); | |
277 __pairing_aux2(p[1], o_pos[1]); | |
278 } | |
279 return cnt_chg; | |
280 } | |
281 | |
282 typedef struct { | |
283 kvec_t(bwt_aln1_t) aln; | |
284 } aln_buf_t; | |
285 | |
286 int bwa_cal_pac_pos_pe(const bntseq_t *bns, const char *prefix, bwt_t *const _bwt, int n_seqs, bwa_seq_t *seqs[2], FILE *fp_sa[2], isize_info_t *ii, | |
287 const pe_opt_t *opt, const gap_opt_t *gopt, const isize_info_t *last_ii) | |
288 { | |
289 int i, j, cnt_chg = 0; | |
290 char str[1024]; | |
291 bwt_t *bwt; | |
292 pe_data_t *d; | |
293 aln_buf_t *buf[2]; | |
294 | |
295 d = (pe_data_t*)calloc(1, sizeof(pe_data_t)); | |
296 buf[0] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t)); | |
297 buf[1] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t)); | |
298 | |
299 if (_bwt == 0) { // load forward SA | |
300 strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); | |
301 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt); | |
302 } else bwt = _bwt; | |
303 | |
304 // SE | |
305 for (i = 0; i != n_seqs; ++i) { | |
306 bwa_seq_t *p[2]; | |
307 for (j = 0; j < 2; ++j) { | |
308 int n_aln; | |
309 p[j] = seqs[j] + i; | |
310 p[j]->n_multi = 0; | |
311 p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2); | |
312 fread(&n_aln, 4, 1, fp_sa[j]); | |
313 if (n_aln > kv_max(d->aln[j])) | |
314 kv_resize(bwt_aln1_t, d->aln[j], n_aln); | |
315 d->aln[j].n = n_aln; | |
316 fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]); | |
317 kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j] | |
318 // generate SE alignment and mapping quality | |
319 bwa_aln2seq(n_aln, d->aln[j].a, p[j]); | |
320 if (p[j]->type == BWA_TYPE_UNIQUE || p[j]->type == BWA_TYPE_REPEAT) { | |
321 int strand; | |
322 int max_diff = gopt->fnr > 0.0? bwa_cal_maxdiff(p[j]->len, BWA_AVG_ERR, gopt->fnr) : gopt->max_diff; | |
323 p[j]->seQ = p[j]->mapQ = bwa_approx_mapQ(p[j], max_diff); | |
324 p[j]->pos = bwa_sa2pos(bns, bwt, p[j]->sa, p[j]->len, &strand); | |
325 p[j]->strand = strand; | |
326 } | |
327 } | |
328 } | |
329 | |
330 // infer isize | |
331 infer_isize(n_seqs, seqs, ii, opt->ap_prior, bwt->seq_len/2); | |
332 if (ii->avg < 0.0 && last_ii->avg > 0.0) *ii = *last_ii; | |
333 if (opt->force_isize) { | |
334 fprintf(stderr, "[%s] discard insert size estimate as user's request.\n", __func__); | |
335 ii->low = ii->high = 0; ii->avg = ii->std = -1.0; | |
336 } | |
337 | |
338 // PE | |
339 for (i = 0; i != n_seqs; ++i) { | |
340 bwa_seq_t *p[2]; | |
341 for (j = 0; j < 2; ++j) { | |
342 p[j] = seqs[j] + i; | |
343 kv_copy(bwt_aln1_t, d->aln[j], buf[j][i].aln); | |
344 } | |
345 if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT) | |
346 && (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT)) | |
347 { // only when both ends mapped | |
348 b128_t x; | |
349 int j, k; | |
350 long long n_occ[2]; | |
351 for (j = 0; j < 2; ++j) { | |
352 n_occ[j] = 0; | |
353 for (k = 0; k < d->aln[j].n; ++k) | |
354 n_occ[j] += d->aln[j].a[k].l - d->aln[j].a[k].k + 1; | |
355 } | |
356 if (n_occ[0] > opt->max_occ || n_occ[1] > opt->max_occ) continue; | |
357 d->arr.n = 0; | |
358 for (j = 0; j < 2; ++j) { | |
359 for (k = 0; k < d->aln[j].n; ++k) { | |
360 bwt_aln1_t *r = d->aln[j].a + k; | |
361 bwtint_t l; | |
362 if (0 && r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table | |
363 b128_t key; | |
364 int ret; | |
365 key.x = r->k; key.y = r->l; | |
366 khint_t iter = kh_put(b128, g_hash, key, &ret); | |
367 if (ret) { // not in the hash table; ret must equal 1 as we never remove elements | |
368 poslist_t *z = &kh_val(g_hash, iter); | |
369 z->n = r->l - r->k + 1; | |
370 z->a = (bwtint_t*)malloc(sizeof(bwtint_t) * z->n); | |
371 for (l = r->k; l <= r->l; ++l) { | |
372 int strand; | |
373 z->a[l - r->k] = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand)<<1; | |
374 z->a[l - r->k] |= strand; | |
375 } | |
376 } | |
377 for (l = 0; l < kh_val(g_hash, iter).n; ++l) { | |
378 x.x = kh_val(g_hash, iter).a[l]>>1; | |
379 x.y = k<<2 | (kh_val(g_hash, iter).a[l]&1)<<1 | j; | |
380 kv_push(b128_t, d->arr, x); | |
381 } | |
382 } else { // then calculate on the fly | |
383 for (l = r->k; l <= r->l; ++l) { | |
384 int strand; | |
385 x.x = bwa_sa2pos(bns, bwt, l, p[j]->len, &strand); | |
386 x.y = k<<2 | strand<<1 | j; | |
387 kv_push(b128_t, d->arr, x); | |
388 } | |
389 } | |
390 } | |
391 } | |
392 cnt_chg += pairing(p, d, opt, gopt->s_mm, ii); | |
393 } | |
394 | |
395 if (opt->N_multi || opt->n_multi) { | |
396 for (j = 0; j < 2; ++j) { | |
397 if (p[j]->type != BWA_TYPE_NO_MATCH) { | |
398 int k, n_multi; | |
399 if (!(p[j]->extra_flag&SAM_FPP) && p[1-j]->type != BWA_TYPE_NO_MATCH) { | |
400 bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, p[j]->c1+p[j]->c2-1 > opt->N_multi? opt->n_multi : opt->N_multi); | |
401 } else bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, opt->n_multi); | |
402 for (k = 0, n_multi = 0; k < p[j]->n_multi; ++k) { | |
403 int strand; | |
404 bwt_multi1_t *q = p[j]->multi + k; | |
405 q->pos = bwa_sa2pos(bns, bwt, q->pos, p[j]->len, &strand); | |
406 q->strand = strand; | |
407 if (q->pos != p[j]->pos) | |
408 p[j]->multi[n_multi++] = *q; | |
409 } | |
410 p[j]->n_multi = n_multi; | |
411 } | |
412 } | |
413 } | |
414 } | |
415 | |
416 // free | |
417 for (i = 0; i < n_seqs; ++i) { | |
418 kv_destroy(buf[0][i].aln); | |
419 kv_destroy(buf[1][i].aln); | |
420 } | |
421 free(buf[0]); free(buf[1]); | |
422 if (_bwt == 0) bwt_destroy(bwt); | |
423 kv_destroy(d->arr); | |
424 kv_destroy(d->pos[0]); kv_destroy(d->pos[1]); | |
425 kv_destroy(d->aln[0]); kv_destroy(d->aln[1]); | |
426 free(d); | |
427 return cnt_chg; | |
428 } | |
429 | |
430 #define SW_MIN_MATCH_LEN 20 | |
431 #define SW_MIN_MAPQ 17 | |
432 | |
433 // cnt = n_mm<<16 | n_gapo<<8 | n_gape | |
434 bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen, | |
435 int *n_cigar, uint32_t *_cnt) | |
436 { | |
437 bwa_cigar_t *cigar = 0; | |
438 ubyte_t *ref_seq; | |
439 bwtint_t k, x, y, l; | |
440 int path_len, ret, subo; | |
441 AlnParam ap = aln_param_bwa; | |
442 path_t *path, *p; | |
443 | |
444 // check whether there are too many N's | |
445 if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0; | |
446 for (k = 0, x = 0; k < len; ++k) | |
447 if (seq[k] >= 4) ++x; | |
448 if ((float)x/len >= 0.25 || len - x < SW_MIN_MATCH_LEN) return 0; | |
449 | |
450 // get reference subsequence | |
451 ref_seq = (ubyte_t*)calloc(reglen, 1); | |
452 for (k = *beg, l = 0; l < reglen && k < l_pac; ++k) | |
453 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3; | |
454 path = (path_t*)calloc(l+len, sizeof(path_t)); | |
455 | |
456 // do alignment | |
457 ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, &subo); | |
458 if (ret < 0 || subo == ret) { // no hit or tandem hits | |
459 free(path); free(cigar); free(ref_seq); *n_cigar = 0; | |
460 return 0; | |
461 } | |
462 cigar = bwa_aln_path2cigar(path, path_len, n_cigar); | |
463 | |
464 // check whether the alignment is good enough | |
465 for (k = 0, x = y = 0; k < *n_cigar; ++k) { | |
466 bwa_cigar_t c = cigar[k]; | |
467 if (__cigar_op(c) == FROM_M) x += __cigar_len(c), y += __cigar_len(c); | |
468 else if (__cigar_op(c) == FROM_D) x += __cigar_len(c); | |
469 else y += __cigar_len(c); | |
470 } | |
471 if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough | |
472 free(path); free(cigar); free(ref_seq); | |
473 *n_cigar = 0; | |
474 return 0; | |
475 } | |
476 | |
477 { // update cigar and coordinate; | |
478 int start, end; | |
479 p = path + path_len - 1; | |
480 *beg += (p->i? p->i : 1) - 1; | |
481 start = (p->j? p->j : 1) - 1; | |
482 end = path->j; | |
483 cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2)); | |
484 if (start) { | |
485 memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar)); | |
486 cigar[0] = __cigar_create(3, start); | |
487 ++(*n_cigar); | |
488 } | |
489 if (end < len) { | |
490 /*cigar[*n_cigar] = 3<<14 | (len - end);*/ | |
491 cigar[*n_cigar] = __cigar_create(3, (len - end)); | |
492 ++(*n_cigar); | |
493 } | |
494 } | |
495 | |
496 { // set *cnt | |
497 int n_mm, n_gapo, n_gape; | |
498 n_mm = n_gapo = n_gape = 0; | |
499 p = path + path_len - 1; | |
500 x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0; | |
501 for (k = 0; k < *n_cigar; ++k) { | |
502 bwa_cigar_t c = cigar[k]; | |
503 if (__cigar_op(c) == FROM_M) { | |
504 for (l = 0; l < (__cigar_len(c)); ++l) | |
505 if (ref_seq[x+l] < 4 && seq[y+l] < 4 && ref_seq[x+l] != seq[y+l]) ++n_mm; | |
506 x += __cigar_len(c), y += __cigar_len(c); | |
507 } else if (__cigar_op(c) == FROM_D) { | |
508 x += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1; | |
509 } else if (__cigar_op(c) == FROM_I) { | |
510 y += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1; | |
511 } | |
512 } | |
513 *_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape; | |
514 } | |
515 | |
516 free(ref_seq); free(path); | |
517 return cigar; | |
518 } | |
519 | |
520 ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, bwa_seq_t *seqs[2], const pe_opt_t *popt, const isize_info_t *ii) | |
521 { | |
522 ubyte_t *pacseq; | |
523 int i; | |
524 uint64_t n_tot[2], n_mapped[2]; | |
525 | |
526 // load reference sequence | |
527 if (_pacseq == 0) { | |
528 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1); | |
529 rewind(bns->fp_pac); | |
530 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac); | |
531 } else pacseq = (ubyte_t*)_pacseq; | |
532 if (!popt->is_sw || ii->avg < 0.0) return pacseq; | |
533 | |
534 // perform mate alignment | |
535 n_tot[0] = n_tot[1] = n_mapped[0] = n_mapped[1] = 0; | |
536 for (i = 0; i != n_seqs; ++i) { | |
537 bwa_seq_t *p[2]; | |
538 p[0] = seqs[0] + i; p[1] = seqs[1] + i; | |
539 if ((p[0]->mapQ >= SW_MIN_MAPQ || p[1]->mapQ >= SW_MIN_MAPQ) && (p[0]->extra_flag&SAM_FPP) == 0) { // unpaired and one read has high mapQ | |
540 int k, n_cigar[2], is_singleton, mapQ = 0, mq_adjust[2]; | |
541 int64_t beg[2], end[2]; | |
542 bwa_cigar_t *cigar[2]; | |
543 uint32_t cnt[2]; | |
544 | |
545 /* In the following, _pref points to the reference read | |
546 * which must be aligned; _pmate points to its mate which is | |
547 * considered to be modified. */ | |
548 | |
549 #define __set_rght_coor(_a, _b, _pref, _pmate) do { \ | |
550 (_a) = (int64_t)_pref->pos + ii->avg - 3 * ii->std - _pmate->len * 1.5; \ | |
551 (_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \ | |
552 if ((_a) < (int64_t)_pref->pos + _pref->len) (_a) = _pref->pos + _pref->len; \ | |
553 if ((_b) > bns->l_pac) (_b) = bns->l_pac; \ | |
554 } while (0) | |
555 | |
556 #define __set_left_coor(_a, _b, _pref, _pmate) do { \ | |
557 (_a) = (int64_t)_pref->pos + _pref->len - ii->avg - 3 * ii->std - _pmate->len * 0.5; \ | |
558 (_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \ | |
559 if ((_a) < 0) (_a) = 0; \ | |
560 if ((_b) > _pref->pos) (_b) = _pref->pos; \ | |
561 } while (0) | |
562 | |
563 #define __set_fixed(_pref, _pmate, _beg, _cnt) do { \ | |
564 _pmate->type = BWA_TYPE_MATESW; \ | |
565 _pmate->pos = _beg; \ | |
566 _pmate->seQ = _pref->seQ; \ | |
567 _pmate->strand = (popt->type == BWA_PET_STD)? 1 - _pref->strand : _pref->strand; \ | |
568 _pmate->n_mm = _cnt>>16; _pmate->n_gapo = _cnt>>8&0xff; _pmate->n_gape = _cnt&0xff; \ | |
569 _pmate->extra_flag |= SAM_FPP; \ | |
570 _pref->extra_flag |= SAM_FPP; \ | |
571 } while (0) | |
572 | |
573 mq_adjust[0] = mq_adjust[1] = 255; // not effective | |
574 is_singleton = (p[0]->type == BWA_TYPE_NO_MATCH || p[1]->type == BWA_TYPE_NO_MATCH)? 1 : 0; | |
575 | |
576 ++n_tot[is_singleton]; | |
577 cigar[0] = cigar[1] = 0; | |
578 n_cigar[0] = n_cigar[1] = 0; | |
579 if (popt->type != BWA_PET_STD && popt->type != BWA_PET_SOLID) continue; // other types of pairing is not considered | |
580 for (k = 0; k < 2; ++k) { // p[1-k] is the reference read and p[k] is the read considered to be modified | |
581 ubyte_t *seq; | |
582 if (p[1-k]->type == BWA_TYPE_NO_MATCH) continue; // if p[1-k] is unmapped, skip | |
583 if (popt->type == BWA_PET_STD) { | |
584 if (p[1-k]->strand == 0) { // then the mate is on the reverse strand and has larger coordinate | |
585 __set_rght_coor(beg[k], end[k], p[1-k], p[k]); | |
586 seq = p[k]->rseq; | |
587 } else { // then the mate is on forward stand and has smaller coordinate | |
588 __set_left_coor(beg[k], end[k], p[1-k], p[k]); | |
589 seq = p[k]->seq; | |
590 seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed; this will reversed back shortly | |
591 } | |
592 } else { // BWA_PET_SOLID | |
593 if (p[1-k]->strand == 0) { // R3-F3 pairing | |
594 if (k == 0) __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3 | |
595 else __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3 | |
596 seq = p[k]->rseq; | |
597 seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed | |
598 } else { // F3-R3 pairing | |
599 if (k == 0) __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3 | |
600 else __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3 | |
601 seq = p[k]->seq; | |
602 } | |
603 } | |
604 // perform SW alignment | |
605 cigar[k] = bwa_sw_core(bns->l_pac, pacseq, p[k]->len, seq, &beg[k], end[k] - beg[k], &n_cigar[k], &cnt[k]); | |
606 if (cigar[k] && p[k]->type != BWA_TYPE_NO_MATCH) { // re-evaluate cigar[k] | |
607 int s_old, clip = 0, s_new; | |
608 if (__cigar_op(cigar[k][0]) == 3) clip += __cigar_len(cigar[k][0]); | |
609 if (__cigar_op(cigar[k][n_cigar[k]-1]) == 3) clip += __cigar_len(cigar[k][n_cigar[k]-1]); | |
610 s_old = (int)((p[k]->n_mm * 9 + p[k]->n_gapo * 13 + p[k]->n_gape * 2) / 3. * 8. + .499); | |
611 s_new = (int)(((cnt[k]>>16) * 9 + (cnt[k]>>8&0xff) * 13 + (cnt[k]&0xff) * 2 + clip * 3) / 3. * 8. + .499); | |
612 s_old += -4.343 * log(ii->ap_prior / bns->l_pac); | |
613 s_new += (int)(-4.343 * log(.5 * erfc(M_SQRT1_2 * 1.5) + .499)); // assume the mapped isize is 1.5\sigma | |
614 if (s_old < s_new) { // reject SW alignment | |
615 mq_adjust[k] = s_new - s_old; | |
616 free(cigar[k]); cigar[k] = 0; n_cigar[k] = 0; | |
617 } else mq_adjust[k] = s_old - s_new; | |
618 } | |
619 // now revserse sequence back such that p[*]->seq looks untouched | |
620 if (popt->type == BWA_PET_STD) { | |
621 if (p[1-k]->strand == 1) seq_reverse(p[k]->len, seq, 0); | |
622 } else { | |
623 if (p[1-k]->strand == 0) seq_reverse(p[k]->len, seq, 0); | |
624 } | |
625 } | |
626 k = -1; // no read to be changed | |
627 if (cigar[0] && cigar[1]) { | |
628 k = p[0]->mapQ < p[1]->mapQ? 0 : 1; // p[k] to be fixed | |
629 mapQ = abs(p[1]->mapQ - p[0]->mapQ); | |
630 } else if (cigar[0]) k = 0, mapQ = p[1]->mapQ; | |
631 else if (cigar[1]) k = 1, mapQ = p[0]->mapQ; | |
632 if (k >= 0 && p[k]->pos != beg[k]) { | |
633 ++n_mapped[is_singleton]; | |
634 { // recalculate mapping quality | |
635 int tmp = (int)p[1-k]->mapQ - p[k]->mapQ/2 - 8; | |
636 if (tmp <= 0) tmp = 1; | |
637 if (mapQ > tmp) mapQ = tmp; | |
638 p[k]->mapQ = p[1-k]->mapQ = mapQ; | |
639 p[k]->seQ = p[1-k]->seQ = p[1-k]->seQ < mapQ? p[1-k]->seQ : mapQ; | |
640 if (p[k]->mapQ > mq_adjust[k]) p[k]->mapQ = mq_adjust[k]; | |
641 if (p[k]->seQ > mq_adjust[k]) p[k]->seQ = mq_adjust[k]; | |
642 } | |
643 // update CIGAR | |
644 free(p[k]->cigar); p[k]->cigar = cigar[k]; cigar[k] = 0; | |
645 p[k]->n_cigar = n_cigar[k]; | |
646 // update the rest of information | |
647 __set_fixed(p[1-k], p[k], beg[k], cnt[k]); | |
648 } | |
649 free(cigar[0]); free(cigar[1]); | |
650 } | |
651 } | |
652 fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d singletons are mated.\n", | |
653 (long long)n_mapped[1], (long long)n_tot[1], SW_MIN_MAPQ); | |
654 fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d discordant pairs are fixed.\n", | |
655 (long long)n_mapped[0], (long long)n_tot[0], SW_MIN_MAPQ); | |
656 return pacseq; | |
657 } | |
658 | |
659 void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt) | |
660 { | |
661 extern bwa_seqio_t *bwa_open_reads(int mode, const char *fn_fa); | |
662 int i, j, n_seqs, tot_seqs = 0; | |
663 bwa_seq_t *seqs[2]; | |
664 bwa_seqio_t *ks[2]; | |
665 clock_t t; | |
666 bntseq_t *bns, *ntbns = 0; | |
667 FILE *fp_sa[2]; | |
668 gap_opt_t opt, opt0; | |
669 khint_t iter; | |
670 isize_info_t last_ii; // this is for the last batch of reads | |
671 char str[1024]; | |
672 bwt_t *bwt; | |
673 uint8_t *pac; | |
674 | |
675 // initialization | |
676 bwase_initialize(); // initialize g_log_n[] in bwase.c | |
677 pac = 0; bwt = 0; | |
678 for (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5); | |
679 bns = bns_restore(prefix); | |
680 srand48(bns->seed); | |
681 fp_sa[0] = xopen(fn_sa[0], "r"); | |
682 fp_sa[1] = xopen(fn_sa[1], "r"); | |
683 g_hash = kh_init(b128); | |
684 last_ii.avg = -1.0; | |
685 | |
686 fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]); | |
687 ks[0] = bwa_open_reads(opt.mode, fn_fa[0]); | |
688 opt0 = opt; | |
689 fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]); // overwritten! | |
690 ks[1] = bwa_open_reads(opt.mode, fn_fa[1]); | |
691 if (!(opt.mode & BWA_MODE_COMPREAD)) { | |
692 popt->type = BWA_PET_SOLID; | |
693 ntbns = bwa_open_nt(prefix); | |
694 } else { // for Illumina alignment only | |
695 if (popt->is_preload) { | |
696 strcpy(str, prefix); strcat(str, ".bwt"); bwt = bwt_restore_bwt(str); | |
697 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt); | |
698 pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1); | |
699 rewind(bns->fp_pac); | |
700 fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); | |
701 } | |
702 } | |
703 | |
704 // core loop | |
705 bwa_print_sam_SQ(bns); | |
706 bwa_print_sam_PG(); | |
707 while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt0.mode, opt0.trim_qual)) != 0) { | |
708 int cnt_chg; | |
709 isize_info_t ii; | |
710 ubyte_t *pacseq; | |
711 | |
712 seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode, opt.trim_qual); | |
713 tot_seqs += n_seqs; | |
714 t = clock(); | |
715 | |
716 fprintf(stderr, "[bwa_sai2sam_pe_core] convert to sequence coordinate... \n"); | |
717 cnt_chg = bwa_cal_pac_pos_pe(bns, prefix, bwt, n_seqs, seqs, fp_sa, &ii, popt, &opt, &last_ii); | |
718 fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
719 fprintf(stderr, "[bwa_sai2sam_pe_core] changing coordinates of %d alignments.\n", cnt_chg); | |
720 | |
721 fprintf(stderr, "[bwa_sai2sam_pe_core] align unmapped mate...\n"); | |
722 pacseq = bwa_paired_sw(bns, pac, n_seqs, seqs, popt, &ii); | |
723 fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
724 | |
725 fprintf(stderr, "[bwa_sai2sam_pe_core] refine gapped alignments... "); | |
726 for (j = 0; j < 2; ++j) | |
727 bwa_refine_gapped(bns, n_seqs, seqs[j], pacseq, ntbns); | |
728 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
729 if (pac == 0) free(pacseq); | |
730 | |
731 fprintf(stderr, "[bwa_sai2sam_pe_core] print alignments... "); | |
732 for (i = 0; i < n_seqs; ++i) { | |
733 bwa_seq_t *p[2]; | |
734 p[0] = seqs[0] + i; p[1] = seqs[1] + i; | |
735 if (p[0]->bc[0] || p[1]->bc[0]) { | |
736 strcat(p[0]->bc, p[1]->bc); | |
737 strcpy(p[1]->bc, p[0]->bc); | |
738 } | |
739 bwa_print_sam1(bns, p[0], p[1], opt.mode, opt.max_top2); | |
740 bwa_print_sam1(bns, p[1], p[0], opt.mode, opt.max_top2); | |
741 } | |
742 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock(); | |
743 | |
744 for (j = 0; j < 2; ++j) | |
745 bwa_free_read_seq(n_seqs, seqs[j]); | |
746 fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs); | |
747 last_ii = ii; | |
748 } | |
749 | |
750 // destroy | |
751 bns_destroy(bns); | |
752 if (ntbns) bns_destroy(ntbns); | |
753 for (i = 0; i < 2; ++i) { | |
754 bwa_seq_close(ks[i]); | |
755 fclose(fp_sa[i]); | |
756 } | |
757 for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter) | |
758 if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a); | |
759 kh_destroy(b128, g_hash); | |
760 if (pac) { | |
761 free(pac); bwt_destroy(bwt); | |
762 } | |
763 } | |
764 | |
765 int bwa_sai2sam_pe(int argc, char *argv[]) | |
766 { | |
767 extern char *bwa_rg_line, *bwa_rg_id; | |
768 extern int bwa_set_rg(const char *s); | |
769 extern char *bwa_infer_prefix(const char *hint); | |
770 int c; | |
771 pe_opt_t *popt; | |
772 char *prefix; | |
773 | |
774 popt = bwa_init_pe_opt(); | |
775 while ((c = getopt(argc, argv, "a:o:sPn:N:c:f:Ar:")) >= 0) { | |
776 switch (c) { | |
777 case 'r': | |
778 if (bwa_set_rg(optarg) < 0) { | |
779 fprintf(stderr, "[%s] malformated @RG line\n", __func__); | |
780 return 1; | |
781 } | |
782 break; | |
783 case 'a': popt->max_isize = atoi(optarg); break; | |
784 case 'o': popt->max_occ = atoi(optarg); break; | |
785 case 's': popt->is_sw = 0; break; | |
786 case 'P': popt->is_preload = 1; break; | |
787 case 'n': popt->n_multi = atoi(optarg); break; | |
788 case 'N': popt->N_multi = atoi(optarg); break; | |
789 case 'c': popt->ap_prior = atof(optarg); break; | |
790 case 'f': xreopen(optarg, "w", stdout); break; | |
791 case 'A': popt->force_isize = 1; break; | |
792 default: return 1; | |
793 } | |
794 } | |
795 | |
796 if (optind + 5 > argc) { | |
797 fprintf(stderr, "\n"); | |
798 fprintf(stderr, "Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>\n\n"); | |
799 fprintf(stderr, "Options: -a INT maximum insert size [%d]\n", popt->max_isize); | |
800 fprintf(stderr, " -o INT maximum occurrences for one end [%d]\n", popt->max_occ); | |
801 fprintf(stderr, " -n INT maximum hits to output for paired reads [%d]\n", popt->n_multi); | |
802 fprintf(stderr, " -N INT maximum hits to output for discordant pairs [%d]\n", popt->N_multi); | |
803 fprintf(stderr, " -c FLOAT prior of chimeric rate (lower bound) [%.1le]\n", popt->ap_prior); | |
804 fprintf(stderr, " -f FILE sam file to output results to [stdout]\n"); | |
805 fprintf(stderr, " -r STR read group header line such as `@RG\\tID:foo\\tSM:bar' [null]\n"); | |
806 fprintf(stderr, " -P preload index into memory (for base-space reads only)\n"); | |
807 fprintf(stderr, " -s disable Smith-Waterman for the unmapped mate\n"); | |
808 fprintf(stderr, " -A disable insert size estimate (force -s)\n\n"); | |
809 fprintf(stderr, "Notes: 1. For SOLiD reads, <in1.fq> corresponds R3 reads and <in2.fq> to F3.\n"); | |
810 fprintf(stderr, " 2. For reads shorter than 30bp, applying a smaller -o is recommended to\n"); | |
811 fprintf(stderr, " to get a sensible speed at the cost of pairing accuracy.\n"); | |
812 fprintf(stderr, "\n"); | |
813 return 1; | |
814 } | |
815 if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { | |
816 fprintf(stderr, "[%s] fail to locate the index\n", __func__); | |
817 free(bwa_rg_line); free(bwa_rg_id); | |
818 return 0; | |
819 } | |
820 bwa_sai2sam_pe_core(prefix, argv + optind + 1, argv + optind+3, popt); | |
821 free(bwa_rg_line); free(bwa_rg_id); free(prefix); | |
822 free(popt); | |
823 return 0; | |
824 } |