annotate pyPRADA_1.2/tools/bwa-0.5.7-mh/bwape.c @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #include <unistd.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 #include <math.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 #include <stdlib.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 #include <time.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 #include <stdio.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 #include <string.h>
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 #include "bwtaln.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 #include "kvec.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 #include "bntseq.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 #include "utils.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 #include "stdaln.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 int n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 bwtint_t *a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 } poslist_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 double avg, std;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 bwtint_t low, high, high_bayesian;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 } isize_info_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 #include "khash.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 KHASH_MAP_INIT_INT64(64, poslist_t)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 #include "ksort.h"
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 KSORT_INIT_GENERIC(uint64_t)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 kvec_t(uint64_t) arr;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 kvec_t(uint64_t) pos[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 kvec_t(bwt_aln1_t) aln[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 } pe_data_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 #define MIN_HASH_WIDTH 1000
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 static int g_log_n[256];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 static kh_64_t *g_hash;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 void bwa_aln2seq_core(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s, int set_main, int n_multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 void bwa_aln2seq(int n_aln, const bwt_aln1_t *aln, bwa_seq_t *s);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 void bwa_refine_gapped(const bntseq_t *bns, int n_seqs, bwa_seq_t *seqs, ubyte_t *_pacseq, bntseq_t *ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 int bwa_approx_mapQ(const bwa_seq_t *p, int mm);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, int mode, int max_top2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 bntseq_t *bwa_open_nt(const char *prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 void bwa_print_sam_SQ(const bntseq_t *bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 pe_opt_t *bwa_init_pe_opt()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 pe_opt_t *po;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 po = (pe_opt_t*)calloc(1, sizeof(pe_opt_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 po->max_isize = 500;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 po->max_occ = 100000;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 po->n_multi = 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 po->N_multi = 10;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 po->type = BWA_PET_STD;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 po->is_sw = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 po->ap_prior = 1e-5;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 return po;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 static inline uint64_t hash_64(uint64_t key)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 key += ~(key << 32);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 key ^= (key >> 22);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 key += ~(key << 13);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 key ^= (key >> 8);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 key += (key << 3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 key ^= (key >> 15);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 key += ~(key << 27);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 key ^= (key >> 31);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 return key;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 /*
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 static double ierfc(double x) // inverse erfc(); iphi(x) = M_SQRT2 *ierfc(2 * x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 const double a = 0.140012;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 double b, c;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 b = log(x * (2 - x));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 c = 2./M_PI/a + b / 2.;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 return sqrt(sqrt(c * c - b / a) - c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 // for normal distribution, this is about 3std
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 #define OUTLIER_BOUND 2.0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 static int infer_isize(int n_seqs, bwa_seq_t *seqs[2], isize_info_t *ii, double ap_prior, int64_t L)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 uint64_t x, *isizes;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 int n, i, tot, p25, p75, p50, max_len = 1, tmp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 double skewness = 0.0, kurtosis = 0.0, y;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 ii->avg = ii->std = -1.0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 ii->low = ii->high = ii->high_bayesian = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 isizes = (uint64_t*)calloc(n_seqs, 8);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 for (i = 0, tot = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 bwa_seq_t *p[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 p[0] = seqs[0] + i; p[1] = seqs[1] + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 if (p[0]->mapQ >= 20 && p[1]->mapQ >= 20)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 isizes[tot++] = (p[0]->pos < p[1]->pos)? p[1]->pos + p[1]->len - p[0]->pos : p[0]->pos + p[0]->len - p[1]->pos;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 if (p[0]->len > max_len) max_len = p[0]->len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 if (p[1]->len > max_len) max_len = p[1]->len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 if (tot < 20) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 fprintf(stderr, "[infer_isize] fail to infer insert size: too few good pairs\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 free(isizes);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 return -1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 ks_introsort(uint64_t, tot, isizes);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 p25 = isizes[(int)(tot*0.25 + 0.5)];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 p50 = isizes[(int)(tot*0.50 + 0.5)];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 p75 = isizes[(int)(tot*0.75 + 0.5)];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 tmp = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 ii->low = tmp > max_len? tmp : max_len; // ii->low is unsigned
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 ii->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 for (i = 0, x = n = 0; i < tot; ++i)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 if (isizes[i] >= ii->low && isizes[i] <= ii->high)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 ++n, x += isizes[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 ii->avg = (double)x / n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 for (i = 0; i < tot; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 if (isizes[i] >= ii->low && isizes[i] <= ii->high) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 double tmp = (isizes[i] - ii->avg) * (isizes[i] - ii->avg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 ii->std += tmp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 skewness += tmp * (isizes[i] - ii->avg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 kurtosis += tmp * tmp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
128 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
129 kurtosis = kurtosis/n / (ii->std / n * ii->std / n) - 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
130 ii->std = sqrt(ii->std / n); // it would be better as n-1, but n is usually very large
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
131 skewness = skewness / n / (ii->std * ii->std * ii->std);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
132 free(isizes);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
133 if (isnan(ii->std)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
134 ii->low = ii->high = 0; ii->avg = ii->std = -1.0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
135 fprintf(stderr, "[infer_isize] fail to infer insert size: weird pairing\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
136 return -1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
137 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
138 for (y = 1.0; y < 10.0; y += 0.01)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
139 if (.5 * erfc(y / M_SQRT2) < ap_prior / L * (y * ii->std + ii->avg)) break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
140 ii->high_bayesian = (bwtint_t)(y * ii->std + ii->avg + .499);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
141 fprintf(stderr, "[infer_isize] (25, 50, 75) percentile: (%d, %d, %d)\n", p25, p50, p75);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
142 fprintf(stderr, "[infer_isize] low and high boundaries: %d and %d for estimating avg and std\n", ii->low, ii->high);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
143 fprintf(stderr, "[infer_isize] inferred external isize from %d pairs: %.3lf +/- %.3lf\n", n, ii->avg, ii->std);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
144 fprintf(stderr, "[infer_isize] skewness: %.3lf; kurtosis: %.3lf\n", skewness, kurtosis);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
145 fprintf(stderr, "[infer_isize] inferred maximum insert size: %d (%.2lf sigma)\n", ii->high_bayesian, y);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
146 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
147 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
148
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
149 static int pairing(bwa_seq_t *p[2], pe_data_t *d, const pe_opt_t *opt, int s_mm, const isize_info_t *ii)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
150 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
151 int i, j, o_n, subo_n, cnt_chg = 0, low_bound = ii->low, max_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
152 uint64_t last_pos[2][2], o_pos[2], subo_score, o_score;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
153 max_len = p[0]->full_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
154 if (max_len < p[1]->full_len) max_len = p[1]->full_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
155 if (low_bound < max_len) low_bound = max_len;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
156
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
157 // here v>=u. When ii is set, we check insert size with ii; otherwise with opt->max_isize
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
158 #define __pairing_aux(u,v) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
159 bwtint_t l = ((v)>>32) + p[(v)&1]->len - ((u)>>32); \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
160 if ((u) != (uint64_t)-1 && (v)>>32 > (u)>>32 && l >= max_len \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
161 && ((ii->high && l <= ii->high_bayesian) || (ii->high == 0 && l <= opt->max_isize))) \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
162 { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
163 uint64_t s = d->aln[(v)&1].a[(uint32_t)(v)>>1].score + d->aln[(u)&1].a[(uint32_t)(u)>>1].score; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
164 s *= 10; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
165 if (ii->high) s += (int)(-4.343 * log(.5 * erfc(M_SQRT1_2 * fabs(l - ii->avg) / ii->std)) + .499); \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
166 s = s<<32 | (uint32_t)hash_64((u)>>32<<32 | (v)>>32); \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
167 if (s>>32 == o_score>>32) ++o_n; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
168 else if (s>>32 < o_score>>32) { subo_n += o_n; o_n = 1; } \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
169 else ++subo_n; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
170 if (s < o_score) subo_score = o_score, o_score = s, o_pos[(u)&1] = (u), o_pos[(v)&1] = (v); \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
171 else if (s < subo_score) subo_score = s; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
172 } \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
173 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
174
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
175 #define __pairing_aux2(q, w) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
176 const bwt_aln1_t *r = d->aln[(w)&1].a + ((uint32_t)(w)>>1); \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
177 (q)->extra_flag |= SAM_FPP; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
178 if ((q)->pos != (w)>>32 || (q)->strand != r->a) { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
179 (q)->n_mm = r->n_mm; (q)->n_gapo = r->n_gapo; (q)->n_gape = r->n_gape; (q)->strand = r->a; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
180 (q)->score = r->score; (q)->mapQ = mapQ_p; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
181 (q)->pos = (w)>>32; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
182 if ((q)->mapQ > 0) ++cnt_chg; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
183 } \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
184 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
185
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
186 o_score = subo_score = (uint64_t)-1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
187 o_n = subo_n = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
188 ks_introsort(uint64_t, d->arr.n, d->arr.a);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
189 for (j = 0; j < 2; ++j) last_pos[j][0] = last_pos[j][1] = (uint64_t)-1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
190 if (opt->type == BWA_PET_STD) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
191 for (i = 0; i < d->arr.n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
192 uint64_t x = d->arr.a[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
193 int strand = d->aln[x&1].a[(uint32_t)x>>1].a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
194 if (strand == 1) { // reverse strand, then check
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
195 int y = 1 - (x&1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
196 __pairing_aux(last_pos[y][1], x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
197 __pairing_aux(last_pos[y][0], x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
198 } else { // forward strand, then push
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
199 last_pos[x&1][0] = last_pos[x&1][1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
200 last_pos[x&1][1] = x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
201 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
202 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
203 } else if (opt->type == BWA_PET_SOLID) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
204 for (i = 0; i < d->arr.n; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
205 uint64_t x = d->arr.a[i];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
206 int strand = d->aln[x&1].a[(uint32_t)x>>1].a;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
207 if ((strand^x)&1) { // push
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
208 int y = 1 - (x&1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
209 __pairing_aux(last_pos[y][1], x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
210 __pairing_aux(last_pos[y][0], x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
211 } else { // check
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
212 last_pos[x&1][0] = last_pos[x&1][1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
213 last_pos[x&1][1] = x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
214 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
215 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
216 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
217 fprintf(stderr, "[paring] not implemented yet!\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
218 exit(1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
219 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
220 // set pairing
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
221 //fprintf(stderr, "[%d, %d, %d, %d]\n", d->arr.n, (int)(o_score>>32), (int)(subo_score>>32), o_n);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
222 if (o_score != (uint64_t)-1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
223 int mapQ_p = 0; // this is the maximum mapping quality when one end is moved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
224 //fprintf(stderr, "%d, %d\n", o_n, subo_n);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
225 if (o_n == 1) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
226 if (subo_score == (uint64_t)-1) mapQ_p = 29; // no sub-optimal pair
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
227 else if ((subo_score>>32) - (o_score>>32) > s_mm * 10) mapQ_p = 23; // poor sub-optimal pair
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
228 else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
229 int n = subo_n > 255? 255 : subo_n;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
230 mapQ_p = ((subo_score>>32) - (o_score>>32)) / 2 - g_log_n[n];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
231 if (mapQ_p < 0) mapQ_p = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
232 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
233 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
234 if (p[0]->pos == o_pos[0]>>32 && p[1]->pos == o_pos[1]>>32) { // both ends not moved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
235 if (p[0]->mapQ > 0 && p[1]->mapQ > 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
236 int mapQ = p[0]->mapQ + p[1]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
237 if (mapQ > 60) mapQ = 60;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
238 p[0]->mapQ = p[1]->mapQ = mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
239 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
240 if (p[0]->mapQ == 0) p[0]->mapQ = (mapQ_p + 7 < p[1]->mapQ)? mapQ_p + 7 : p[1]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
241 if (p[1]->mapQ == 0) p[1]->mapQ = (mapQ_p + 7 < p[0]->mapQ)? mapQ_p + 7 : p[0]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
242 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
243 } else if (p[0]->pos == o_pos[0]>>32) { // [1] moved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
244 p[1]->seQ = 0; p[1]->mapQ = p[0]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
245 if (p[1]->mapQ > mapQ_p) p[1]->mapQ = mapQ_p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
246 } else if (p[1]->pos == o_pos[1]>>32) { // [0] moved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
247 p[0]->seQ = 0; p[0]->mapQ = p[1]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
248 if (p[0]->mapQ > mapQ_p) p[0]->mapQ = mapQ_p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
249 } else { // both ends moved
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
250 p[0]->seQ = p[1]->seQ = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
251 mapQ_p -= 20;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
252 if (mapQ_p < 0) mapQ_p = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
253 p[0]->mapQ = p[1]->mapQ = mapQ_p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
254 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
255 __pairing_aux2(p[0], o_pos[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
256 __pairing_aux2(p[1], o_pos[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
257 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
258 return cnt_chg;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
259 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
260
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
261 typedef struct {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
262 kvec_t(bwt_aln1_t) aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
263 } aln_buf_t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
264
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
265 int bwa_cal_pac_pos_pe(const char *prefix, bwt_t *const _bwt[2], int n_seqs, bwa_seq_t *seqs[2], FILE *fp_sa[2], isize_info_t *ii,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
266 const pe_opt_t *opt, const gap_opt_t *gopt, const isize_info_t *last_ii)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
267 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
268 int i, j, cnt_chg = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
269 char str[1024];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
270 bwt_t *bwt[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
271 pe_data_t *d;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
272 aln_buf_t *buf[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
273
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
274 d = (pe_data_t*)calloc(1, sizeof(pe_data_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
275 buf[0] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
276 buf[1] = (aln_buf_t*)calloc(n_seqs, sizeof(aln_buf_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
277
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
278 if (_bwt[0] == 0) { // load forward SA
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
279 strcpy(str, prefix); strcat(str, ".bwt"); bwt[0] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
280 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
281 strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
282 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
283 } else bwt[0] = _bwt[0], bwt[1] = _bwt[1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
284
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
285 // SE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
286 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
287 bwa_seq_t *p[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
288 for (j = 0; j < 2; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
289 int n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
290 p[j] = seqs[j] + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
291 p[j]->n_multi = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
292 p[j]->extra_flag |= SAM_FPD | (j == 0? SAM_FR1 : SAM_FR2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
293 fread(&n_aln, 4, 1, fp_sa[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
294 if (n_aln > kv_max(d->aln[j]))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
295 kv_resize(bwt_aln1_t, d->aln[j], n_aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
296 d->aln[j].n = n_aln;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
297 fread(d->aln[j].a, sizeof(bwt_aln1_t), n_aln, fp_sa[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
298 kv_copy(bwt_aln1_t, buf[j][i].aln, d->aln[j]); // backup d->aln[j]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
299 // generate SE alignment and mapping quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
300 bwa_aln2seq(n_aln, d->aln[j].a, p[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
301 if (p[j]->type == BWA_TYPE_UNIQUE || p[j]->type == BWA_TYPE_REPEAT) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
302 int max_diff = gopt->fnr > 0.0? bwa_cal_maxdiff(p[j]->len, BWA_AVG_ERR, gopt->fnr) : gopt->max_diff;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
303 p[j]->pos = p[j]->strand? bwt_sa(bwt[0], p[j]->sa)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
304 : bwt[1]->seq_len - (bwt_sa(bwt[1], p[j]->sa) + p[j]->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
305 p[j]->seQ = p[j]->mapQ = bwa_approx_mapQ(p[j], max_diff);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
306 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
307 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
308 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
309
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
310 // infer isize
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
311 infer_isize(n_seqs, seqs, ii, opt->ap_prior, bwt[0]->seq_len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
312 if (ii->avg < 0.0 && last_ii->avg > 0.0) *ii = *last_ii;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
313
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
314 // PE
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
315 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
316 bwa_seq_t *p[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
317 for (j = 0; j < 2; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
318 p[j] = seqs[j] + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
319 kv_copy(bwt_aln1_t, d->aln[j], buf[j][i].aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
320 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
321 if ((p[0]->type == BWA_TYPE_UNIQUE || p[0]->type == BWA_TYPE_REPEAT)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
322 && (p[1]->type == BWA_TYPE_UNIQUE || p[1]->type == BWA_TYPE_REPEAT))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
323 { // only when both ends mapped
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
324 uint64_t x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
325 int j, k, n_occ[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
326 for (j = 0; j < 2; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
327 n_occ[j] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
328 for (k = 0; k < d->aln[j].n; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
329 n_occ[j] += d->aln[j].a[k].l - d->aln[j].a[k].k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
330 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
331 if (n_occ[0] > opt->max_occ || n_occ[1] > opt->max_occ) continue;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
332 d->arr.n = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
333 for (j = 0; j < 2; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
334 for (k = 0; k < d->aln[j].n; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
335 bwt_aln1_t *r = d->aln[j].a + k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
336 bwtint_t l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
337 if (r->l - r->k + 1 >= MIN_HASH_WIDTH) { // then check hash table
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
338 uint64_t key = (uint64_t)r->k<<32 | r->l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
339 int ret;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
340 khint_t iter = kh_put(64, g_hash, key, &ret);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
341 if (ret) { // not in the hash table; ret must equal 1 as we never remove elements
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
342 poslist_t *z = &kh_val(g_hash, iter);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
343 z->n = r->l - r->k + 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
344 z->a = (bwtint_t*)malloc(sizeof(bwtint_t) * z->n);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
345 for (l = r->k; l <= r->l; ++l)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
346 z->a[l - r->k] = r->a? bwt_sa(bwt[0], l) : bwt[1]->seq_len - (bwt_sa(bwt[1], l) + p[j]->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
347 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
348 for (l = 0; l < kh_val(g_hash, iter).n; ++l) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
349 x = kh_val(g_hash, iter).a[l];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
350 x = x<<32 | k<<1 | j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
351 kv_push(uint64_t, d->arr, x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
352 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
353 } else { // then calculate on the fly
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
354 for (l = r->k; l <= r->l; ++l) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
355 x = r->a? bwt_sa(bwt[0], l) : bwt[1]->seq_len - (bwt_sa(bwt[1], l) + p[j]->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
356 x = x<<32 | k<<1 | j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
357 kv_push(uint64_t, d->arr, x);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
358 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
359 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
360 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
361 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
362 cnt_chg += pairing(p, d, opt, gopt->s_mm, ii);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
363 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
364
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
365 if (opt->N_multi || opt->n_multi) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
366 for (j = 0; j < 2; ++j) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
367 if (p[j]->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
368 int k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
369 if (!(p[j]->extra_flag&SAM_FPP) && p[1-j]->type != BWA_TYPE_NO_MATCH) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
370 bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, p[j]->c1+p[j]->c2-1 > opt->N_multi? opt->n_multi : opt->N_multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
371 } else bwa_aln2seq_core(d->aln[j].n, d->aln[j].a, p[j], 0, opt->n_multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
372 for (k = 0; k < p[j]->n_multi; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
373 bwt_multi1_t *q = p[j]->multi + k;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
374 q->pos = q->strand? bwt_sa(bwt[0], q->pos) : bwt[1]->seq_len - (bwt_sa(bwt[1], q->pos) + p[j]->len);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
375 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
376 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
377 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
378 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
379 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
380
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
381 // free
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
382 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
383 kv_destroy(buf[0][i].aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
384 kv_destroy(buf[1][i].aln);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
385 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
386 free(buf[0]); free(buf[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
387 if (_bwt[0] == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
388 bwt_destroy(bwt[0]); bwt_destroy(bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
389 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
390 kv_destroy(d->arr);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
391 kv_destroy(d->pos[0]); kv_destroy(d->pos[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
392 kv_destroy(d->aln[0]); kv_destroy(d->aln[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
393 free(d);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
394 return cnt_chg;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
395 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
396
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
397 #define SW_MIN_MATCH_LEN 20
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
398 #define SW_MIN_MAPQ 17
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
399
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
400 // cnt = n_mm<<16 | n_gapo<<8 | n_gape
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
401 bwa_cigar_t *bwa_sw_core(bwtint_t l_pac, const ubyte_t *pacseq, int len, const ubyte_t *seq, int64_t *beg, int reglen,
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
402 int *n_cigar, uint32_t *_cnt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
403 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
404 bwa_cigar_t *cigar = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
405 ubyte_t *ref_seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
406 bwtint_t k, x, y, l;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
407 int path_len, ret;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
408 AlnParam ap = aln_param_bwa;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
409 path_t *path, *p;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
410
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
411 // check whether there are too many N's
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
412 if (reglen < SW_MIN_MATCH_LEN || (int64_t)l_pac - *beg < len) return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
413 for (k = 0, x = 0; k < len; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
414 if (seq[k] >= 4) ++x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
415 if ((float)x/len >= 0.25 || len - x < SW_MIN_MATCH_LEN) return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
416
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
417 // get reference subsequence
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
418 ref_seq = (ubyte_t*)calloc(reglen, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
419 for (k = *beg, l = 0; l < reglen && k < l_pac; ++k)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
420 ref_seq[l++] = pacseq[k>>2] >> ((~k&3)<<1) & 3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
421 path = (path_t*)calloc(l+len, sizeof(path_t));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
422
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
423 // do alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
424 ret = aln_local_core(ref_seq, l, (ubyte_t*)seq, len, &ap, path, &path_len, 1, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
425 if (ret < 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
426 free(path); free(cigar); free(ref_seq); *n_cigar = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
427 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
428 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
429 cigar = bwa_aln_path2cigar(path, path_len, n_cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
430
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
431 // check whether the alignment is good enough
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
432 for (k = 0, x = y = 0; k < *n_cigar; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
433 bwa_cigar_t c = cigar[k];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
434 if (__cigar_op(c) == FROM_M) x += __cigar_len(c), y += __cigar_len(c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
435 else if (__cigar_op(c) == FROM_D) x += __cigar_len(c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
436 else y += __cigar_len(c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
437 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
438 if (x < SW_MIN_MATCH_LEN || y < SW_MIN_MATCH_LEN) { // not good enough
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
439 free(path); free(cigar); free(ref_seq);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
440 *n_cigar = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
441 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
442 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
443
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
444 { // update cigar and coordinate;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
445 int start, end;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
446 p = path + path_len - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
447 *beg += (p->i? p->i : 1) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
448 start = (p->j? p->j : 1) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
449 end = path->j;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
450 cigar = (bwa_cigar_t*)realloc(cigar, sizeof(bwa_cigar_t) * (*n_cigar + 2));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
451 if (start) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
452 memmove(cigar + 1, cigar, sizeof(bwa_cigar_t) * (*n_cigar));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
453 cigar[0] = __cigar_create(3, start);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
454 ++(*n_cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
455 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
456 if (end < len) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
457 /*cigar[*n_cigar] = 3<<14 | (len - end);*/
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
458 cigar[*n_cigar] = __cigar_create(3, (len - end));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
459 ++(*n_cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
460 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
461 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
462
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
463 { // set *cnt
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
464 int n_mm, n_gapo, n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
465 n_mm = n_gapo = n_gape = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
466 p = path + path_len - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
467 x = p->i? p->i - 1 : 0; y = p->j? p->j - 1 : 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
468 for (k = 0; k < *n_cigar; ++k) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
469 bwa_cigar_t c = cigar[k];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
470 if (__cigar_op(c) == FROM_M) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
471 for (l = 0; l < (__cigar_len(c)); ++l)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
472 if (ref_seq[x+l] < 4 && seq[y+l] < 4 && ref_seq[x+l] != seq[y+l]) ++n_mm;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
473 x += __cigar_len(c), y += __cigar_len(c);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
474 } else if (__cigar_op(c) == FROM_D) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
475 x += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
476 } else if (__cigar_op(c) == FROM_I) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
477 y += __cigar_len(c), ++n_gapo, n_gape += (__cigar_len(c)) - 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
478 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
479 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
480 *_cnt = (uint32_t)n_mm<<16 | n_gapo<<8 | n_gape;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
481 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
482
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
483 free(ref_seq); free(path);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
484 return cigar;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
485 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
486
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
487 ubyte_t *bwa_paired_sw(const bntseq_t *bns, const ubyte_t *_pacseq, int n_seqs, bwa_seq_t *seqs[2], const pe_opt_t *popt, const isize_info_t *ii)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
488 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
489 ubyte_t *pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
490 int i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
491 uint64_t n_tot[2], n_mapped[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
492
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
493 // load reference sequence
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
494 if (_pacseq == 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
495 pacseq = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
496 rewind(bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
497 fread(pacseq, 1, bns->l_pac/4+1, bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
498 } else pacseq = (ubyte_t*)_pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
499 if (!popt->is_sw || ii->avg < 0.0) return pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
500
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
501 // perform mate alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
502 n_tot[0] = n_tot[1] = n_mapped[0] = n_mapped[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
503 for (i = 0; i != n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
504 bwa_seq_t *p[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
505 p[0] = seqs[0] + i; p[1] = seqs[1] + i;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
506 if ((p[0]->mapQ >= SW_MIN_MAPQ || p[1]->mapQ >= SW_MIN_MAPQ) && (p[0]->extra_flag&SAM_FPP) == 0) { // unpaired and one read has high mapQ
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
507 int k, n_cigar[2], is_singleton, mapQ = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
508 int64_t beg[2], end[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
509 bwa_cigar_t *cigar[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
510 uint32_t cnt[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
511
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
512 /* In the following, _pref points to the reference read
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
513 * which must be aligned; _pmate points to its mate which is
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
514 * considered to be modified. */
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
515
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
516 #define __set_rght_coor(_a, _b, _pref, _pmate) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
517 (_a) = _pref->pos + ii->avg - 3 * ii->std - _pmate->len * 1.5; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
518 (_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
519 if ((_a) < _pref->pos + _pref->len) (_a) = _pref->pos + _pref->len; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
520 if ((_b) > bns->l_pac) (_b) = bns->l_pac; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
521 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
522
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
523 #define __set_left_coor(_a, _b, _pref, _pmate) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
524 (_a) = _pref->pos + _pref->len - ii->avg - 3 * ii->std - _pmate->len * 0.5; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
525 (_b) = (_a) + 6 * ii->std + 2 * _pmate->len; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
526 if ((_a) < 0) (_a) = 0; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
527 if ((_b) > _pref->pos) (_b) = _pref->pos; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
528 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
529
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
530 #define __set_fixed(_pref, _pmate, _beg, _cnt) do { \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
531 _pmate->type = BWA_TYPE_MATESW; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
532 _pmate->pos = _beg; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
533 _pmate->seQ = _pref->seQ; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
534 _pmate->strand = (popt->type == BWA_PET_STD)? 1 - _pref->strand : _pref->strand; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
535 _pmate->n_mm = _cnt>>16; _pmate->n_gapo = _cnt>>8&0xff; _pmate->n_gape = _cnt&0xff; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
536 _pmate->extra_flag |= SAM_FPP; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
537 _pref->extra_flag |= SAM_FPP; \
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
538 } while (0)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
539
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
540 is_singleton = (p[0]->type == BWA_TYPE_NO_MATCH || p[1]->type == BWA_TYPE_NO_MATCH)? 1 : 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
541
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
542 ++n_tot[is_singleton];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
543 cigar[0] = cigar[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
544 n_cigar[0] = n_cigar[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
545 if (popt->type != BWA_PET_STD && popt->type != BWA_PET_SOLID) continue; // other types of pairing is not considered
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
546 for (k = 0; k < 2; ++k) { // p[1-k] is the reference read and p[k] is the read considered to be modified
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
547 ubyte_t *seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
548 if (p[1-k]->type == BWA_TYPE_NO_MATCH) continue; // if p[1-k] is unmapped, skip
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
549 if (popt->type == BWA_PET_STD) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
550 if (p[1-k]->strand == 0) { // then the mate is on the reverse strand and has larger coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
551 __set_rght_coor(beg[k], end[k], p[1-k], p[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
552 seq = p[k]->rseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
553 } else { // then the mate is on forward stand and has smaller coordinate
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
554 __set_left_coor(beg[k], end[k], p[1-k], p[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
555 seq = p[k]->seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
556 seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed; this will reversed back shortly
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
557 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
558 } else { // BWA_PET_SOLID
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
559 if (p[1-k]->strand == 0) { // R3-F3 pairing
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
560 if (k == 0) __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
561 else __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
562 seq = p[k]->rseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
563 seq_reverse(p[k]->len, seq, 0); // because ->seq is reversed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
564 } else { // F3-R3 pairing
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
565 if (k == 0) __set_rght_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is R3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
566 else __set_left_coor(beg[k], end[k], p[1-k], p[k]); // p[k] is F3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
567 seq = p[k]->seq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
568 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
569 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
570 // perform SW alignment
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
571 cigar[k] = bwa_sw_core(bns->l_pac, pacseq, p[k]->len, seq, &beg[k], end[k] - beg[k], &n_cigar[k], &cnt[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
572 // now revserse sequence back such that p[*]->seq looks untouched
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
573 if (popt->type == BWA_PET_STD) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
574 if (p[1-k]->strand == 1) seq_reverse(p[k]->len, seq, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
575 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
576 if (p[1-k]->strand == 0) seq_reverse(p[k]->len, seq, 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
577 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
578 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
579 k = -1; // no read to be changed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
580 if (cigar[0] && cigar[1]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
581 k = p[0]->mapQ < p[1]->mapQ? 0 : 1; // p[k] to be fixed
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
582 mapQ = abs(p[1]->mapQ - p[0]->mapQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
583 } else if (cigar[0]) k = 0, mapQ = p[1]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
584 else if (cigar[1]) k = 1, mapQ = p[0]->mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
585 if (k >= 0 && p[k]->pos != beg[k]) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
586 ++n_mapped[is_singleton];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
587 { // recalculate mapping quality
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
588 int tmp = (int)p[1-k]->mapQ - p[k]->mapQ/2 - 8;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
589 if (tmp <= 0) tmp = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
590 if (mapQ > tmp) mapQ = tmp;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
591 p[k]->mapQ = p[1-k]->mapQ = mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
592 p[k]->seQ = p[1-k]->seQ = p[1-k]->seQ < mapQ? p[1-k]->seQ : mapQ;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
593 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
594 // update CIGAR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
595 free(p[k]->cigar); p[k]->cigar = cigar[k]; cigar[k] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
596 p[k]->n_cigar = n_cigar[k];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
597 // update the rest of information
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
598 __set_fixed(p[1-k], p[k], beg[k], cnt[k]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
599 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
600 free(cigar[0]); free(cigar[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
601 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
602 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
603 fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d singletons are mated.\n",
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
604 (long long)n_mapped[1], (long long)n_tot[1], SW_MIN_MAPQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
605 fprintf(stderr, "[bwa_paired_sw] %lld out of %lld Q%d discordant pairs are fixed.\n",
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
606 (long long)n_mapped[0], (long long)n_tot[0], SW_MIN_MAPQ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
607 return pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
608 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
609
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
610 void bwa_sai2sam_pe_core(const char *prefix, char *const fn_sa[2], char *const fn_fa[2], pe_opt_t *popt)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
611 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
612 int i, j, n_seqs, tot_seqs = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
613 bwa_seq_t *seqs[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
614 bwa_seqio_t *ks[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
615 clock_t t;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
616 bntseq_t *bns, *ntbns = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
617 FILE *fp_sa[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
618 gap_opt_t opt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
619 khint_t iter;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
620 isize_info_t last_ii; // this is for the last batch of reads
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
621 char str[1024];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
622 bwt_t *bwt[2];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
623 uint8_t *pac;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
624
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
625 // initialization
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
626 pac = 0; bwt[0] = bwt[1] = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
627 for (i = 1; i != 256; ++i) g_log_n[i] = (int)(4.343 * log(i) + 0.5);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
628 bns = bns_restore(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
629 srand48(bns->seed);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
630 for (i = 0; i < 2; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
631 ks[i] = bwa_seq_open(fn_fa[i]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
632 fp_sa[i] = xopen(fn_sa[i], "r");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
633 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
634 g_hash = kh_init(64);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
635 last_ii.avg = -1.0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
636
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
637 fread(&opt, sizeof(gap_opt_t), 1, fp_sa[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
638 fread(&opt, sizeof(gap_opt_t), 1, fp_sa[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
639 if (!(opt.mode & BWA_MODE_COMPREAD)) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
640 popt->type = BWA_PET_SOLID;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
641 ntbns = bwa_open_nt(prefix);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
642 } else { // for Illumina alignment only
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
643 if (popt->is_preload) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
644 strcpy(str, prefix); strcat(str, ".bwt"); bwt[0] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
645 strcpy(str, prefix); strcat(str, ".sa"); bwt_restore_sa(str, bwt[0]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
646 strcpy(str, prefix); strcat(str, ".rbwt"); bwt[1] = bwt_restore_bwt(str);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
647 strcpy(str, prefix); strcat(str, ".rsa"); bwt_restore_sa(str, bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
648 pac = (ubyte_t*)calloc(bns->l_pac/4+1, 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
649 rewind(bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
650 fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
651 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
652 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
653
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
654 // core loop
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
655 bwa_print_sam_SQ(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
656 while ((seqs[0] = bwa_read_seq(ks[0], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual)) != 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
657 int cnt_chg;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
658 isize_info_t ii;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
659 ubyte_t *pacseq;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
660
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
661 seqs[1] = bwa_read_seq(ks[1], 0x40000, &n_seqs, opt.mode & BWA_MODE_COMPREAD, opt.trim_qual);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
662 tot_seqs += n_seqs;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
663 t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
664
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
665 fprintf(stderr, "[bwa_sai2sam_pe_core] convert to sequence coordinate... \n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
666 cnt_chg = bwa_cal_pac_pos_pe(prefix, bwt, n_seqs, seqs, fp_sa, &ii, popt, &opt, &last_ii);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
667 fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
668 fprintf(stderr, "[bwa_sai2sam_pe_core] changing coordinates of %d alignments.\n", cnt_chg);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
669
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
670 fprintf(stderr, "[bwa_sai2sam_pe_core] align unmapped mate...\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
671 pacseq = bwa_paired_sw(bns, pac, n_seqs, seqs, popt, &ii);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
672 fprintf(stderr, "[bwa_sai2sam_pe_core] time elapses: %.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
673
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
674 fprintf(stderr, "[bwa_sai2sam_pe_core] refine gapped alignments... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
675 for (j = 0; j < 2; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
676 bwa_refine_gapped(bns, n_seqs, seqs[j], pacseq, ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
677 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
678 if (pac == 0) free(pacseq);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
679
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
680 fprintf(stderr, "[bwa_sai2sam_pe_core] print alignments... ");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
681 for (i = 0; i < n_seqs; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
682 bwa_print_sam1(bns, seqs[0] + i, seqs[1] + i, opt.mode, opt.max_top2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
683 bwa_print_sam1(bns, seqs[1] + i, seqs[0] + i, opt.mode, opt.max_top2);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
684 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
685 fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC); t = clock();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
686
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
687 for (j = 0; j < 2; ++j)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
688 bwa_free_read_seq(n_seqs, seqs[j]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
689 fprintf(stderr, "[bwa_sai2sam_pe_core] %d sequences have been processed.\n", tot_seqs);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
690 last_ii = ii;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
691 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
692
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
693 // destroy
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
694 bns_destroy(bns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
695 if (ntbns) bns_destroy(ntbns);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
696 for (i = 0; i < 2; ++i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
697 bwa_seq_close(ks[i]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
698 fclose(fp_sa[i]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
699 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
700 for (iter = kh_begin(g_hash); iter != kh_end(g_hash); ++iter)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
701 if (kh_exist(g_hash, iter)) free(kh_val(g_hash, iter).a);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
702 kh_destroy(64, g_hash);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
703 if (pac) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
704 free(pac); bwt_destroy(bwt[0]); bwt_destroy(bwt[1]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
705 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
706 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
707
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
708 int bwa_sai2sam_pe(int argc, char *argv[])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
709 {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
710 int c;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
711 pe_opt_t *popt;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
712 popt = bwa_init_pe_opt();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
713 while ((c = getopt(argc, argv, "a:o:sPn:N:c:f:")) >= 0) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
714 switch (c) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
715 case 'a': popt->max_isize = atoi(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
716 case 'o': popt->max_occ = atoi(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
717 case 's': popt->is_sw = 0; break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
718 case 'P': popt->is_preload = 1; break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
719 case 'n': popt->n_multi = atoi(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
720 case 'N': popt->N_multi = atoi(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
721 case 'c': popt->ap_prior = atof(optarg); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
722 case 'f': freopen(optarg, "w", stdout); break;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
723 default: return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
724 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
725 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
726
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
727 if (optind + 5 > argc) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
728 fprintf(stderr, "\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
729 fprintf(stderr, "Usage: bwa sampe [options] <prefix> <in1.sai> <in2.sai> <in1.fq> <in2.fq>\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
730 fprintf(stderr, "Options: -a INT maximum insert size [%d]\n", popt->max_isize);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
731 fprintf(stderr, " -o INT maximum occurrences for one end [%d]\n", popt->max_occ);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
732 fprintf(stderr, " -n INT maximum hits to output for paired reads [%d]\n", popt->n_multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
733 fprintf(stderr, " -N INT maximum hits to output for discordant pairs [%d]\n", popt->N_multi);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
734 fprintf(stderr, " -c FLOAT prior of chimeric rate [%.1le]\n", popt->ap_prior);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
735 fprintf(stderr, " -P preload index into memory (for base-space reads only)\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
736 fprintf(stderr, " -s disable Smith-Waterman for the unmapped mate\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
737 fprintf(stderr, " -f FILE sam file to output results to instead of stdout\n\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
738 fprintf(stderr, "Notes: 1. For SOLiD reads, <in1.fq> corresponds R3 reads and <in2.fq> to F3.\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
739 fprintf(stderr, " 2. For reads shorter than 30bp, applying a smaller -o is recommended to\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
740 fprintf(stderr, " to get a sensible speed at the cost of pairing accuracy.\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
741 fprintf(stderr, "\n");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
742 return 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
743 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
744 bwa_sai2sam_pe_core(argv[optind], argv + optind + 1, argv + optind+3, popt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
745 free(popt);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
746 return 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
747 }