annotate bwa-0.6.2/bwa.h @ 0:dd1186b11b3b draft

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 #ifndef BWA_H_
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2 #define BWA_H_
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4 #include <stdint.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6 #define BWA_DEF_MAX_SCORE 2048
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 #define BWA_MAX_QUERY_LEN 1024
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9 // BWA index
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 struct bwa_idx_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 typedef struct bwa_idx_t bwa_idx_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13 // Buffer for BWA alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 struct bwa_buf_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 typedef struct bwa_buf_t bwa_buf_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 // BWA alignment options
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 int s_gapo, s_gape; // gap open and extension penalties; the mismatch penalty is fixed at 3
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 int max_diff, max_gapo, max_gape; // max differences (-1 to use fnr for length-adjusted max diff), gap opens and gap extensions
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 int seed_len, max_seed_diff; // seed length and max differences allowed in the seed
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 float fnr; // parameter for automatic length-adjusted max differences
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 } bwa_opt_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 // default BWA alignment options
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 extern bwa_opt_t bwa_def_opt; // = { 11, 4, -1, 1, 6, 32, 2, 0.04 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 // an interval hit in the SA coordinate; basic unit in .sai files
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 uint32_t n_mm:16, n_gapo:8, n_gape:8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 int score;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 uint64_t k, l; // [k,l] is the SA interval; each interval has l-k+1 hits
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 } bwa_sai1_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 // all interval hits in the SA coordinate
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 int n; // number of interval hits
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 bwa_sai1_t *sai;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 } bwa_sai_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 // an alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43 uint32_t n_n:8, n_gap:12, n_mm:12; // number of ambiguous bases, gaps and mismatches in the alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 int32_t ref_id; // reference sequence index (the first seq is indexed by 0)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 uint32_t offset; // coordinate on the reference; zero-based
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 uint32_t n_cigar:16, flag:16; // number of CIGAR operations; SAM flag
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 uint32_t *cigar; // CIGAR in the BAM 28+4 encoding; having n_cigar operations
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 } bwa_aln_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 int mapQs, mapQ, c1, c2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 uint64_t sa;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 bwa_sai1_t *which;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 bwa_sai_t sai;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 bwa_aln_t one;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 } bwa_one_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 double avg, std, ap_prior;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 uint64_t low, high, high_bayesian;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 } bwa_pestat_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 #ifdef __cplusplus
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 extern "C" {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 // load a BWA index
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 bwa_idx_t *bwa_idx_load(const char *prefix);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 void bwa_idx_destroy(bwa_idx_t *p);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 // allocate a BWA alignment buffer; if unsure, set opt to &bwa_def_opt and max_score to BWA_DEF_MAX_SCORE
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 bwa_buf_t *bwa_buf_init(const bwa_opt_t *opt, int max_score);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 void bwa_buf_destroy(bwa_buf_t *p);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 /**
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 * Find all the SA intervals
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 * @param idx BWA index; multiple threads can share the same index
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 * @param buf BWA alignment buffer; each thread should have its own buffer
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 * @param seq NUL-terminated C string, consisting of A/C/G/T/N only
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 * @return SA intervals seq is matched to
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 bwa_sai_t bwa_sai(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 /**
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 * Construct an alignment in the base-pair coordinate
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 * @param idx BWA index
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 * @param buf BWA alignment buffer
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91 * @param seq NUL-terminated C string
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 * @param sa Suffix array value
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 * @param n_gaps Number of gaps (typically equal to bwa_sai1_t::n_gapo + bwa_sai1_t::n_gape)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 *
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 * @param aln Output parameter, presumably filled with the constructed alignment (the function itself returns void)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96 */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 void bwa_sa2aln(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, uint64_t sa, int n_gaps, bwa_aln_t *aln);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99 bwa_one_t *bwa_se(const bwa_idx_t *idx, bwa_buf_t *buf, const char *seq, int gen_cigar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101 void bwa_one_destroy(bwa_one_t *one);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 #ifdef __cplusplus
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107 #endif