|
0
|
1 /* The MIT License
|
|
|
2
|
|
|
3 Copyright (c) 2008 Genome Research Ltd (GRL).
|
|
|
4
|
|
|
5 Permission is hereby granted, free of charge, to any person obtaining
|
|
|
6 a copy of this software and associated documentation files (the
|
|
|
7 "Software"), to deal in the Software without restriction, including
|
|
|
8 without limitation the rights to use, copy, modify, merge, publish,
|
|
|
9 distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
10 permit persons to whom the Software is furnished to do so, subject to
|
|
|
11 the following conditions:
|
|
|
12
|
|
|
13 The above copyright notice and this permission notice shall be
|
|
|
14 included in all copies or substantial portions of the Software.
|
|
|
15
|
|
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
23 SOFTWARE.
|
|
|
24 */
|
|
|
25
|
|
|
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
|
|
|
27
|
|
|
28 #ifndef BWA_BWT_H
|
|
|
29 #define BWA_BWT_H
|
|
|
30
|
|
|
31 #include <stdint.h>
|
|
|
32
|
|
|
33 // requirement: (OCC_INTERVAL%16 == 0)
|
|
|
34 #define OCC_INTERVAL 0x80
|
|
|
35
|
|
|
36 #ifndef BWA_UBYTE
|
|
|
37 #define BWA_UBYTE
|
|
|
38 typedef unsigned char ubyte_t;
|
|
|
39 #endif
|
|
|
40 typedef uint32_t bwtint_t;
|
|
|
41
|
|
|
42 typedef struct {
|
|
|
43 bwtint_t primary; // S^{-1}(0), or the primary index of BWT
|
|
|
44 bwtint_t L2[5]; // C(), cumulative count
|
|
|
45 bwtint_t seq_len; // sequence length
|
|
|
46 bwtint_t bwt_size; // size of bwt, about seq_len/4
|
|
|
47 uint32_t *bwt; // BWT
|
|
|
48 // occurance array, separated to two parts
|
|
|
49 uint32_t cnt_table[256];
|
|
|
50 // suffix array
|
|
|
51 int sa_intv;
|
|
|
52 bwtint_t n_sa;
|
|
|
53 bwtint_t *sa;
|
|
|
54 } bwt_t;
|
|
|
55
|
|
|
56 #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL*12 + 4 + (k)%OCC_INTERVAL/16])
|
|
|
57
|
|
|
58 /* retrieve a character from the $-removed BWT string. Note that
|
|
|
59 * bwt_t::bwt is not exactly the BWT string and therefore this macro is
|
|
|
60 * called bwt_B0 instead of bwt_B */
|
|
|
61 #define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3)
|
|
|
62
|
|
|
63 #define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL*12)
|
|
|
64
|
|
|
65 // inverse Psi function
|
|
|
66 #define bwt_invPsi(bwt, k) \
|
|
|
67 (((k) == (bwt)->primary)? 0 : \
|
|
|
68 ((k) < (bwt)->primary)? \
|
|
|
69 (bwt)->L2[bwt_B0(bwt, k)] + bwt_occ(bwt, k, bwt_B0(bwt, k)) \
|
|
|
70 : (bwt)->L2[bwt_B0(bwt, (k)-1)] + bwt_occ(bwt, k, bwt_B0(bwt, (k)-1)))
|
|
|
71
|
|
|
72 #ifdef __cplusplus
|
|
|
73 extern "C" {
|
|
|
74 #endif
|
|
|
75
|
|
|
76 void bwt_dump_bwt(const char *fn, const bwt_t *bwt);
|
|
|
77 void bwt_dump_sa(const char *fn, const bwt_t *bwt);
|
|
|
78
|
|
|
79 bwt_t *bwt_restore_bwt(const char *fn);
|
|
|
80 void bwt_restore_sa(const char *fn, bwt_t *bwt);
|
|
|
81
|
|
|
82 void bwt_destroy(bwt_t *bwt);
|
|
|
83
|
|
|
84 void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
|
|
|
85 void bwt_cal_sa(bwt_t *bwt, int intv);
|
|
|
86
|
|
|
87 void bwt_bwtupdate_core(bwt_t *bwt);
|
|
|
88
|
|
|
89 inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c);
|
|
|
90 inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]);
|
|
|
91 bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k);
|
|
|
92
|
|
|
93 // more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values
|
|
|
94 void bwt_gen_cnt_table(bwt_t *bwt);
|
|
|
95 inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol);
|
|
|
96 inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]);
|
|
|
97
|
|
|
98 int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end);
|
|
|
99 int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0);
|
|
|
100
|
|
|
101 #ifdef __cplusplus
|
|
|
102 }
|
|
|
103 #endif
|
|
|
104
|
|
|
105 #endif
|