Mercurial > repos > ashvark > qiime_1_8_0
comparison bwa-0.6.2/bwt.h @ 2:a294fbfcb1db draft default tip
Uploaded BWA
author | ashvark |
---|---|
date | Fri, 18 Jul 2014 07:55:59 -0400 |
parents | dd1186b11b3b |
children |
comparison
equal
deleted
inserted
replaced
1:a9636dc1e99a | 2:a294fbfcb1db |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2008 Genome Research Ltd (GRL). | |
4 | |
5 Permission is hereby granted, free of charge, to any person obtaining | |
6 a copy of this software and associated documentation files (the | |
7 "Software"), to deal in the Software without restriction, including | |
8 without limitation the rights to use, copy, modify, merge, publish, | |
9 distribute, sublicense, and/or sell copies of the Software, and to | |
10 permit persons to whom the Software is furnished to do so, subject to | |
11 the following conditions: | |
12 | |
13 The above copyright notice and this permission notice shall be | |
14 included in all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
23 SOFTWARE. | |
24 */ | |
25 | |
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */ | |
27 | |
28 #ifndef BWA_BWT_H | |
29 #define BWA_BWT_H | |
30 | |
31 #include <stdint.h> | |
32 | |
33 // requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line | |
34 #define OCC_INTERVAL 0x80 | |
35 | |
36 #ifndef BWA_UBYTE | |
37 #define BWA_UBYTE | |
38 typedef unsigned char ubyte_t; | |
39 #endif | |
40 | |
41 typedef uint64_t bwtint_t; | |
42 | |
43 typedef struct { | |
44 bwtint_t primary; // S^{-1}(0), or the primary index of BWT | |
45 bwtint_t L2[5]; // C(), cumulative count | |
46 bwtint_t seq_len; // sequence length | |
47 bwtint_t bwt_size; // size of bwt, about seq_len/4 | |
48 uint32_t *bwt; // BWT | |
49 // occurance array, separated to two parts | |
50 uint32_t cnt_table[256]; | |
51 // suffix array | |
52 int sa_intv; | |
53 bwtint_t n_sa; | |
54 bwtint_t *sa; | |
55 } bwt_t; | |
56 | |
57 typedef struct { | |
58 bwtint_t x[3], info; | |
59 } bwtintv_t; | |
60 | |
61 typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v; | |
62 | |
63 /* For general OCC_INTERVAL, the following is correct: | |
64 #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) | |
65 #define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) | |
66 */ | |
67 | |
68 // The following two lines are ONLY correct when OCC_INTERVAL==0x80 | |
69 #define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)]) | |
70 #define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4)) | |
71 | |
72 /* retrieve a character from the $-removed BWT string. Note that | |
73 * bwt_t::bwt is not exactly the BWT string and therefore this macro is | |
74 * called bwt_B0 instead of bwt_B */ | |
75 #define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3) | |
76 | |
77 // inverse Psi function | |
78 #define bwt_invPsi(bwt, k) \ | |
79 (((k) == (bwt)->primary)? 0 : \ | |
80 ((k) < (bwt)->primary)? \ | |
81 (bwt)->L2[bwt_B0(bwt, k)] + bwt_occ(bwt, k, bwt_B0(bwt, k)) \ | |
82 : (bwt)->L2[bwt_B0(bwt, (k)-1)] + bwt_occ(bwt, k, bwt_B0(bwt, (k)-1))) | |
83 | |
84 #define bwt_set_intv(bwt, c, ik) ((ik).x[0] = (bwt)->L2[(int)(c)]+1, (ik).x[2] = (bwt)->L2[(int)(c)+1]-(bwt)->L2[(int)(c)], (ik).x[1] = (bwt)->L2[3-(c)]+1, (ik).info = 0) | |
85 | |
86 #ifdef __cplusplus | |
87 extern "C" { | |
88 #endif | |
89 | |
90 void bwt_dump_bwt(const char *fn, const bwt_t *bwt); | |
91 void bwt_dump_sa(const char *fn, const bwt_t *bwt); | |
92 | |
93 bwt_t *bwt_restore_bwt(const char *fn); | |
94 void bwt_restore_sa(const char *fn, bwt_t *bwt); | |
95 | |
96 void bwt_destroy(bwt_t *bwt); | |
97 | |
98 void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW | |
99 void bwt_cal_sa(bwt_t *bwt, int intv); | |
100 | |
101 void bwt_bwtupdate_core(bwt_t *bwt); | |
102 | |
103 inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c); | |
104 inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]); | |
105 bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k); | |
106 | |
107 // more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values | |
108 void bwt_gen_cnt_table(bwt_t *bwt); | |
109 inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol); | |
110 inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]); | |
111 | |
112 int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end); | |
113 int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0); | |
114 | |
115 /** | |
116 * Extend bi-SA-interval _ik_ | |
117 */ | |
118 void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back); | |
119 | |
120 /** | |
121 * Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_. | |
122 * Return the end of the longest exact match starting from _x_. | |
123 */ | |
124 int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2]); | |
125 | |
126 #ifdef __cplusplus | |
127 } | |
128 #endif | |
129 | |
130 #endif |