0
|
1 /*
|
|
2
|
|
3 BWTConstruct.h BWT-Index Construction
|
|
4
|
|
5 This module constructs BWT and auxiliary data structures.
|
|
6
|
|
7 Copyright (C) 2004, Wong Chi Kwong.
|
|
8
|
|
9 This program is free software; you can redistribute it and/or
|
|
10 modify it under the terms of the GNU General Public License
|
|
11 as published by the Free Software Foundation; either version 2
|
|
12 of the License, or (at your option) any later version.
|
|
13
|
|
14 This program is distributed in the hope that it will be useful,
|
|
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17 GNU General Public License for more details.
|
|
18
|
|
19 You should have received a copy of the GNU General Public License
|
|
20 along with this program; if not, write to the Free Software
|
|
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
22
|
|
23 */
|
|
24
|
|
25 #ifndef BWT_GEN_H
|
|
26 #define BWT_GEN_H
|
|
27
|
|
/*
 * Packing constants: ALPHABET_SIZE symbols stored at BIT_PER_CHAR bits each,
 * which gives CHAR_PER_WORD symbols per 32-bit word and CHAR_PER_BYTE per byte.
 */
#define ALPHABET_SIZE                   4
#define BIT_PER_CHAR                    2
#define CHAR_PER_WORD                   16
#define CHAR_PER_BYTE                   4

/* Word geometry the constants above are derived from (32-bit words). */
#define BITS_IN_WORD                    32
#define BITS_IN_BYTE                    8
#define BYTES_IN_WORD                   4

/* A word with every bit set. */
#define ALL_ONE_MASK                    0xFFFFFFFF
/* Size, in words, of the DNA occurrence-count lookup table. */
#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD  65536

/*
 * Occurrence-value storage: two 16-bit values share one word.
 * NOTE(review): OCC_INTERVAL / OCC_INTERVAL_MAJOR are presumably the sampling
 * intervals for BWT.occValue / BWT.occValueMajor -- confirm against the users.
 */
#define BITS_PER_OCC_VALUE              16
#define OCC_VALUE_PER_WORD              2
#define OCC_INTERVAL                    256
#define OCC_INTERVAL_MAJOR              65536

/*
 * Boolean constants. Guarded so that including this header after another
 * header that already provides TRUE/FALSE does not redefine them.
 */
#ifndef TRUE
#define TRUE                            1
#endif
#ifndef FALSE
#define FALSE                           0
#endif

/* NOTE(review): presumably the item count at which sorting falls back to
 * insertion sort -- confirm against the code that uses it. */
#define BWTINC_INSERT_SORT_NUM_ITEM     7
|
|
49
|
|
/*
 * Function-like helper macros.
 *
 * All of them may evaluate an argument more than once, so never pass an
 * expression with side effects (i++, a function call that mutates state, ...).
 * Every argument is parenthesised so that compound expressions can be passed.
 */

/* Floor of the mean of two unsigned values, computed as (common bits) +
 * (differing bits)/2 so the intermediate sum cannot overflow. */
#define average(value1, value2)     ( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 )

/* Smaller / larger of two values. */
#define min(value1, value2)         ( ((value1) < (value2)) ? (value1) : (value2) )
#define max(value1, value2)         ( ((value1) > (value2)) ? (value1) : (value2) )

/* Median of three values. */
#define med3(a, b, c)               ( (a) < (b) \
                                        ? ((b) < (c) ? (b) : ((a) < (c) ? (c) : (a))) \
                                        : ((b) > (c) ? (b) : ((a) > (c) ? (c) : (a))) )

/*
 * Exchange a and b using t as scratch storage.
 * Wrapped in do { } while (0) so the macro behaves as a single statement:
 * it is safe as the body of an unbraced if/else or loop. Call it with a
 * trailing semicolon: swap(x, y, tmp);
 */
#define swap(a, b, t)               do { (t) = (a); (a) = (b); (b) = (t); } while (0)

/*
 * Clear the `offset` most-significant (truncateLeft) or least-significant
 * (truncateRight) bits of `value` by shifting them out and back.
 * Intended for unsigned operands; `offset` must be smaller than the bit
 * width of `value`.
 */
#define truncateLeft(value, offset)     ( (value) << (offset) >> (offset) )
#define truncateRight(value, offset)    ( (value) >> (offset) << (offset) )

/*
 * Non-zero when `sum` has no bit set under the mask 0xfefefeff, i.e. the only
 * bits that may be set are 0x01010100.
 * NOTE(review): presumably detects a packed per-byte occurrence sum that
 * overflowed its byte -- confirm against the caller.
 */
#define DNA_OCC_SUM_EXCEPTION(sum)  ( ((sum) & 0xfefefeff) == 0 )
|
|
58
|
|
/*
 * A range of suffix-array indices given by its two end points.
 * NOTE(review): whether endSaIndex is inclusive is not visible in this
 * header -- confirm against the code that fills the range.
 */
typedef struct SaIndexRange {
    unsigned int startSaIndex;  // SA index at which the range starts
    unsigned int endSaIndex;    // SA index at which the range ends
} SaIndexRange;
|
|
63
|
|
64 typedef struct BWT {
|
|
65 unsigned int textLength; // length of the text
|
|
66 unsigned int saInterval; // interval between two SA values stored explicitly
|
|
67 unsigned int inverseSaInterval; // interval between two inverse SA stored explicitly
|
|
68 unsigned int inverseSa0; // SA-1[0]
|
|
69 unsigned int *cumulativeFreq; // cumulative frequency
|
|
70 unsigned int *bwtCode; // BWT code
|
|
71 unsigned int *occValue; // Occurrence values stored explicitly
|
|
72 unsigned int *occValueMajor; // Occurrence values stored explicitly
|
|
73 unsigned int *saValue; // SA values stored explicitly
|
|
74 unsigned int *inverseSa; // Inverse SA stored explicitly
|
|
75 SaIndexRange *saIndexRange; // SA index range
|
|
76 int saIndexRangeNumOfChar; // Number of characters indexed in SA index range
|
|
77 unsigned int *saValueOnBoundary; // Pre-calculated frequently referred data
|
|
78 unsigned int *decodeTable; // For decoding BWT by table lookup
|
|
79 unsigned int decodeTableGenerated; // == TRUE if decode table is generated on load and will be freed
|
|
80 unsigned int bwtSizeInWord; // Temporary variable to hold the memory allocated
|
|
81 unsigned int occSizeInWord; // Temporary variable to hold the memory allocated
|
|
82 unsigned int occMajorSizeInWord; // Temporary variable to hold the memory allocated
|
|
83 unsigned int saValueSize; // Temporary variable to hold the memory allocated
|
|
84 unsigned int inverseSaSize; // Temporary variable to hold the memory allocated
|
|
85 unsigned int saIndexRangeSize; // Temporary variable to hold the memory allocated
|
|
86 } BWT;
|
|
87
|
|
88 typedef struct BWTInc {
|
|
89 BWT *bwt;
|
|
90 unsigned int numberOfIterationDone;
|
|
91 unsigned int *cumulativeCountInCurrentBuild;
|
|
92 unsigned int availableWord;
|
|
93 unsigned int targetTextLength;
|
|
94 float targetNBit;
|
|
95 unsigned int buildSize;
|
|
96 unsigned int initialMaxBuildSize;
|
|
97 unsigned int incMaxBuildSize;
|
|
98 unsigned int firstCharInLastIteration;
|
|
99 unsigned int *workingMemory;
|
|
100 unsigned int *packedText;
|
|
101 unsigned char *textBuffer;
|
|
102 unsigned int *packedShift;
|
|
103 } BWTInc;
|
|
104
|
|
105 #endif
|