annotate bwa-0.6.2/bwt_gen.c @ 2:a294fbfcb1db draft default tip

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:59 -0400
parents dd1186b11b3b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 /*
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3 BWTConstruct.c BWT-Index Construction
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 This module constructs BWT and auxiliary data structures.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 Copyright (C) 2004, Wong Chi Kwong.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9 This program is free software; you can redistribute it and/or
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 modify it under the terms of the GNU General Public License
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 as published by the Free Software Foundation; either version 2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12 of the License, or (at your option) any later version.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 This program is distributed in the hope that it will be useful,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 GNU General Public License for more details.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 You should have received a copy of the GNU General Public License
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 along with this program; if not, write to the Free Software
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 #include <stdio.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 #include <stdlib.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27 #include <string.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 #include <assert.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 #include <stdint.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 #include "QSufSort.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 typedef uint64_t bgint_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 typedef int64_t sbgint_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 #define ALPHABET_SIZE 4
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36 #define BIT_PER_CHAR 2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 #define CHAR_PER_WORD 16
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 #define CHAR_PER_BYTE 4
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40 #define BITS_IN_WORD 32
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 #define BITS_IN_BYTE 8
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 #define BYTES_IN_WORD 4
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 #define ALL_ONE_MASK 0xFFFFFFFF
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 #define DNA_OCC_CNT_TABLE_SIZE_IN_WORD 65536
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 #define BITS_PER_OCC_VALUE 16
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 #define OCC_VALUE_PER_WORD 2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 #define OCC_INTERVAL 256
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 #define OCC_INTERVAL_MAJOR 65536
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 #define TRUE 1
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 #define FALSE 0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 #define BWTINC_INSERT_SORT_NUM_ITEM 7
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57 #define MIN_AVAILABLE_WORD 0x10000
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 #define average(value1, value2) ( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 )
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 #define min(value1, value2) ( ((value1) < (value2)) ? (value1) : (value2) )
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 #define max(value1, value2) ( ((value1) > (value2)) ? (value1) : (value2) )
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62 #define med3(a, b, c) ( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 #define swap(a, b, t); t = a; a = b; b = t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 #define truncateLeft(value, offset) ( (value) << (offset) >> (offset) )
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 #define truncateRight(value, offset) ( (value) >> (offset) << (offset) )
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 #define DNA_OCC_SUM_EXCEPTION(sum) ((sum & 0xfefefeff) == 0)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 typedef struct BWT {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 bgint_t textLength; // length of the text
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 bgint_t inverseSa0; // SA-1[0]
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 bgint_t *cumulativeFreq; // cumulative frequency
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 unsigned int *bwtCode; // BWT code
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 unsigned int *occValue; // Occurrence values stored explicitly
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 bgint_t *occValueMajor; // Occurrence values stored explicitly
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 unsigned int *decodeTable; // For decoding BWT by table lookup
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 bgint_t bwtSizeInWord; // Temporary variable to hold the memory allocated
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 bgint_t occSizeInWord; // Temporary variable to hold the memory allocated
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 bgint_t occMajorSizeInWord; // Temporary variable to hold the memory allocated
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 } BWT;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 typedef struct BWTInc {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 BWT *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 unsigned int numberOfIterationDone;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 bgint_t *cumulativeCountInCurrentBuild;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85 bgint_t availableWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 bgint_t buildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 bgint_t initialMaxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 bgint_t incMaxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 unsigned int firstCharInLastIteration;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 unsigned int *workingMemory;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91 unsigned int *packedText;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 unsigned char *textBuffer;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 unsigned int *packedShift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 } BWTInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96 static bgint_t TextLengthFromBytePacked(bgint_t bytePackedLength, unsigned int bitPerChar,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 unsigned int lastByteLength)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99 return (bytePackedLength - 1) * (BITS_IN_BYTE / bitPerChar) + lastByteLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102 static void initializeVAL(unsigned int *startAddr, const bgint_t length, const unsigned int initValue)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 for (i=0; i<length; i++) startAddr[i] = initValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
108 static void initializeVAL_bg(bgint_t *startAddr, const bgint_t length, const bgint_t initValue)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
109 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
110 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
111 for (i=0; i<length; i++) startAddr[i] = initValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
112 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
113
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
114 static void GenerateDNAOccCountTable(unsigned int *dnaDecodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
115 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
116 unsigned int i, j, c, t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
117
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
118 for (i=0; i<DNA_OCC_CNT_TABLE_SIZE_IN_WORD; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
119 dnaDecodeTable[i] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
120 c = i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
121 for (j=0; j<8; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
122 t = c & 0x00000003;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
123 dnaDecodeTable[i] += 1 << (t * 8);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
124 c >>= 2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
125 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
126 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
127
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
128 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
129 // for BWTIncCreate()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
130 static bgint_t BWTOccValueMajorSizeInWord(const bgint_t numChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
131 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
132 bgint_t numOfOccValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
133 unsigned numOfOccIntervalPerMajor;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
134 numOfOccValue = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // Value at both end for bi-directional encoding
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
135 numOfOccIntervalPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
136 return (numOfOccValue + numOfOccIntervalPerMajor - 1) / numOfOccIntervalPerMajor * ALPHABET_SIZE;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
137 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
138 // for BWTIncCreate()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
139 static bgint_t BWTOccValueMinorSizeInWord(const bgint_t numChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
140 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
141 bgint_t numOfOccValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
142 numOfOccValue = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // Value at both end for bi-directional encoding
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
143 return (numOfOccValue + OCC_VALUE_PER_WORD - 1) / OCC_VALUE_PER_WORD * ALPHABET_SIZE;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
144 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
145 // for BWTIncCreate()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
146 static bgint_t BWTResidentSizeInWord(const bgint_t numChar) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
147
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
148 bgint_t numCharRoundUpToOccInterval;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
149
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
150 // The $ in BWT at the position of inverseSa0 is not encoded
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
151 numCharRoundUpToOccInterval = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL * OCC_INTERVAL;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
152
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
153 return (numCharRoundUpToOccInterval + CHAR_PER_WORD - 1) / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
154
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
155 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
156
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
157 static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
158 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
159 bgint_t maxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
160
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
161 if (bwtInc->bwt->textLength == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
162 // initial build
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
163 // Minus 2 because n+1 entries of seq and rank needed for n char
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
164 maxBuildSize = (bwtInc->availableWord - (2 + OCC_INTERVAL / CHAR_PER_WORD) * (sizeof(bgint_t) / 4))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
165 / (2 * CHAR_PER_WORD + 1) * CHAR_PER_WORD / (sizeof(bgint_t) / 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
166 if (bwtInc->initialMaxBuildSize > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
167 bwtInc->buildSize = min(bwtInc->initialMaxBuildSize, maxBuildSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
168 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
169 bwtInc->buildSize = maxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
170 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
171 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
172 // Minus 3 because n+1 entries of sorted rank, seq and rank needed for n char
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
173 // Minus numberOfIterationDone because bwt slightly shift to left in each iteration
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
174 maxBuildSize = (bwtInc->availableWord - bwtInc->bwt->bwtSizeInWord - bwtInc->bwt->occSizeInWord
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
175 - (3 + bwtInc->numberOfIterationDone * OCC_INTERVAL / BIT_PER_CHAR) * (sizeof(bgint_t) / 4))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
176 / 3 / (sizeof(bgint_t) / 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
177 if (maxBuildSize < CHAR_PER_WORD) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
178 fprintf(stderr, "BWTIncSetBuildSizeAndTextAddr(): Not enough space allocated to continue construction!\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
179 exit(1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
180 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
181 if (bwtInc->incMaxBuildSize > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
182 bwtInc->buildSize = min(bwtInc->incMaxBuildSize, maxBuildSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
183 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
184 bwtInc->buildSize = maxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
185 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
186 if (bwtInc->buildSize < CHAR_PER_WORD)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
187 bwtInc->buildSize = CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
188 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
189
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
190 if (bwtInc->buildSize < CHAR_PER_WORD) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
191 fprintf(stderr, "BWTIncSetBuildSizeAndTextAddr(): Not enough space allocated to continue construction!\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
192 exit(1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
193 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
194
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
195 bwtInc->buildSize = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
196
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
197 bwtInc->packedText = bwtInc->workingMemory + 2 * (bwtInc->buildSize + 1) * (sizeof(bgint_t) / 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
198 bwtInc->textBuffer = (unsigned char*)(bwtInc->workingMemory + (bwtInc->buildSize + 1) * (sizeof(bgint_t) / 4));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
199 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
200
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
201 // for ceilLog2()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
202 unsigned int leadingZero(const unsigned int input)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
203 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
204 unsigned int l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
205 const static unsigned int leadingZero8bit[256] = {8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
206 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
207 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
208 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
209 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
210 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
211 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
212 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
213
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
214 if (input & 0xFFFF0000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
215 if (input & 0xFF000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
216 l = leadingZero8bit[input >> 24];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
217 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
218 l = 8 + leadingZero8bit[input >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
219 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
220 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
221 if (input & 0x0000FF00) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
222 l = 16 + leadingZero8bit[input >> 8];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
223 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
224 l = 24 + leadingZero8bit[input];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
225 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
226 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
227 return l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
228
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
229 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
230 // for BitPerBytePackedChar()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
231 static unsigned int ceilLog2(const unsigned int input)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
232 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
233 if (input <= 1) return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
234 return BITS_IN_WORD - leadingZero(input - 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
235
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
236 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
237 // for ConvertBytePackedToWordPacked()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
238 static unsigned int BitPerBytePackedChar(const unsigned int alphabetSize)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
239 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
240 unsigned int bitPerChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
241 bitPerChar = ceilLog2(alphabetSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
242 // Return the largest number of bit that does not affect packing efficiency
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
243 if (BITS_IN_BYTE / (BITS_IN_BYTE / bitPerChar) > bitPerChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
244 bitPerChar = BITS_IN_BYTE / (BITS_IN_BYTE / bitPerChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
245 return bitPerChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
246 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
247 // for ConvertBytePackedToWordPacked()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
248 static unsigned int BitPerWordPackedChar(const unsigned int alphabetSize)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
249 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
250 return ceilLog2(alphabetSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
251 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
252
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
253 static void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
254 const bgint_t textLength)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
255 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
256 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
257 unsigned int j, k, c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
258 unsigned int bitPerBytePackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
259 unsigned int bitPerWordPackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
260 unsigned int charPerWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
261 unsigned int charPerByte;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
262 unsigned int bytePerIteration;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
263 bgint_t byteProcessed = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
264 bgint_t wordProcessed = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
265 unsigned int mask, shift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
266
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
267 unsigned int buffer[BITS_IN_WORD];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
268
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
269 bitPerBytePackedChar = BitPerBytePackedChar(alphabetSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
270 bitPerWordPackedChar = BitPerWordPackedChar(alphabetSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
271 charPerByte = BITS_IN_BYTE / bitPerBytePackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
272 charPerWord = BITS_IN_WORD / bitPerWordPackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
273
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
274 bytePerIteration = charPerWord / charPerByte;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
275 mask = truncateRight(ALL_ONE_MASK, BITS_IN_WORD - bitPerWordPackedChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
276 shift = BITS_IN_WORD - BITS_IN_BYTE + bitPerBytePackedChar - bitPerWordPackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
277
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
278 while ((wordProcessed + 1) * charPerWord < textLength) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
279
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
280 k = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
281 for (i=0; i<bytePerIteration; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
282 c = (unsigned int)input[byteProcessed] << shift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
283 for (j=0; j<charPerByte; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
284 buffer[k] = c & mask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
285 c <<= bitPerBytePackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
286 k++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
287 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
288 byteProcessed++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
289 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
290
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
291 c = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
292 for (i=0; i<charPerWord; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
293 c |= buffer[i] >> bitPerWordPackedChar * i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
294 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
295 output[wordProcessed] = c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
296 wordProcessed++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
297
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
298 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
299
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
300 k = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
301 for (i=0; i < (textLength - wordProcessed * charPerWord - 1) / charPerByte + 1; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
302 c = (unsigned int)input[byteProcessed] << shift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
303 for (j=0; j<charPerByte; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
304 buffer[k] = c & mask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
305 c <<= bitPerBytePackedChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
306 k++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
307 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
308 byteProcessed++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
309 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
310
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
311 c = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
312 for (i=0; i<textLength - wordProcessed * charPerWord; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
313 c |= buffer[i] >> bitPerWordPackedChar * i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
314 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
315 output[wordProcessed] = c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
316 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
317
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
318 BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
319 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
320 BWT *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
321
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
322 bwt = (BWT*)calloc(1, sizeof(BWT));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
323
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
324 bwt->textLength = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
325
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
326 bwt->cumulativeFreq = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
327 initializeVAL_bg(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
328
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
329 bwt->bwtSizeInWord = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
330
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
331 // Generate decode tables
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
332 if (decodeTable == NULL) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
333 bwt->decodeTable = (unsigned*)calloc(DNA_OCC_CNT_TABLE_SIZE_IN_WORD, sizeof(unsigned int));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
334 GenerateDNAOccCountTable(bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
335 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
336 bwt->decodeTable = decodeTable;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
337 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
338
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
339 bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(textLength);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
340 bwt->occValueMajor = (bgint_t*)calloc(bwt->occMajorSizeInWord, sizeof(bgint_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
341
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
342 bwt->occSizeInWord = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
343 bwt->occValue = NULL;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
344
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
345 return bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
346 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
347
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
348 BWTInc *BWTIncCreate(const bgint_t textLength, unsigned int initialMaxBuildSize, unsigned int incMaxBuildSize)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
349 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
350 BWTInc *bwtInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
351 unsigned int i, n_iter;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
352
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
353 if (textLength < incMaxBuildSize) incMaxBuildSize = textLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
354 if (textLength < initialMaxBuildSize) initialMaxBuildSize = textLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
355
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
356 bwtInc = (BWTInc*)calloc(1, sizeof(BWTInc));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
357 bwtInc->numberOfIterationDone = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
358 bwtInc->bwt = BWTCreate(textLength, NULL);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
359 bwtInc->initialMaxBuildSize = initialMaxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
360 bwtInc->incMaxBuildSize = incMaxBuildSize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
361 bwtInc->cumulativeCountInCurrentBuild = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
362 initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
363
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
364 // Build frequently accessed data
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
365 bwtInc->packedShift = (unsigned*)calloc(CHAR_PER_WORD, sizeof(unsigned int));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
366 for (i=0; i<CHAR_PER_WORD; i++)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
367 bwtInc->packedShift[i] = BITS_IN_WORD - (i+1) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
368
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
369 n_iter = (textLength - initialMaxBuildSize) / incMaxBuildSize + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
370 bwtInc->availableWord = BWTResidentSizeInWord(textLength) + BWTOccValueMinorSizeInWord(textLength) // minimal memory requirement
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
371 + OCC_INTERVAL / BIT_PER_CHAR * n_iter * 2 * (sizeof(bgint_t) / 4) // buffer at the end of occ array
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
372 + incMaxBuildSize/5 * 3 * (sizeof(bgint_t) / 4); // space for the 3 temporary arrays in each iteration
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
373 if (bwtInc->availableWord < MIN_AVAILABLE_WORD) bwtInc->availableWord = MIN_AVAILABLE_WORD; // lh3: otherwise segfaul when availableWord is too small
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
374 fprintf(stderr, "[%s] textLength=%ld, availableWord=%ld\n", __func__, (long)textLength, (long)bwtInc->availableWord);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
375 bwtInc->workingMemory = (unsigned*)calloc(bwtInc->availableWord, BYTES_IN_WORD);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
376
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
377 return bwtInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
378 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
379 // for BWTIncConstruct()
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
380 static void BWTIncPutPackedTextToRank(const unsigned int *packedText, bgint_t* __restrict rank,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
381 bgint_t* __restrict cumulativeCount, const bgint_t numChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
382 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
383 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
384 unsigned int j;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
385 unsigned int c, t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
386 unsigned int packedMask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
387 bgint_t rankIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
388 bgint_t lastWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
389 unsigned int numCharInLastWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
390
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
391 lastWord = (numChar - 1) / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
392 numCharInLastWord = numChar - lastWord * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
393
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
394 packedMask = ALL_ONE_MASK >> (BITS_IN_WORD - BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
395 rankIndex = numChar - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
396
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
397 t = packedText[lastWord] >> (BITS_IN_WORD - numCharInLastWord * BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
398 for (i=0; i<numCharInLastWord; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
399 c = t & packedMask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
400 cumulativeCount[c+1]++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
401 rank[rankIndex] = c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
402 rankIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
403 t >>= BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
404 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
405
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
406 for (i=lastWord; i--;) { // loop from lastWord - 1 to 0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
407 t = packedText[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
408 for (j=0; j<CHAR_PER_WORD; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
409 c = t & packedMask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
410 cumulativeCount[c+1]++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
411 rank[rankIndex] = c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
412 rankIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
413 t >>= BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
414 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
415 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
416
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
417 // Convert occurrence to cumulativeCount
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
418 cumulativeCount[2] += cumulativeCount[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
419 cumulativeCount[3] += cumulativeCount[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
420 cumulativeCount[4] += cumulativeCount[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
421 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
422
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
423
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
424 static void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const bgint_t index,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
425 bgint_t* __restrict occCount, const unsigned int* dnaDecodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
426 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
427 static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
428 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
429 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
430 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
431
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
432 bgint_t iteration, i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
433 unsigned int wordToCount, charToCount;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
434 unsigned int j, c, sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
435
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
436 occCount[0] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
437 occCount[1] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
438 occCount[2] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
439 occCount[3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
440
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
441 iteration = index / 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
442 wordToCount = (index - iteration * 256) / 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
443 charToCount = index - iteration * 256 - wordToCount * 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
444
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
445 for (i=0; i<iteration; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
446
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
447 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
448 for (j=0; j<16; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
449 sum += dnaDecodeTable[*dna >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
450 sum += dnaDecodeTable[*dna & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
451 dna++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
452 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
453 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
454 occCount[0] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
455 occCount[1] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
456 occCount[2] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
457 occCount[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
458 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
459 // only some or all of the 3 bits are on
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
460 // in reality, only one of the four cases are possible
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
461 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
462 occCount[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
463 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
464 occCount[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
465 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
466 occCount[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
467 } else if (sum == 0x00000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
468 occCount[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
469 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
470 fprintf(stderr, "ForwardDNAAllOccCountNoLimit(): DNA occ sum exception!\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
471 exit(1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
472 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
473 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
474
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
475 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
476
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
477 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
478 for (j=0; j<wordToCount; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
479 sum += dnaDecodeTable[*dna >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
480 sum += dnaDecodeTable[*dna & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
481 dna++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
482 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
483
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
484 if (charToCount > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
485 c = *dna & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
486 sum += dnaDecodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
487 sum += dnaDecodeTable[c & 0xFFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
488 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
489 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
490
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
491 occCount[0] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
492 occCount[1] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
493 occCount[2] += sum & 0x000000FF; sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
494 occCount[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
495 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
496
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
497 static void BWTIncBuildPackedBwt(const bgint_t *relativeRank, unsigned int* __restrict bwt, const bgint_t numChar,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
498 const bgint_t *cumulativeCount, const unsigned int *packedShift) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
499
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
500 bgint_t i, r;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
501 unsigned int c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
502 bgint_t previousRank, currentRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
503 bgint_t wordIndex, charIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
504 bgint_t inverseSa0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
505
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
506 inverseSa0 = previousRank = relativeRank[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
507
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
508 for (i=1; i<=numChar; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
509 currentRank = relativeRank[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
510 // previousRank > cumulativeCount[c] because $ is one of the char
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
511 c = (previousRank > cumulativeCount[1]) + (previousRank > cumulativeCount[2])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
512 + (previousRank > cumulativeCount[3]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
513 // set bwt for currentRank
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
514 if (c > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
515 // c <> 'a'
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
516 r = currentRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
517 if (r > inverseSa0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
518 // - 1 because $ at inverseSa0 is not encoded
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
519 r--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
520 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
521 wordIndex = r / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
522 charIndex = r - wordIndex * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
523 bwt[wordIndex] |= c << packedShift[charIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
524 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
525 previousRank = currentRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
526 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
527 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
528
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
529 static inline bgint_t BWTOccValueExplicit(const BWT *bwt, const bgint_t occIndexExplicit,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
530 const unsigned int character)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
531 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
532 bgint_t occIndexMajor;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
533
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
534 occIndexMajor = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
535
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
536 if (occIndexExplicit % OCC_VALUE_PER_WORD == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
537 return bwt->occValueMajor[occIndexMajor * ALPHABET_SIZE + character] +
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
538 (bwt->occValue[occIndexExplicit / OCC_VALUE_PER_WORD * ALPHABET_SIZE + character] >> 16);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
539
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
540 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
541 return bwt->occValueMajor[occIndexMajor * ALPHABET_SIZE + character] +
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
542 (bwt->occValue[occIndexExplicit / OCC_VALUE_PER_WORD * ALPHABET_SIZE + character] & 0x0000FFFF);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
543 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
544 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
545
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
546
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
547 static unsigned int ForwardDNAOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
548 const unsigned int* dnaDecodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
549 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
550 static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
551 0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
552 0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
553 0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
554
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
555 unsigned int wordToCount, charToCount;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
556 unsigned int i, c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
557 unsigned int sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
558
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
559 wordToCount = index / 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
560 charToCount = index - wordToCount * 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
561
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
562 for (i=0; i<wordToCount; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
563 sum += dnaDecodeTable[dna[i] >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
564 sum += dnaDecodeTable[dna[i] & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
565 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
566
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
567 if (charToCount > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
568 c = dna[i] & truncateRightMask[charToCount]; // increase count of 'a' by 16 - c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
569 sum += dnaDecodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
570 sum += dnaDecodeTable[c & 0xFFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
571 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
572 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
573
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
574 return (sum >> (character * 8)) & 0x000000FF;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
575
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
576 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
577
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
578 static unsigned int BackwardDNAOccCount(const unsigned int* dna, const unsigned int index, const unsigned int character,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
579 const unsigned int* dnaDecodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
580 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
581 static const unsigned int truncateLeftMask[16] = { 0x00000000, 0x00000003, 0x0000000F, 0x0000003F,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
582 0x000000FF, 0x000003FF, 0x00000FFF, 0x00003FFF,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
583 0x0000FFFF, 0x0003FFFF, 0x000FFFFF, 0x003FFFFF,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
584 0x00FFFFFF, 0x03FFFFFF, 0x0FFFFFFF, 0x3FFFFFFF };
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
585
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
586 unsigned int wordToCount, charToCount;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
587 unsigned int i, c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
588 unsigned int sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
589
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
590 wordToCount = index / 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
591 charToCount = index - wordToCount * 16;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
592
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
593 dna -= wordToCount + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
594
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
595 if (charToCount > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
596 c = *dna & truncateLeftMask[charToCount]; // increase count of 'a' by 16 - c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
597 sum += dnaDecodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
598 sum += dnaDecodeTable[c & 0xFFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
599 sum += charToCount - 16; // decrease count of 'a' by 16 - positionToProcess
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
600 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
601
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
602 for (i=0; i<wordToCount; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
603 dna++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
604 sum += dnaDecodeTable[*dna >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
605 sum += dnaDecodeTable[*dna & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
606 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
607
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
608 return (sum >> (character * 8)) & 0x000000FF;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
609
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
610 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
611
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
612 bgint_t BWTOccValue(const BWT *bwt, bgint_t index, const unsigned int character)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
613 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
614 bgint_t occValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
615 bgint_t occExplicitIndex, occIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
616
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
617 // $ is supposed to be positioned at inverseSa0 but it is not encoded
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
618 // therefore index is subtracted by 1 for adjustment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
619 if (index > bwt->inverseSa0)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
620 index--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
621
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
622 occExplicitIndex = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL; // Bidirectional encoding
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
623 occIndex = occExplicitIndex * OCC_INTERVAL;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
624 occValue = BWTOccValueExplicit(bwt, occExplicitIndex, character);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
625
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
626 if (occIndex == index)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
627 return occValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
628
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
629 if (occIndex < index) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
630 return occValue + ForwardDNAOccCount(bwt->bwtCode + occIndex / CHAR_PER_WORD, index - occIndex, character, bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
631 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
632 return occValue - BackwardDNAOccCount(bwt->bwtCode + occIndex / CHAR_PER_WORD, occIndex - index, character, bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
633 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
634 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
635
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
636 static bgint_t BWTIncGetAbsoluteRank(BWT *bwt, bgint_t* __restrict absoluteRank, bgint_t* __restrict seq,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
637 const unsigned int *packedText, const bgint_t numChar,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
638 const bgint_t* cumulativeCount, const unsigned int firstCharInLastIteration)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
639 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
640 bgint_t saIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
641 bgint_t lastWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
642 unsigned int packedMask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
643 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
644 unsigned int c, t, j;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
645 bgint_t rankIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
646 unsigned int shift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
647 bgint_t seqIndexFromStart[ALPHABET_SIZE];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
648 bgint_t seqIndexFromEnd[ALPHABET_SIZE];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
649
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
650 for (i=0; i<ALPHABET_SIZE; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
651 seqIndexFromStart[i] = cumulativeCount[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
652 seqIndexFromEnd[i] = cumulativeCount[i+1] - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
653 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
654
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
655 shift = BITS_IN_WORD - BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
656 packedMask = ALL_ONE_MASK >> shift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
657 saIndex = bwt->inverseSa0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
658 rankIndex = numChar - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
659
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
660 lastWord = numChar / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
661 for (i=lastWord; i--;) { // loop from lastWord - 1 to 0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
662 t = packedText[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
663 for (j=0; j<CHAR_PER_WORD; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
664 c = t & packedMask;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
665 saIndex = bwt->cumulativeFreq[c] + BWTOccValue(bwt, saIndex, c) + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
666 // A counting sort using the first character of suffix is done here
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
667 // If rank > inverseSa0 -> fill seq from end, otherwise fill seq from start -> to leave the right entry for inverseSa0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
668 if (saIndex > bwt->inverseSa0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
669 seq[seqIndexFromEnd[c]] = rankIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
670 absoluteRank[seqIndexFromEnd[c]] = saIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
671 seqIndexFromEnd[c]--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
672 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
673 seq[seqIndexFromStart[c]] = rankIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
674 absoluteRank[seqIndexFromStart[c]] = saIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
675 seqIndexFromStart[c]++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
676 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
677 rankIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
678 t >>= BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
679 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
680 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
681
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
682 absoluteRank[seqIndexFromStart[firstCharInLastIteration]] = bwt->inverseSa0; // representing the substring of all preceding characters
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
683 seq[seqIndexFromStart[firstCharInLastIteration]] = numChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
684
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
685 return seqIndexFromStart[firstCharInLastIteration];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
686 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
687
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
688 static void BWTIncSortKey(bgint_t* __restrict key, bgint_t* __restrict seq, const bgint_t numItem)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
689 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
690 #define EQUAL_KEY_THRESHOLD 4 // Partition for equal key if data array size / the number of data with equal value with pivot < EQUAL_KEY_THRESHOLD
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
691
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
692 int64_t lowIndex, highIndex, midIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
693 int64_t lowPartitionIndex, highPartitionIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
694 int64_t lowStack[32], highStack[32];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
695 int stackDepth;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
696 int64_t i, j;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
697 bgint_t tempSeq, tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
698 int64_t numberOfEqualKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
699
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
700 if (numItem < 2) return;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
701
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
702 stackDepth = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
703
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
704 lowIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
705 highIndex = numItem - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
706
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
707 for (;;) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
708
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
709 for (;;) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
710
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
711 // Sort small array of data
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
712 if (highIndex - lowIndex < BWTINC_INSERT_SORT_NUM_ITEM) { // Insertion sort on smallest arrays
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
713 for (i=lowIndex+1; i<=highIndex; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
714 tempSeq = seq[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
715 tempKey = key[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
716 for (j = i; j > lowIndex && key[j-1] > tempKey; j--) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
717 seq[j] = seq[j-1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
718 key[j] = key[j-1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
719 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
720 if (j != i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
721 seq[j] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
722 key[j] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
723 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
724 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
725 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
726 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
727
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
728 // Choose pivot as median of the lowest, middle, and highest data; sort the three data
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
729
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
730 midIndex = average(lowIndex, highIndex);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
731 if (key[lowIndex] > key[midIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
732 tempSeq = seq[lowIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
733 tempKey = key[lowIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
734 seq[lowIndex] = seq[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
735 key[lowIndex] = key[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
736 seq[midIndex] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
737 key[midIndex] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
738 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
739 if (key[lowIndex] > key[highIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
740 tempSeq = seq[lowIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
741 tempKey = key[lowIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
742 seq[lowIndex] = seq[highIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
743 key[lowIndex] = key[highIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
744 seq[highIndex] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
745 key[highIndex] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
746 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
747 if (key[midIndex] > key[highIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
748 tempSeq = seq[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
749 tempKey = key[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
750 seq[midIndex] = seq[highIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
751 key[midIndex] = key[highIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
752 seq[highIndex] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
753 key[highIndex] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
754 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
755
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
756 // Partition data
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
757
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
758 numberOfEqualKey = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
759
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
760 lowPartitionIndex = lowIndex + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
761 highPartitionIndex = highIndex - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
762
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
763 for (;;) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
764 while (lowPartitionIndex <= highPartitionIndex && key[lowPartitionIndex] <= key[midIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
765 numberOfEqualKey += (key[lowPartitionIndex] == key[midIndex]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
766 lowPartitionIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
767 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
768 while (lowPartitionIndex < highPartitionIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
769 if (key[midIndex] >= key[highPartitionIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
770 numberOfEqualKey += (key[midIndex] == key[highPartitionIndex]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
771 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
772 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
773 highPartitionIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
774 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
775 if (lowPartitionIndex >= highPartitionIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
776 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
777 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
778 tempSeq = seq[lowPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
779 tempKey = key[lowPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
780 seq[lowPartitionIndex] = seq[highPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
781 key[lowPartitionIndex] = key[highPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
782 seq[highPartitionIndex] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
783 key[highPartitionIndex] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
784 if (highPartitionIndex == midIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
785 // partition key has been moved
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
786 midIndex = lowPartitionIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
787 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
788 lowPartitionIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
789 highPartitionIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
790 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
791
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
792 // Adjust the partition index
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
793 highPartitionIndex = lowPartitionIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
794 lowPartitionIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
795
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
796 // move the partition key to end of low partition
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
797 tempSeq = seq[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
798 tempKey = key[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
799 seq[midIndex] = seq[lowPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
800 key[midIndex] = key[lowPartitionIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
801 seq[lowPartitionIndex] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
802 key[lowPartitionIndex] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
803
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
804 if (highIndex - lowIndex + BWTINC_INSERT_SORT_NUM_ITEM <= EQUAL_KEY_THRESHOLD * numberOfEqualKey) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
805
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
806 // Many keys = partition key; separate the equal key data from the lower partition
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
807
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
808 midIndex = lowIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
809
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
810 for (;;) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
811 while (midIndex < lowPartitionIndex && key[midIndex] < key[lowPartitionIndex]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
812 midIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
813 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
814 while (midIndex < lowPartitionIndex && key[lowPartitionIndex] == key[lowPartitionIndex - 1]) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
815 lowPartitionIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
816 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
817 if (midIndex >= lowPartitionIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
818 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
819 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
820 tempSeq = seq[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
821 tempKey = key[midIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
822 seq[midIndex] = seq[lowPartitionIndex - 1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
823 key[midIndex] = key[lowPartitionIndex - 1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
824 seq[lowPartitionIndex - 1] = tempSeq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
825 key[lowPartitionIndex - 1] = tempKey;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
826 midIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
827 lowPartitionIndex--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
828 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
829
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
830 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
831
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
832 if (lowPartitionIndex - lowIndex > highIndex - highPartitionIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
833 // put the larger partition to stack
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
834 lowStack[stackDepth] = lowIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
835 highStack[stackDepth] = lowPartitionIndex - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
836 stackDepth++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
837 // sort the smaller partition first
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
838 lowIndex = highPartitionIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
839 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
840 // put the larger partition to stack
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
841 lowStack[stackDepth] = highPartitionIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
842 highStack[stackDepth] = highIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
843 stackDepth++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
844 // sort the smaller partition first
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
845 if (lowPartitionIndex > lowIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
846 highIndex = lowPartitionIndex - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
847 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
848 // all keys in the partition equals to the partition key
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
849 break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
850 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
851 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
852 continue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
853 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
854
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
855 // Pop a range from stack
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
856 if (stackDepth > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
857 stackDepth--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
858 lowIndex = lowStack[stackDepth];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
859 highIndex = highStack[stackDepth];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
860 continue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
861 } else return;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
862 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
863 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
864
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
865
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
866 static void BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __restrict seq,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
867 bgint_t* __restrict relativeRank, const bgint_t numItem,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
868 bgint_t oldInverseSa0, const bgint_t *cumulativeCount)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
869 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
870 bgint_t i, c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
871 bgint_t s, r;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
872 bgint_t lastRank, lastIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
873 bgint_t oldInverseSa0RelativeRank = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
874 bgint_t freq;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
875
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
876 lastIndex = numItem;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
877 lastRank = sortedRank[numItem];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
878 if (lastRank > oldInverseSa0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
879 sortedRank[numItem]--; // to prepare for merging; $ is not encoded in bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
880 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
881 s = seq[numItem];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
882 relativeRank[s] = numItem;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
883 if (lastRank == oldInverseSa0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
884 oldInverseSa0RelativeRank = numItem;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
885 oldInverseSa0++; // so that this segment of code is not run again
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
886 lastRank++; // so that oldInverseSa0 become a sorted group with 1 item
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
887 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
888
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
889 c = ALPHABET_SIZE - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
890 freq = cumulativeCount[c];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
891
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
892 for (i=numItem; i--;) { // from numItem - 1 to 0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
893 r = sortedRank[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
894 if (r > oldInverseSa0)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
895 sortedRank[i]--; // to prepare for merging; $ is not encoded in bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
896 s = seq[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
897 if (i < freq) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
898 if (lastIndex >= freq)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
899 lastRank++; // to trigger the group across alphabet boundary to be split
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
900 c--;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
901 freq = cumulativeCount[c];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
902 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
903 if (r == lastRank) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
904 relativeRank[s] = lastIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
905 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
906 if (i == lastIndex - 1) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
907 if (lastIndex < numItem && (sbgint_t)seq[lastIndex + 1] < 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
908 seq[lastIndex] = seq[lastIndex + 1] - 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
909 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
910 seq[lastIndex] = (bgint_t)-1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
911 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
912 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
913 lastIndex = i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
914 lastRank = r;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
915 relativeRank[s] = i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
916 if (r == oldInverseSa0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
917 oldInverseSa0RelativeRank = i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
918 oldInverseSa0++; // so that this segment of code is not run again
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
919 lastRank++; // so that oldInverseSa0 become a sorted group with 1 item
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
920 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
921 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
922 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
923
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
924 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
925
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
926 static void BWTIncBuildBwt(unsigned int* insertBwt, const bgint_t *relativeRank, const bgint_t numChar,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
927 const bgint_t *cumulativeCount)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
928 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
929 unsigned int c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
930 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
931 bgint_t previousRank, currentRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
932
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
933 previousRank = relativeRank[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
934
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
935 for (i=1; i<=numChar; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
936 currentRank = relativeRank[i];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
937 c = (previousRank >= cumulativeCount[1]) + (previousRank >= cumulativeCount[2])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
938 + (previousRank >= cumulativeCount[3]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
939 insertBwt[currentRank] = c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
940 previousRank = currentRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
941 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
942 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
943
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
944 static void BWTIncMergeBwt(const bgint_t *sortedRank, const unsigned int* oldBwt, const unsigned int *insertBwt,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
945 unsigned int* __restrict mergedBwt, const bgint_t numOldBwt, const bgint_t numInsertBwt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
946 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
947 unsigned int bitsInWordMinusBitPerChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
948 bgint_t leftShift, rightShift;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
949 bgint_t o;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
950 bgint_t oIndex, iIndex, mIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
951 bgint_t mWord, mChar, oWord, oChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
952 bgint_t numInsert;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
953
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
954 bitsInWordMinusBitPerChar = BITS_IN_WORD - BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
955
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
956 oIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
957 iIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
958 mIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
959
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
960 mWord = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
961 mChar = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
962
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
963 mergedBwt[0] = 0; // this can be cleared as merged Bwt slightly shift to the left in each iteration
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
964
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
965 while (oIndex < numOldBwt) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
966
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
967 // copy from insertBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
968 while (iIndex <= numInsertBwt && sortedRank[iIndex] <= oIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
969 if (sortedRank[iIndex] != 0) { // special value to indicate that this is for new inverseSa0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
970 mergedBwt[mWord] |= insertBwt[iIndex] << (BITS_IN_WORD - (mChar + 1) * BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
971 mIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
972 mChar++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
973 if (mChar == CHAR_PER_WORD) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
974 mChar = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
975 mWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
976 mergedBwt[mWord] = 0; // no need to worry about crossing mergedBwt boundary
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
977 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
978 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
979 iIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
980 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
981
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
982 // Copy from oldBwt to mergedBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
983 if (iIndex <= numInsertBwt) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
984 o = sortedRank[iIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
985 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
986 o = numOldBwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
987 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
988 numInsert = o - oIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
989
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
990 oWord = oIndex / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
991 oChar = oIndex - oWord * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
992 if (oChar > mChar) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
993 leftShift = (oChar - mChar) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
994 rightShift = (CHAR_PER_WORD + mChar - oChar) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
995 mergedBwt[mWord] = mergedBwt[mWord]
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
996 | (oldBwt[oWord] << (oChar * BIT_PER_CHAR) >> (mChar * BIT_PER_CHAR))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
997 | (oldBwt[oWord+1] >> rightShift);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
998 oIndex += min(numInsert, CHAR_PER_WORD - mChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
999 while (o > oIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1000 oWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1001 mWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1002 mergedBwt[mWord] = (oldBwt[oWord] << leftShift) | (oldBwt[oWord+1] >> rightShift);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1003 oIndex += CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1004 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1005 } else if (oChar < mChar) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1006 rightShift = (mChar - oChar) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1007 leftShift = (CHAR_PER_WORD + oChar - mChar) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1008 mergedBwt[mWord] = mergedBwt[mWord]
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1009 | (oldBwt[oWord] << (oChar * BIT_PER_CHAR) >> (mChar * BIT_PER_CHAR));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1010 oIndex += min(numInsert, CHAR_PER_WORD - mChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1011 while (o > oIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1012 oWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1013 mWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1014 mergedBwt[mWord] = (oldBwt[oWord-1] << leftShift) | (oldBwt[oWord] >> rightShift);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1015 oIndex += CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1016 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1017 } else { // oChar == mChar
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1018 mergedBwt[mWord] = mergedBwt[mWord] | truncateLeft(oldBwt[oWord], mChar * BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1019 oIndex += min(numInsert, CHAR_PER_WORD - mChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1020 while (o > oIndex) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1021 oWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1022 mWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1023 mergedBwt[mWord] = oldBwt[oWord];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1024 oIndex += CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1025 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1026 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1027 oIndex = o;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1028 mIndex += numInsert;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1029
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1030 // Clear the trailing garbage in mergedBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1031 mWord = mIndex / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1032 mChar = mIndex - mWord * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1033 if (mChar == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1034 mergedBwt[mWord] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1035 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1036 mergedBwt[mWord] = truncateRight(mergedBwt[mWord], (BITS_IN_WORD - mChar * BIT_PER_CHAR));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1037 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1038
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1039 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1040
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1041 // copy from insertBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1042 while (iIndex <= numInsertBwt) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1043 if (sortedRank[iIndex] != 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1044 mergedBwt[mWord] |= insertBwt[iIndex] << (BITS_IN_WORD - (mChar + 1) * BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1045 mIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1046 mChar++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1047 if (mChar == CHAR_PER_WORD) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1048 mChar = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1049 mWord++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1050 mergedBwt[mWord] = 0; // no need to worry about crossing mergedBwt boundary
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1051 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1052 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1053 iIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1054 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1055 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1056
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1057 void BWTClearTrailingBwtCode(BWT *bwt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1058 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1059 bgint_t bwtResidentSizeInWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1060 bgint_t wordIndex, offset;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1061 bgint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1062
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1063 bwtResidentSizeInWord = BWTResidentSizeInWord(bwt->textLength);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1064
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1065 wordIndex = bwt->textLength / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1066 offset = (bwt->textLength - wordIndex * CHAR_PER_WORD) * BIT_PER_CHAR;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1067 if (offset > 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1068 bwt->bwtCode[wordIndex] = truncateRight(bwt->bwtCode[wordIndex], BITS_IN_WORD - offset);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1069 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1070 if (wordIndex < bwtResidentSizeInWord) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1071 bwt->bwtCode[wordIndex] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1072 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1073 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1074
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1075 for (i=wordIndex+1; i<bwtResidentSizeInWord; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1076 bwt->bwtCode[i] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1077 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1078 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1079
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1080
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1081 void BWTGenerateOccValueFromBwt(const unsigned int* bwt, unsigned int* __restrict occValue,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1082 bgint_t* __restrict occValueMajor,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1083 const bgint_t textLength, const unsigned int* decodeTable)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1084 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1085 bgint_t numberOfOccValueMajor, numberOfOccValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1086 unsigned int wordBetweenOccValue;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1087 bgint_t numberOfOccIntervalPerMajor;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1088 unsigned int c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1089 bgint_t i, j;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1090 bgint_t occMajorIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1091 bgint_t occIndex, bwtIndex;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1092 bgint_t sum; // perhaps unsigned is big enough
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1093 bgint_t tempOccValue0[ALPHABET_SIZE], tempOccValue1[ALPHABET_SIZE];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1094
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1095 wordBetweenOccValue = OCC_INTERVAL / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1096
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1097 // Calculate occValue
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1098 numberOfOccValue = (textLength + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // Value at both end for bi-directional encoding
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1099 numberOfOccIntervalPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1100 numberOfOccValueMajor = (numberOfOccValue + numberOfOccIntervalPerMajor - 1) / numberOfOccIntervalPerMajor;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1101
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1102 tempOccValue0[0] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1103 tempOccValue0[1] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1104 tempOccValue0[2] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1105 tempOccValue0[3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1106 occValueMajor[0] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1107 occValueMajor[1] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1108 occValueMajor[2] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1109 occValueMajor[3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1110
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1111 occIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1112 bwtIndex = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1113 for (occMajorIndex=1; occMajorIndex<numberOfOccValueMajor; occMajorIndex++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1114
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1115 for (i=0; i<numberOfOccIntervalPerMajor/2; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1116
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1117 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1118 tempOccValue1[0] = tempOccValue0[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1119 tempOccValue1[1] = tempOccValue0[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1120 tempOccValue1[2] = tempOccValue0[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1121 tempOccValue1[3] = tempOccValue0[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1122
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1123 for (j=0; j<wordBetweenOccValue; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1124 c = bwt[bwtIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1125 sum += decodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1126 sum += decodeTable[c & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1127 bwtIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1128 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1129 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1130 tempOccValue1[0] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1131 tempOccValue1[1] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1132 tempOccValue1[2] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1133 tempOccValue1[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1134 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1135 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1136 tempOccValue1[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1137 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1138 tempOccValue1[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1139 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1140 tempOccValue1[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1141 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1142 tempOccValue1[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1143 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1144 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1145 occValue[occIndex * 4 + 0] = (tempOccValue0[0] << 16) | tempOccValue1[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1146 occValue[occIndex * 4 + 1] = (tempOccValue0[1] << 16) | tempOccValue1[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1147 occValue[occIndex * 4 + 2] = (tempOccValue0[2] << 16) | tempOccValue1[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1148 occValue[occIndex * 4 + 3] = (tempOccValue0[3] << 16) | tempOccValue1[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1149 tempOccValue0[0] = tempOccValue1[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1150 tempOccValue0[1] = tempOccValue1[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1151 tempOccValue0[2] = tempOccValue1[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1152 tempOccValue0[3] = tempOccValue1[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1153 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1154
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1155 occIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1156
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1157 for (j=0; j<wordBetweenOccValue; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1158 c = bwt[bwtIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1159 sum += decodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1160 sum += decodeTable[c & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1161 bwtIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1162 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1163 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1164 tempOccValue0[0] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1165 tempOccValue0[1] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1166 tempOccValue0[2] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1167 tempOccValue0[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1168 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1169 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1170 tempOccValue0[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1171 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1172 tempOccValue0[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1173 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1174 tempOccValue0[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1175 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1176 tempOccValue0[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1177 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1178 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1179 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1180
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1181 occValueMajor[occMajorIndex * 4 + 0] = occValueMajor[(occMajorIndex - 1) * 4 + 0] + tempOccValue0[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1182 occValueMajor[occMajorIndex * 4 + 1] = occValueMajor[(occMajorIndex - 1) * 4 + 1] + tempOccValue0[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1183 occValueMajor[occMajorIndex * 4 + 2] = occValueMajor[(occMajorIndex - 1) * 4 + 2] + tempOccValue0[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1184 occValueMajor[occMajorIndex * 4 + 3] = occValueMajor[(occMajorIndex - 1) * 4 + 3] + tempOccValue0[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1185 tempOccValue0[0] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1186 tempOccValue0[1] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1187 tempOccValue0[2] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1188 tempOccValue0[3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1189
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1190 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1191
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1192 while (occIndex < (numberOfOccValue-1)/2) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1193 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1194 tempOccValue1[0] = tempOccValue0[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1195 tempOccValue1[1] = tempOccValue0[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1196 tempOccValue1[2] = tempOccValue0[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1197 tempOccValue1[3] = tempOccValue0[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1198 for (j=0; j<wordBetweenOccValue; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1199 c = bwt[bwtIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1200 sum += decodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1201 sum += decodeTable[c & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1202 bwtIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1203 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1204 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1205 tempOccValue1[0] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1206 tempOccValue1[1] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1207 tempOccValue1[2] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1208 tempOccValue1[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1209 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1210 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1211 tempOccValue1[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1212 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1213 tempOccValue1[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1214 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1215 tempOccValue1[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1216 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1217 tempOccValue1[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1218 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1219 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1220 occValue[occIndex * 4 + 0] = (tempOccValue0[0] << 16) | tempOccValue1[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1221 occValue[occIndex * 4 + 1] = (tempOccValue0[1] << 16) | tempOccValue1[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1222 occValue[occIndex * 4 + 2] = (tempOccValue0[2] << 16) | tempOccValue1[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1223 occValue[occIndex * 4 + 3] = (tempOccValue0[3] << 16) | tempOccValue1[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1224 tempOccValue0[0] = tempOccValue1[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1225 tempOccValue0[1] = tempOccValue1[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1226 tempOccValue0[2] = tempOccValue1[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1227 tempOccValue0[3] = tempOccValue1[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1228 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1229 occIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1230
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1231 for (j=0; j<wordBetweenOccValue; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1232 c = bwt[bwtIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1233 sum += decodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1234 sum += decodeTable[c & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1235 bwtIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1236 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1237 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1238 tempOccValue0[0] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1239 tempOccValue0[1] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1240 tempOccValue0[2] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1241 tempOccValue0[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1242 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1243 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1244 tempOccValue0[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1245 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1246 tempOccValue0[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1247 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1248 tempOccValue0[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1249 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1250 tempOccValue0[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1251 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1252 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1253 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1254
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1255 sum = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1256 tempOccValue1[0] = tempOccValue0[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1257 tempOccValue1[1] = tempOccValue0[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1258 tempOccValue1[2] = tempOccValue0[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1259 tempOccValue1[3] = tempOccValue0[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1260
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1261 if (occIndex * 2 < numberOfOccValue - 1) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1262 for (j=0; j<wordBetweenOccValue; j++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1263 c = bwt[bwtIndex];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1264 sum += decodeTable[c >> 16];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1265 sum += decodeTable[c & 0x0000FFFF];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1266 bwtIndex++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1267 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1268 if (!DNA_OCC_SUM_EXCEPTION(sum)) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1269 tempOccValue1[0] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1270 tempOccValue1[1] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1271 tempOccValue1[2] += (sum & 0x000000FF); sum >>= 8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1272 tempOccValue1[3] += sum;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1273 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1274 if (sum == 0x00000100) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1275 tempOccValue1[0] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1276 } else if (sum == 0x00010000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1277 tempOccValue1[1] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1278 } else if (sum == 0x01000000) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1279 tempOccValue1[2] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1280 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1281 tempOccValue1[3] += 256;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1282 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1283 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1284 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1285
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1286 occValue[occIndex * 4 + 0] = (tempOccValue0[0] << 16) | tempOccValue1[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1287 occValue[occIndex * 4 + 1] = (tempOccValue0[1] << 16) | tempOccValue1[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1288 occValue[occIndex * 4 + 2] = (tempOccValue0[2] << 16) | tempOccValue1[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1289 occValue[occIndex * 4 + 3] = (tempOccValue0[3] << 16) | tempOccValue1[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1290
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1291 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1292
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1293 static void BWTIncConstruct(BWTInc *bwtInc, const bgint_t numChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1294 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1295 unsigned int i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1296 bgint_t mergedBwtSizeInWord, mergedOccSizeInWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1297 unsigned int firstCharInThisIteration;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1298
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1299 bgint_t *relativeRank, *seq, *sortedRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1300 unsigned int *insertBwt, *mergedBwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1301 bgint_t newInverseSa0RelativeRank, oldInverseSa0RelativeRank, newInverseSa0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1302
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1303 mergedBwtSizeInWord = BWTResidentSizeInWord(bwtInc->bwt->textLength + numChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1304 mergedOccSizeInWord = BWTOccValueMinorSizeInWord(bwtInc->bwt->textLength + numChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1305
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1306 initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1307
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1308 if (bwtInc->bwt->textLength == 0) { // Initial build
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1309
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1310 // Set address
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1311 seq = (bgint_t*)bwtInc->workingMemory;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1312 relativeRank = seq + bwtInc->buildSize + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1313 // mergedBwt and packedTex may share memory
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1314 mergedBwt = insertBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord; // build in place
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1315
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1316 assert((void*)(relativeRank + bwtInc->buildSize + 1) <= (void*)bwtInc->packedText);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1317 assert((void*)(relativeRank + bwtInc->buildSize + 1) <= (void*)mergedBwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1318
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1319 // ->packedText is not used any more and may be overwritten by mergedBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1320 BWTIncPutPackedTextToRank(bwtInc->packedText, relativeRank, bwtInc->cumulativeCountInCurrentBuild, numChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1321
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1322 firstCharInThisIteration = relativeRank[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1323 relativeRank[numChar] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1324
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1325 // Sort suffix
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1326 QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)ALPHABET_SIZE - 1, 0, FALSE);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1327 newInverseSa0 = relativeRank[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1328
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1329 // Clear BWT area
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1330 initializeVAL(insertBwt, mergedBwtSizeInWord, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1331
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1332 // Build BWT
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1333 BWTIncBuildPackedBwt(relativeRank, insertBwt, numChar, bwtInc->cumulativeCountInCurrentBuild, bwtInc->packedShift);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1334
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1335 // so that the cumulativeCount is not deducted
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1336 bwtInc->firstCharInLastIteration = ALPHABET_SIZE;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1337
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1338 } else { // Incremental build
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1339 // Set address
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1340 sortedRank = (bgint_t*)bwtInc->workingMemory;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1341 seq = sortedRank + bwtInc->buildSize + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1342 insertBwt = (unsigned*)seq; // insertBwt and seq share memory
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1343 // relativeRank and ->packedText may share memory
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1344 relativeRank = seq + bwtInc->buildSize + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1345
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1346 assert((void*)relativeRank <= (void*)bwtInc->packedText);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1347
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1348 // Store the first character of this iteration
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1349 firstCharInThisIteration = bwtInc->packedText[0] >> (BITS_IN_WORD - BIT_PER_CHAR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1350
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1351 // Count occurrence of input text
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1352 ForwardDNAAllOccCountNoLimit(bwtInc->packedText, numChar, bwtInc->cumulativeCountInCurrentBuild + 1, bwtInc->bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1353 // Add the first character of the previous iteration to represent the inverseSa0 of the previous iteration
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1354 bwtInc->cumulativeCountInCurrentBuild[bwtInc->firstCharInLastIteration + 1]++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1355 bwtInc->cumulativeCountInCurrentBuild[2] += bwtInc->cumulativeCountInCurrentBuild[1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1356 bwtInc->cumulativeCountInCurrentBuild[3] += bwtInc->cumulativeCountInCurrentBuild[2];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1357 bwtInc->cumulativeCountInCurrentBuild[4] += bwtInc->cumulativeCountInCurrentBuild[3];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1358
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1359 // Get rank of new suffix among processed suffix
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1360 // The seq array is built into ALPHABET_SIZE + 2 groups; ALPHABET_SIZE groups + 1 group divided into 2 by inverseSa0 + inverseSa0 as 1 group
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1361 // ->packedText is not used any more and will be overwritten by relativeRank
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1362 oldInverseSa0RelativeRank = BWTIncGetAbsoluteRank(bwtInc->bwt, sortedRank, seq, bwtInc->packedText,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1363 numChar, bwtInc->cumulativeCountInCurrentBuild, bwtInc->firstCharInLastIteration);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1364
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1365 // Sort rank by ALPHABET_SIZE + 2 groups (or ALPHABET_SIZE + 1 groups when inverseSa0 sit on the border of a group)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1366 for (i=0; i<ALPHABET_SIZE; i++) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1367 if (bwtInc->cumulativeCountInCurrentBuild[i] > oldInverseSa0RelativeRank ||
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1368 bwtInc->cumulativeCountInCurrentBuild[i+1] <= oldInverseSa0RelativeRank) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1369 BWTIncSortKey(sortedRank + bwtInc->cumulativeCountInCurrentBuild[i], seq + bwtInc->cumulativeCountInCurrentBuild[i], bwtInc->cumulativeCountInCurrentBuild[i+1] - bwtInc->cumulativeCountInCurrentBuild[i]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1370 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1371 if (bwtInc->cumulativeCountInCurrentBuild[i] < oldInverseSa0RelativeRank) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1372 BWTIncSortKey(sortedRank + bwtInc->cumulativeCountInCurrentBuild[i], seq + bwtInc->cumulativeCountInCurrentBuild[i], oldInverseSa0RelativeRank - bwtInc->cumulativeCountInCurrentBuild[i]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1373 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1374 if (bwtInc->cumulativeCountInCurrentBuild[i+1] > oldInverseSa0RelativeRank + 1) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1375 BWTIncSortKey(sortedRank + oldInverseSa0RelativeRank + 1, seq + oldInverseSa0RelativeRank + 1, bwtInc->cumulativeCountInCurrentBuild[i+1] - oldInverseSa0RelativeRank - 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1376 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1377 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1378 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1379
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1380 // build relative rank; sortedRank is updated for merging to cater for the fact that $ is not encoded in bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1381 // the cumulative freq information is used to make sure that inverseSa0 and suffix beginning with different characters are kept in different unsorted groups)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1382 BWTIncBuildRelativeRank(sortedRank, seq, relativeRank, numChar, bwtInc->bwt->inverseSa0, bwtInc->cumulativeCountInCurrentBuild);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1383 assert(relativeRank[numChar] == oldInverseSa0RelativeRank);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1384
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1385 // Sort suffix
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1386 QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)numChar, 1, TRUE);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1387
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1388 newInverseSa0RelativeRank = relativeRank[0];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1389 newInverseSa0 = sortedRank[newInverseSa0RelativeRank] + newInverseSa0RelativeRank;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1390
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1391 sortedRank[newInverseSa0RelativeRank] = 0; // a special value so that this is skipped in the merged bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1392
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1393 // Build BWT; seq is overwritten by insertBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1394 BWTIncBuildBwt(insertBwt, relativeRank, numChar, bwtInc->cumulativeCountInCurrentBuild);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1395
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1396 // Merge BWT; relativeRank may be overwritten by mergedBwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1397 mergedBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1398 - bwtInc->numberOfIterationDone * OCC_INTERVAL / BIT_PER_CHAR * (sizeof(bgint_t) / 4); // minus numberOfIteration * occInterval to create a buffer for merging
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1399 assert(mergedBwt >= insertBwt + numChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1400 BWTIncMergeBwt(sortedRank, bwtInc->bwt->bwtCode, insertBwt, mergedBwt, bwtInc->bwt->textLength, numChar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1401 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1402
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1403 // Build auxiliary structure and update info and pointers in BWT
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1404 bwtInc->bwt->textLength += numChar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1405 bwtInc->bwt->bwtCode = mergedBwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1406 bwtInc->bwt->bwtSizeInWord = mergedBwtSizeInWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1407 bwtInc->bwt->occSizeInWord = mergedOccSizeInWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1408 assert(mergedBwt >= bwtInc->workingMemory + mergedOccSizeInWord);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1409
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1410 bwtInc->bwt->occValue = mergedBwt - mergedOccSizeInWord;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1411
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1412 BWTClearTrailingBwtCode(bwtInc->bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1413 BWTGenerateOccValueFromBwt(bwtInc->bwt->bwtCode, bwtInc->bwt->occValue, bwtInc->bwt->occValueMajor,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1414 bwtInc->bwt->textLength, bwtInc->bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1415
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1416 bwtInc->bwt->inverseSa0 = newInverseSa0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1417
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1418 bwtInc->bwt->cumulativeFreq[1] += bwtInc->cumulativeCountInCurrentBuild[1] - (bwtInc->firstCharInLastIteration <= 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1419 bwtInc->bwt->cumulativeFreq[2] += bwtInc->cumulativeCountInCurrentBuild[2] - (bwtInc->firstCharInLastIteration <= 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1420 bwtInc->bwt->cumulativeFreq[3] += bwtInc->cumulativeCountInCurrentBuild[3] - (bwtInc->firstCharInLastIteration <= 2);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1421 bwtInc->bwt->cumulativeFreq[4] += bwtInc->cumulativeCountInCurrentBuild[4] - (bwtInc->firstCharInLastIteration <= 3);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1422
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1423 bwtInc->firstCharInLastIteration = firstCharInThisIteration;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1424
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1425 // Set build size and text address for the next build
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1426 BWTIncSetBuildSizeAndTextAddr(bwtInc);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1427 bwtInc->numberOfIterationDone++;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1428
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1429 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1430
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1431 BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxBuildSize, bgint_t incMaxBuildSize)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1432 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1433
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1434 FILE *packedFile;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1435 bgint_t packedFileLen;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1436 bgint_t totalTextLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1437 bgint_t textToLoad, textSizeInByte;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1438 bgint_t processedTextLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1439 unsigned char lastByteLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1440
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1441 BWTInc *bwtInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1442
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1443 packedFile = (FILE*)fopen(inputFileName, "rb");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1444
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1445 if (packedFile == NULL) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1446 fprintf(stderr, "BWTIncConstructFromPacked() : Cannot open inputFileName!\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1447 exit(1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1448 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1449
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1450 fseek(packedFile, -1, SEEK_END);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1451 packedFileLen = ftell(packedFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1452 fread(&lastByteLength, sizeof(unsigned char), 1, packedFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1453 totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1454
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1455 bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1456
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1457 BWTIncSetBuildSizeAndTextAddr(bwtInc);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1458
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1459 if (bwtInc->buildSize > totalTextLength) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1460 textToLoad = totalTextLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1461 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1462 textToLoad = totalTextLength - ((totalTextLength - bwtInc->buildSize + CHAR_PER_WORD - 1) / CHAR_PER_WORD * CHAR_PER_WORD);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1463 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1464 textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1465
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1466 fseek(packedFile, -2, SEEK_CUR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1467 fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1468 fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1469 fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1470
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1471 ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1472 BWTIncConstruct(bwtInc, textToLoad);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1473
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1474 processedTextLength = textToLoad;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1475
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1476 while (processedTextLength < totalTextLength) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1477 textToLoad = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1478 if (textToLoad > totalTextLength - processedTextLength) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1479 textToLoad = totalTextLength - processedTextLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1480 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1481 textSizeInByte = textToLoad / CHAR_PER_BYTE;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1482 fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1483 fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1484 fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1485 ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1486 BWTIncConstruct(bwtInc, textToLoad);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1487 processedTextLength += textToLoad;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1488 if (bwtInc->numberOfIterationDone % 10 == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1489 fprintf(stderr, "[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\n",
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1490 (long)bwtInc->numberOfIterationDone, (long)processedTextLength);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1491 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1492 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1493 return bwtInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1494 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1495
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1496 void BWTFree(BWT *bwt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1497 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1498 if (bwt == 0) return;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1499 free(bwt->cumulativeFreq);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1500 free(bwt->bwtCode);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1501 free(bwt->occValue);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1502 free(bwt->occValueMajor);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1503 free(bwt->decodeTable);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1504 free(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1505 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1506
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1507 void BWTIncFree(BWTInc *bwtInc)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1508 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1509 if (bwtInc == 0) return;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1510 free(bwtInc->bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1511 free(bwtInc->workingMemory);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1512 free(bwtInc);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1513 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1514
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1515 static bgint_t BWTFileSizeInWord(const bgint_t numChar)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1516 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1517 // The $ in BWT at the position of inverseSa0 is not encoded
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1518 return (numChar + CHAR_PER_WORD - 1) / CHAR_PER_WORD;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1519 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1520
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1521 void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *occValueFileName)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1522 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1523 FILE *bwtFile;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1524 /* FILE *occValueFile; */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1525 bgint_t bwtLength;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1526
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1527 bwtFile = (FILE*)fopen(bwtFileName, "wb");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1528 if (bwtFile == NULL) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1529 fprintf(stderr, "BWTSaveBwtCodeAndOcc(): Cannot open BWT code file!\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1530 exit(1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1531 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1532
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1533 fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1534 fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1535 bwtLength = BWTFileSizeInWord(bwt->textLength);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1536 fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1537 fclose(bwtFile);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1538 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1539
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1540 void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1541 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1542 BWTInc *bwtInc;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1543 bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1544 printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1545 BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1546 BWTIncFree(bwtInc);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1547 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1548
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1549 int bwt_bwtgen_main(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1550 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1551 if (argc < 3) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1552 fprintf(stderr, "Usage: bwtgen <in.pac> <out.bwt>\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1553 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1554 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1555 bwt_bwtgen(argv[1], argv[2]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1556 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1557 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1558
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1559 #ifdef MAIN_BWT_GEN
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1560
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1561 int main(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1562 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1563 return bwt_bwtgen_main(argc, argv);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1564 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1565
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1566 #endif