annotate bwa-0.6.2/bwtmisc.c @ 0:dd1186b11b3b draft

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:14 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 /* The MIT License
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3 Copyright (c) 2008 Genome Research Ltd (GRL).
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6 a copy of this software and associated documentation files (the
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11 the following conditions:
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 SOFTWARE.
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24 */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 #include <stdlib.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 #include <stdio.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30 #include <string.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 #include <unistd.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 #include "bntseq.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 #include "utils.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34 #include "main.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35 #include "bwt.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 #ifdef _DIVBWT
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 #include "divsufsort.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41 int is_bwt(ubyte_t *T, int n);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43 int64_t bwa_seq_len(const char *fn_pac)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 FILE *fp;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 int64_t pac_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 ubyte_t c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48 fp = xopen(fn_pac, "rb");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 fseek(fp, -1, SEEK_END);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 pac_len = ftell(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 fread(&c, 1, 1, fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52 fclose(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 return (pac_len - 1) * 4 + (int)c;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 ubyte_t *buf, *buf2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 int i, pac_size;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 FILE *fp;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 // initialization
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 bwt->seq_len = bwa_seq_len(fn_pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 bwt->bwt_size = (bwt->seq_len + 15) >> 4;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 fp = xopen(fn_pac, "rb");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 // prepare sequence
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 buf2 = (ubyte_t*)calloc(pac_size, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 fread(buf2, 1, pac_size, fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 fclose(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 memset(bwt->L2, 0, 5 * 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 for (i = 0; i < bwt->seq_len; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 ++bwt->L2[1+buf[i]];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 free(buf2);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 // Burrows-Wheeler Transform
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 if (use_is) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85 bwt->primary = is_bwt(buf, bwt->seq_len);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 } else {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 #ifdef _DIVBWT
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 bwt->primary = divbwt(buf, buf, 0, bwt->seq_len);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 #else
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 err_fatal_simple("libdivsufsort is not compiled in.");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 bwt->bwt = (u_int32_t*)calloc(bwt->bwt_size, 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 for (i = 0; i < bwt->seq_len; ++i)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96 free(buf);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 return bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100 int bwa_pac2bwt(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 int c, use_is = 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 while ((c = getopt(argc, argv, "d")) >= 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 switch (c) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106 case 'd': use_is = 0; break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107 default: return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
108 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
109 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
110 if (optind + 2 > argc) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
111 fprintf(stderr, "Usage: bwa pac2bwt [-d] <in.pac> <out.bwt>\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
112 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
113 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
114 bwt = bwt_pac2bwt(argv[optind], use_is);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
115 bwt_dump_bwt(argv[optind+1], bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
116 bwt_destroy(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
117 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
118 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
119
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
120 #define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
121
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
122 void bwt_bwtupdate_core(bwt_t *bwt)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
123 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
124 bwtint_t i, k, c[4], n_occ;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
125 uint32_t *buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
126
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
127 n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
128 bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
129 buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
130 c[0] = c[1] = c[2] = c[3] = 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
131 for (i = k = 0; i < bwt->seq_len; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
132 if (i % OCC_INTERVAL == 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
133 memcpy(buf + k, c, sizeof(bwtint_t) * 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
134 k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
135 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
136 if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
137 ++c[bwt_B00(bwt, i)];
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
138 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
139 // the last element
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
140 memcpy(buf + k, c, sizeof(bwtint_t) * 4);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
141 xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
142 // update bwt
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
143 free(bwt->bwt); bwt->bwt = buf;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
144 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
145
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
146 int bwa_bwtupdate(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
147 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
148 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
149 if (argc < 2) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
150 fprintf(stderr, "Usage: bwa bwtupdate <the.bwt>\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
151 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
152 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
153 bwt = bwt_restore_bwt(argv[1]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
154 bwt_bwtupdate_core(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
155 bwt_dump_bwt(argv[1], bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
156 bwt_destroy(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
157 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
158 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
159
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
160 const int nst_color_space_table[] = { 4, 0, 0, 1, 0, 2, 3, 4, 0, 3, 2, 4, 1, 4, 4, 4};
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
161
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
162 /* this function is not memory efficient, but this will make life easier
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
163 Ideally we should also change .amb files as one 'N' in the nucleotide
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
164 sequence leads to two ambiguous colors. I may do this later... */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
165 uint8_t *bwa_pac2cspac_core(const bntseq_t *bns)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
166 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
167 uint8_t *pac, *cspac;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
168 bwtint_t i;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
169 int c1, c2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
170 pac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
171 cspac = (uint8_t*)calloc(bns->l_pac/4 + 1, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
172 fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
173 rewind(bns->fp_pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
174 c1 = pac[0]>>6; cspac[0] = c1<<6;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
175 for (i = 1; i < bns->l_pac; ++i) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
176 c2 = pac[i>>2] >> (~i&3)*2 & 3;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
177 cspac[i>>2] |= nst_color_space_table[(1<<c1)|(1<<c2)] << (~i&3)*2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
178 c1 = c2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
179 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
180 free(pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
181 return cspac;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
182 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
183
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
184 int bwa_pac2cspac(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
185 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
186 bntseq_t *bns;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
187 uint8_t *cspac, ct;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
188 char *str;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
189 FILE *fp;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
190
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
191 if (argc < 3) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
192 fprintf(stderr, "Usage: bwa pac2cspac <in.nt.prefix> <out.cs.prefix>\n");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
193 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
194 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
195 bns = bns_restore(argv[1]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
196 cspac = bwa_pac2cspac_core(bns);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
197 bns_dump(bns, argv[2]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
198 // now write cspac
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
199 str = (char*)calloc(strlen(argv[2]) + 5, 1);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
200 strcat(strcpy(str, argv[2]), ".pac");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
201 fp = xopen(str, "wb");
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
202 fwrite(cspac, 1, bns->l_pac/4 + 1, fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
203 ct = bns->l_pac % 4;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
204 fwrite(&ct, 1, 1, fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
205 fclose(fp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
206 bns_destroy(bns);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
207 free(cspac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
208 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
209 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
210
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
211 int bwa_bwt2sa(int argc, char *argv[])
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
212 {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
213 bwt_t *bwt;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
214 int c, sa_intv = 32;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
215 while ((c = getopt(argc, argv, "i:")) >= 0) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
216 switch (c) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
217 case 'i': sa_intv = atoi(optarg); break;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
218 default: return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
219 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
220 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
221 if (optind + 2 > argc) {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
222 fprintf(stderr, "Usage: bwa bwt2sa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
223 return 1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
224 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
225 bwt = bwt_restore_bwt(argv[optind]);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
226 bwt_cal_sa(bwt, sa_intv);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
227 bwt_dump_sa(argv[optind+1], bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
228 bwt_destroy(bwt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
229 return 0;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
230 }