0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
|
4
|
|
5 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6 of this software and associated documentation files (the "Software"), to deal
|
|
7 in the Software without restriction, including without limitation the rights
|
|
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9 copies of the Software, and to permit persons to whom the Software is
|
|
10 furnished to do so, subject to the following conditions:
|
|
11
|
|
12 The above copyright notice and this permission notice shall be included in
|
|
13 all copies or substantial portions of the Software.
|
|
14
|
|
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21 THE SOFTWARE.
|
|
22 */
|
|
23
|
|
/*
  2009-06-29 by lh3: cache recent uncompressed blocks.
  2009-06-25 by lh3: optionally use my knetfile library to access files over FTP.
  2009-06-12 by lh3: support a mode string like "wu", where 'u' stands for uncompressed output. */
|
|
28
|
|
29 #include <stdio.h>
|
|
30 #include <stdlib.h>
|
|
31 #include <string.h>
|
|
32 #include <unistd.h>
|
|
33 #include <fcntl.h>
|
|
34 #include <sys/types.h>
|
|
35 #include <sys/stat.h>
|
|
36 #include "bgzf.h"
|
|
37
|
|
38 #include "khash.h"
|
|
39 typedef struct {
|
|
40 int size;
|
|
41 uint8_t *block;
|
|
42 int64_t end_offset;
|
|
43 } cache_t;
|
|
44 KHASH_MAP_INIT_INT64(cache, cache_t)
|
|
45
|
|
/* Stream positioning with off_t: on Windows builds map the names onto
 * ftell/fseek, elsewhere declare the two functions explicitly. */
#if defined(_WIN32) || defined(_MSC_VER)
#define ftello(fp) ftell(fp)
#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
#else
extern off_t ftello(FILE *stream);
extern int fseeko(FILE *stream, off_t offset, int whence);
#endif
|
|
53
|
|
/* Byte type used for the block buffers in this file. */
typedef int8_t bgzf_byte_t;

/* Buffer sizes, in bytes. */
static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
static const int MAX_BLOCK_SIZE = 64 * 1024;

/* Fixed framing around the deflate payload of each block. */
static const int BLOCK_HEADER_LENGTH = 18;
static const int BLOCK_FOOTER_LENGTH = 8;

/* Values written into (and checked in) the gzip header of each block. */
static const int GZIP_ID1 = 31;
static const int GZIP_ID2 = 139;
static const int CM_DEFLATE = 8;
static const int FLG_FEXTRA = 4;
static const int OS_UNKNOWN = 255;
static const int BGZF_ID1 = 66; // 'B'
static const int BGZF_ID2 = 67; // 'C'
static const int BGZF_LEN = 2;
static const int BGZF_XLEN = 6; // BGZF_LEN+4

/* Parameters handed to zlib's deflateInit2()/inflateInit2(). */
static const int GZIP_WINDOW_BITS = -15; // no zlib header
static const int Z_DEFAULT_MEM_LEVEL = 8;
|
|
74
|
|
75
|
|
/*
 * Store a 16-bit value into buffer[0..1] in little-endian byte order.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so non-inlined calls could fail to link.
 */
static inline
void
packInt16(uint8_t* buffer, uint16_t value)
{
    buffer[0] = (uint8_t)value;          /* low byte */
    buffer[1] = (uint8_t)(value >> 8);   /* high byte */
}
|
|
83
|
|
/*
 * Read a little-endian 16-bit value from buffer[0..1] and return it as a
 * non-negative int (0..65535).
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so non-inlined calls could fail to link.
 */
static inline
int
unpackInt16(const uint8_t* buffer)
{
    int low = buffer[0];
    int high = buffer[1];
    return low | (high << 8);
}
|
|
90
|
|
/*
 * Store a 32-bit value into buffer[0..3] in little-endian byte order.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so non-inlined calls could fail to link.
 */
static inline
void
packInt32(uint8_t* buffer, uint32_t value)
{
    int i;
    for (i = 0; i < 4; ++i) {
        buffer[i] = (uint8_t)(value >> (8 * i));
    }
}
|
|
100
|
|
/* Return the smaller of the two integers. */
static inline
int
bgzf_min(int x, int y)
{
    if (x < y) {
        return x;
    }
    return y;
}
|
|
107
|
|
108 static
|
|
109 void
|
|
110 report_error(BGZF* fp, const char* message) {
|
|
111 fp->error = message;
|
|
112 }
|
|
113
|
|
114 int bgzf_check_bgzf(const char *fn)
|
|
115 {
|
|
116 BGZF *fp;
|
|
117 uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
|
|
118 int n;
|
|
119
|
|
120 if ((fp = bgzf_open(fn, "r")) == 0)
|
|
121 {
|
|
122 fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
|
|
123 return -1;
|
|
124 }
|
|
125
|
|
126 #ifdef _USE_KNETFILE
|
|
127 n = knet_read(fp->x.fpr, buf, 10);
|
|
128 #else
|
|
129 n = fread(buf, 1, 10, fp->file);
|
|
130 #endif
|
|
131 bgzf_close(fp);
|
|
132
|
|
133 if ( n!=10 )
|
|
134 return -1;
|
|
135
|
|
136 if ( !memcmp(magic, buf, 10) ) return 1;
|
|
137 return 0;
|
|
138 }
|
|
139
|
|
140 static BGZF *bgzf_read_init()
|
|
141 {
|
|
142 BGZF *fp;
|
|
143 fp = calloc(1, sizeof(BGZF));
|
|
144 fp->uncompressed_block_size = MAX_BLOCK_SIZE;
|
|
145 fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
|
|
146 fp->compressed_block_size = MAX_BLOCK_SIZE;
|
|
147 fp->compressed_block = malloc(MAX_BLOCK_SIZE);
|
|
148 fp->cache_size = 0;
|
|
149 fp->cache = kh_init(cache);
|
|
150 return fp;
|
|
151 }
|
|
152
|
|
153 static
|
|
154 BGZF*
|
|
155 open_read(int fd)
|
|
156 {
|
|
157 #ifdef _USE_KNETFILE
|
|
158 knetFile *file = knet_dopen(fd, "r");
|
|
159 #else
|
|
160 FILE* file = fdopen(fd, "r");
|
|
161 #endif
|
|
162 BGZF* fp;
|
|
163 if (file == 0) return 0;
|
|
164 fp = bgzf_read_init();
|
|
165 fp->file_descriptor = fd;
|
|
166 fp->open_mode = 'r';
|
|
167 #ifdef _USE_KNETFILE
|
|
168 fp->x.fpr = file;
|
|
169 #else
|
|
170 fp->file = file;
|
|
171 #endif
|
|
172 return fp;
|
|
173 }
|
|
174
|
|
175 static
|
|
176 BGZF*
|
|
177 open_write(int fd, int compress_level) // compress_level==-1 for the default level
|
|
178 {
|
|
179 FILE* file = fdopen(fd, "w");
|
|
180 BGZF* fp;
|
|
181 if (file == 0) return 0;
|
|
182 fp = malloc(sizeof(BGZF));
|
|
183 fp->file_descriptor = fd;
|
|
184 fp->open_mode = 'w';
|
|
185 fp->owned_file = 0;
|
|
186 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
|
|
187 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
|
|
188 #ifdef _USE_KNETFILE
|
|
189 fp->x.fpw = file;
|
|
190 #else
|
|
191 fp->file = file;
|
|
192 #endif
|
|
193 fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
|
|
194 fp->uncompressed_block = NULL;
|
|
195 fp->compressed_block_size = MAX_BLOCK_SIZE;
|
|
196 fp->compressed_block = malloc(MAX_BLOCK_SIZE);
|
|
197 fp->block_address = 0;
|
|
198 fp->block_offset = 0;
|
|
199 fp->block_length = 0;
|
|
200 fp->error = NULL;
|
|
201 return fp;
|
|
202 }
|
|
203
|
|
204 BGZF*
|
|
205 bgzf_open(const char* __restrict path, const char* __restrict mode)
|
|
206 {
|
|
207 BGZF* fp = NULL;
|
|
208 if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
|
|
209 #ifdef _USE_KNETFILE
|
|
210 knetFile *file = knet_open(path, mode);
|
|
211 if (file == 0) return 0;
|
|
212 fp = bgzf_read_init();
|
|
213 fp->file_descriptor = -1;
|
|
214 fp->open_mode = 'r';
|
|
215 fp->x.fpr = file;
|
|
216 #else
|
|
217 int fd, oflag = O_RDONLY;
|
|
218 #ifdef _WIN32
|
|
219 oflag |= O_BINARY;
|
|
220 #endif
|
|
221 fd = open(path, oflag);
|
|
222 if (fd == -1) return 0;
|
|
223 fp = open_read(fd);
|
|
224 #endif
|
|
225 } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
|
|
226 int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
|
|
227 #ifdef _WIN32
|
|
228 oflag |= O_BINARY;
|
|
229 #endif
|
|
230 fd = open(path, oflag, 0666);
|
|
231 if (fd == -1) return 0;
|
|
232 { // set compress_level
|
|
233 int i;
|
|
234 for (i = 0; mode[i]; ++i)
|
|
235 if (mode[i] >= '0' && mode[i] <= '9') break;
|
|
236 if (mode[i]) compress_level = (int)mode[i] - '0';
|
|
237 if (strchr(mode, 'u')) compress_level = 0;
|
|
238 }
|
|
239 fp = open_write(fd, compress_level);
|
|
240 }
|
|
241 if (fp != NULL) fp->owned_file = 1;
|
|
242 return fp;
|
|
243 }
|
|
244
|
|
245 BGZF*
|
|
246 bgzf_fdopen(int fd, const char * __restrict mode)
|
|
247 {
|
|
248 if (fd == -1) return 0;
|
|
249 if (mode[0] == 'r' || mode[0] == 'R') {
|
|
250 return open_read(fd);
|
|
251 } else if (mode[0] == 'w' || mode[0] == 'W') {
|
|
252 int i, compress_level = -1;
|
|
253 for (i = 0; mode[i]; ++i)
|
|
254 if (mode[i] >= '0' && mode[i] <= '9') break;
|
|
255 if (mode[i]) compress_level = (int)mode[i] - '0';
|
|
256 if (strchr(mode, 'u')) compress_level = 0;
|
|
257 return open_write(fd, compress_level);
|
|
258 } else {
|
|
259 return NULL;
|
|
260 }
|
|
261 }
|
|
262
|
|
263 static
|
|
264 int
|
|
265 deflate_block(BGZF* fp, int block_length)
|
|
266 {
|
|
267 // Deflate the block in fp->uncompressed_block into fp->compressed_block.
|
|
268 // Also adds an extra field that stores the compressed block length.
|
|
269
|
|
270 bgzf_byte_t* buffer = fp->compressed_block;
|
|
271 int buffer_size = fp->compressed_block_size;
|
|
272
|
|
273 // Init gzip header
|
|
274 buffer[0] = GZIP_ID1;
|
|
275 buffer[1] = GZIP_ID2;
|
|
276 buffer[2] = CM_DEFLATE;
|
|
277 buffer[3] = FLG_FEXTRA;
|
|
278 buffer[4] = 0; // mtime
|
|
279 buffer[5] = 0;
|
|
280 buffer[6] = 0;
|
|
281 buffer[7] = 0;
|
|
282 buffer[8] = 0;
|
|
283 buffer[9] = OS_UNKNOWN;
|
|
284 buffer[10] = BGZF_XLEN;
|
|
285 buffer[11] = 0;
|
|
286 buffer[12] = BGZF_ID1;
|
|
287 buffer[13] = BGZF_ID2;
|
|
288 buffer[14] = BGZF_LEN;
|
|
289 buffer[15] = 0;
|
|
290 buffer[16] = 0; // placeholder for block length
|
|
291 buffer[17] = 0;
|
|
292
|
|
293 // loop to retry for blocks that do not compress enough
|
|
294 int input_length = block_length;
|
|
295 int compressed_length = 0;
|
|
296 while (1) {
|
|
297 z_stream zs;
|
|
298 zs.zalloc = NULL;
|
|
299 zs.zfree = NULL;
|
|
300 zs.next_in = fp->uncompressed_block;
|
|
301 zs.avail_in = input_length;
|
|
302 zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
|
|
303 zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
|
|
304
|
|
305 int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
|
|
306 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
|
|
307 if (status != Z_OK) {
|
|
308 report_error(fp, "deflate init failed");
|
|
309 return -1;
|
|
310 }
|
|
311 status = deflate(&zs, Z_FINISH);
|
|
312 if (status != Z_STREAM_END) {
|
|
313 deflateEnd(&zs);
|
|
314 if (status == Z_OK) {
|
|
315 // Not enough space in buffer.
|
|
316 // Can happen in the rare case the input doesn't compress enough.
|
|
317 // Reduce the amount of input until it fits.
|
|
318 input_length -= 1024;
|
|
319 if (input_length <= 0) {
|
|
320 // should never happen
|
|
321 report_error(fp, "input reduction failed");
|
|
322 return -1;
|
|
323 }
|
|
324 continue;
|
|
325 }
|
|
326 report_error(fp, "deflate failed");
|
|
327 return -1;
|
|
328 }
|
|
329 status = deflateEnd(&zs);
|
|
330 if (status != Z_OK) {
|
|
331 report_error(fp, "deflate end failed");
|
|
332 return -1;
|
|
333 }
|
|
334 compressed_length = zs.total_out;
|
|
335 compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
|
|
336 if (compressed_length > MAX_BLOCK_SIZE) {
|
|
337 // should never happen
|
|
338 report_error(fp, "deflate overflow");
|
|
339 return -1;
|
|
340 }
|
|
341 break;
|
|
342 }
|
|
343
|
|
344 packInt16((uint8_t*)&buffer[16], compressed_length-1);
|
|
345 uint32_t crc = crc32(0L, NULL, 0L);
|
|
346 crc = crc32(crc, fp->uncompressed_block, input_length);
|
|
347 packInt32((uint8_t*)&buffer[compressed_length-8], crc);
|
|
348 packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
|
|
349
|
|
350 int remaining = block_length - input_length;
|
|
351 if (remaining > 0) {
|
|
352 if (remaining > input_length) {
|
|
353 // should never happen (check so we can use memcpy)
|
|
354 report_error(fp, "remainder too large");
|
|
355 return -1;
|
|
356 }
|
|
357 memcpy(fp->uncompressed_block,
|
|
358 fp->uncompressed_block + input_length,
|
|
359 remaining);
|
|
360 }
|
|
361 fp->block_offset = remaining;
|
|
362 return compressed_length;
|
|
363 }
|
|
364
|
|
365 static
|
|
366 int
|
|
367 inflate_block(BGZF* fp, int block_length)
|
|
368 {
|
|
369 // Inflate the block in fp->compressed_block into fp->uncompressed_block
|
|
370
|
|
371 z_stream zs;
|
|
372 int status;
|
|
373 zs.zalloc = NULL;
|
|
374 zs.zfree = NULL;
|
|
375 zs.next_in = fp->compressed_block + 18;
|
|
376 zs.avail_in = block_length - 16;
|
|
377 zs.next_out = fp->uncompressed_block;
|
|
378 zs.avail_out = fp->uncompressed_block_size;
|
|
379
|
|
380 status = inflateInit2(&zs, GZIP_WINDOW_BITS);
|
|
381 if (status != Z_OK) {
|
|
382 report_error(fp, "inflate init failed");
|
|
383 return -1;
|
|
384 }
|
|
385 status = inflate(&zs, Z_FINISH);
|
|
386 if (status != Z_STREAM_END) {
|
|
387 inflateEnd(&zs);
|
|
388 report_error(fp, "inflate failed");
|
|
389 return -1;
|
|
390 }
|
|
391 status = inflateEnd(&zs);
|
|
392 if (status != Z_OK) {
|
|
393 report_error(fp, "inflate failed");
|
|
394 return -1;
|
|
395 }
|
|
396 return zs.total_out;
|
|
397 }
|
|
398
|
|
399 static
|
|
400 int
|
|
401 check_header(const bgzf_byte_t* header)
|
|
402 {
|
|
403 return (header[0] == GZIP_ID1 &&
|
|
404 header[1] == (bgzf_byte_t) GZIP_ID2 &&
|
|
405 header[2] == Z_DEFLATED &&
|
|
406 (header[3] & FLG_FEXTRA) != 0 &&
|
|
407 unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
|
|
408 header[12] == BGZF_ID1 &&
|
|
409 header[13] == BGZF_ID2 &&
|
|
410 unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
|
|
411 }
|
|
412
|
|
413 static void free_cache(BGZF *fp)
|
|
414 {
|
|
415 khint_t k;
|
|
416 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
417 if (fp->open_mode != 'r') return;
|
|
418 for (k = kh_begin(h); k < kh_end(h); ++k)
|
|
419 if (kh_exist(h, k)) free(kh_val(h, k).block);
|
|
420 kh_destroy(cache, h);
|
|
421 }
|
|
422
|
|
423 static int load_block_from_cache(BGZF *fp, int64_t block_address)
|
|
424 {
|
|
425 khint_t k;
|
|
426 cache_t *p;
|
|
427 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
428 k = kh_get(cache, h, block_address);
|
|
429 if (k == kh_end(h)) return 0;
|
|
430 p = &kh_val(h, k);
|
|
431 if (fp->block_length != 0) fp->block_offset = 0;
|
|
432 fp->block_address = block_address;
|
|
433 fp->block_length = p->size;
|
|
434 memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
|
|
435 #ifdef _USE_KNETFILE
|
|
436 knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
|
|
437 #else
|
|
438 fseeko(fp->file, p->end_offset, SEEK_SET);
|
|
439 #endif
|
|
440 return p->size;
|
|
441 }
|
|
442
|
|
443 static void cache_block(BGZF *fp, int size)
|
|
444 {
|
|
445 int ret;
|
|
446 khint_t k;
|
|
447 cache_t *p;
|
|
448 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
|
|
449 if (MAX_BLOCK_SIZE >= fp->cache_size) return;
|
|
450 if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
|
|
451 /* A better way would be to remove the oldest block in the
|
|
452 * cache, but here we remove a random one for simplicity. This
|
|
453 * should not have a big impact on performance. */
|
|
454 for (k = kh_begin(h); k < kh_end(h); ++k)
|
|
455 if (kh_exist(h, k)) break;
|
|
456 if (k < kh_end(h)) {
|
|
457 free(kh_val(h, k).block);
|
|
458 kh_del(cache, h, k);
|
|
459 }
|
|
460 }
|
|
461 k = kh_put(cache, h, fp->block_address, &ret);
|
|
462 if (ret == 0) return; // if this happens, a bug!
|
|
463 p = &kh_val(h, k);
|
|
464 p->size = fp->block_length;
|
|
465 p->end_offset = fp->block_address + size;
|
|
466 p->block = malloc(MAX_BLOCK_SIZE);
|
|
467 memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
|
|
468 }
|
|
469
|
|
470 int
|
|
471 bgzf_read_block(BGZF* fp)
|
|
472 {
|
|
473 bgzf_byte_t header[BLOCK_HEADER_LENGTH];
|
|
474 int count, size = 0, block_length, remaining;
|
|
475 #ifdef _USE_KNETFILE
|
|
476 int64_t block_address = knet_tell(fp->x.fpr);
|
|
477 if (load_block_from_cache(fp, block_address)) return 0;
|
|
478 count = knet_read(fp->x.fpr, header, sizeof(header));
|
|
479 #else
|
|
480 int64_t block_address = ftello(fp->file);
|
|
481 if (load_block_from_cache(fp, block_address)) return 0;
|
|
482 count = fread(header, 1, sizeof(header), fp->file);
|
|
483 #endif
|
|
484 if (count == 0) {
|
|
485 fp->block_length = 0;
|
|
486 return 0;
|
|
487 }
|
|
488 size = count;
|
|
489 if (count != sizeof(header)) {
|
|
490 report_error(fp, "read failed");
|
|
491 return -1;
|
|
492 }
|
|
493 if (!check_header(header)) {
|
|
494 report_error(fp, "invalid block header");
|
|
495 return -1;
|
|
496 }
|
|
497 block_length = unpackInt16((uint8_t*)&header[16]) + 1;
|
|
498 bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
|
|
499 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
|
|
500 remaining = block_length - BLOCK_HEADER_LENGTH;
|
|
501 #ifdef _USE_KNETFILE
|
|
502 count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
|
|
503 #else
|
|
504 count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
|
|
505 #endif
|
|
506 if (count != remaining) {
|
|
507 report_error(fp, "read failed");
|
|
508 return -1;
|
|
509 }
|
|
510 size += count;
|
|
511 count = inflate_block(fp, block_length);
|
|
512 if (count < 0) return -1;
|
|
513 if (fp->block_length != 0) {
|
|
514 // Do not reset offset if this read follows a seek.
|
|
515 fp->block_offset = 0;
|
|
516 }
|
|
517 fp->block_address = block_address;
|
|
518 fp->block_length = count;
|
|
519 cache_block(fp, size);
|
|
520 return 0;
|
|
521 }
|
|
522
|
|
523 int
|
|
524 bgzf_read(BGZF* fp, void* data, int length)
|
|
525 {
|
|
526 if (length <= 0) {
|
|
527 return 0;
|
|
528 }
|
|
529 if (fp->open_mode != 'r') {
|
|
530 report_error(fp, "file not open for reading");
|
|
531 return -1;
|
|
532 }
|
|
533
|
|
534 int bytes_read = 0;
|
|
535 bgzf_byte_t* output = data;
|
|
536 while (bytes_read < length) {
|
|
537 int copy_length, available = fp->block_length - fp->block_offset;
|
|
538 bgzf_byte_t *buffer;
|
|
539 if (available <= 0) {
|
|
540 if (bgzf_read_block(fp) != 0) {
|
|
541 return -1;
|
|
542 }
|
|
543 available = fp->block_length - fp->block_offset;
|
|
544 if (available <= 0) {
|
|
545 break;
|
|
546 }
|
|
547 }
|
|
548 copy_length = bgzf_min(length-bytes_read, available);
|
|
549 buffer = fp->uncompressed_block;
|
|
550 memcpy(output, buffer + fp->block_offset, copy_length);
|
|
551 fp->block_offset += copy_length;
|
|
552 output += copy_length;
|
|
553 bytes_read += copy_length;
|
|
554 }
|
|
555 if (fp->block_offset == fp->block_length) {
|
|
556 #ifdef _USE_KNETFILE
|
|
557 fp->block_address = knet_tell(fp->x.fpr);
|
|
558 #else
|
|
559 fp->block_address = ftello(fp->file);
|
|
560 #endif
|
|
561 fp->block_offset = 0;
|
|
562 fp->block_length = 0;
|
|
563 }
|
|
564 return bytes_read;
|
|
565 }
|
|
566
|
|
567 int bgzf_flush(BGZF* fp)
|
|
568 {
|
|
569 while (fp->block_offset > 0) {
|
|
570 int count, block_length;
|
|
571 block_length = deflate_block(fp, fp->block_offset);
|
|
572 if (block_length < 0) return -1;
|
|
573 #ifdef _USE_KNETFILE
|
|
574 count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
|
|
575 #else
|
|
576 count = fwrite(fp->compressed_block, 1, block_length, fp->file);
|
|
577 #endif
|
|
578 if (count != block_length) {
|
|
579 report_error(fp, "write failed");
|
|
580 return -1;
|
|
581 }
|
|
582 fp->block_address += block_length;
|
|
583 }
|
|
584 return 0;
|
|
585 }
|
|
586
|
|
587 int bgzf_flush_try(BGZF *fp, int size)
|
|
588 {
|
|
589 if (fp->block_offset + size > fp->uncompressed_block_size)
|
|
590 return bgzf_flush(fp);
|
|
591 return -1;
|
|
592 }
|
|
593
|
|
594 int bgzf_write(BGZF* fp, const void* data, int length)
|
|
595 {
|
|
596 const bgzf_byte_t *input = data;
|
|
597 int block_length, bytes_written;
|
|
598 if (fp->open_mode != 'w') {
|
|
599 report_error(fp, "file not open for writing");
|
|
600 return -1;
|
|
601 }
|
|
602
|
|
603 if (fp->uncompressed_block == NULL)
|
|
604 fp->uncompressed_block = malloc(fp->uncompressed_block_size);
|
|
605
|
|
606 input = data;
|
|
607 block_length = fp->uncompressed_block_size;
|
|
608 bytes_written = 0;
|
|
609 while (bytes_written < length) {
|
|
610 int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
|
|
611 bgzf_byte_t* buffer = fp->uncompressed_block;
|
|
612 memcpy(buffer + fp->block_offset, input, copy_length);
|
|
613 fp->block_offset += copy_length;
|
|
614 input += copy_length;
|
|
615 bytes_written += copy_length;
|
|
616 if (fp->block_offset == block_length) {
|
|
617 if (bgzf_flush(fp) != 0) {
|
|
618 break;
|
|
619 }
|
|
620 }
|
|
621 }
|
|
622 return bytes_written;
|
|
623 }
|
|
624
|
|
625 int bgzf_close(BGZF* fp)
|
|
626 {
|
|
627 if (fp->open_mode == 'w') {
|
|
628 if (bgzf_flush(fp) != 0) return -1;
|
|
629 { // add an empty block
|
|
630 int count, block_length = deflate_block(fp, 0);
|
|
631 #ifdef _USE_KNETFILE
|
|
632 count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
|
|
633 #else
|
|
634 count = fwrite(fp->compressed_block, 1, block_length, fp->file);
|
|
635 #endif
|
|
636 }
|
|
637 #ifdef _USE_KNETFILE
|
|
638 if (fflush(fp->x.fpw) != 0) {
|
|
639 #else
|
|
640 if (fflush(fp->file) != 0) {
|
|
641 #endif
|
|
642 report_error(fp, "flush failed");
|
|
643 return -1;
|
|
644 }
|
|
645 }
|
|
646 if (fp->owned_file) {
|
|
647 #ifdef _USE_KNETFILE
|
|
648 int ret;
|
|
649 if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
|
|
650 else ret = knet_close(fp->x.fpr);
|
|
651 if (ret != 0) return -1;
|
|
652 #else
|
|
653 if (fclose(fp->file) != 0) return -1;
|
|
654 #endif
|
|
655 }
|
|
656 free(fp->uncompressed_block);
|
|
657 free(fp->compressed_block);
|
|
658 free_cache(fp);
|
|
659 free(fp);
|
|
660 return 0;
|
|
661 }
|
|
662
|
|
663 void bgzf_set_cache_size(BGZF *fp, int cache_size)
|
|
664 {
|
|
665 if (fp) fp->cache_size = cache_size;
|
|
666 }
|
|
667
|
|
668 int bgzf_check_EOF(BGZF *fp)
|
|
669 {
|
|
670 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
|
|
671 uint8_t buf[28];
|
|
672 off_t offset;
|
|
673 #ifdef _USE_KNETFILE
|
|
674 offset = knet_tell(fp->x.fpr);
|
|
675 if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
|
|
676 knet_read(fp->x.fpr, buf, 28);
|
|
677 knet_seek(fp->x.fpr, offset, SEEK_SET);
|
|
678 #else
|
|
679 offset = ftello(fp->file);
|
|
680 if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
|
|
681 fread(buf, 1, 28, fp->file);
|
|
682 fseeko(fp->file, offset, SEEK_SET);
|
|
683 #endif
|
|
684 return (memcmp(magic, buf, 28) == 0)? 1 : 0;
|
|
685 }
|
|
686
|
|
687 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
|
|
688 {
|
|
689 int block_offset;
|
|
690 int64_t block_address;
|
|
691
|
|
692 if (fp->open_mode != 'r') {
|
|
693 report_error(fp, "file not open for read");
|
|
694 return -1;
|
|
695 }
|
|
696 if (where != SEEK_SET) {
|
|
697 report_error(fp, "unimplemented seek option");
|
|
698 return -1;
|
|
699 }
|
|
700 block_offset = pos & 0xFFFF;
|
|
701 block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
|
|
702 #ifdef _USE_KNETFILE
|
|
703 if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
|
|
704 #else
|
|
705 if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
|
|
706 #endif
|
|
707 report_error(fp, "seek failed");
|
|
708 return -1;
|
|
709 }
|
|
710 fp->block_length = 0; // indicates current block is not loaded
|
|
711 fp->block_address = block_address;
|
|
712 fp->block_offset = block_offset;
|
|
713 return 0;
|
|
714 }
|