Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/bgzf.c @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology | |
4 | |
5 Permission is hereby granted, free of charge, to any person obtaining a copy | |
6 of this software and associated documentation files (the "Software"), to deal | |
7 in the Software without restriction, including without limitation the rights | |
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
9 copies of the Software, and to permit persons to whom the Software is | |
10 furnished to do so, subject to the following conditions: | |
11 | |
12 The above copyright notice and this permission notice shall be included in | |
13 all copies or substantial portions of the Software. | |
14 | |
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
21 THE SOFTWARE. | |
22 */ | |
23 | |
24 /* | |
25 2009-06-29 by lh3: cache recent uncompressed blocks. | |
26 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP. | |
27 2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */ | |
28 | |
29 #include <stdio.h> | |
30 #include <stdlib.h> | |
31 #include <string.h> | |
32 #include <unistd.h> | |
33 #include <fcntl.h> | |
34 #include <sys/types.h> | |
35 #include <sys/stat.h> | |
36 #include "bgzf.h" | |
37 | |
38 #include "khash.h" | |
39 typedef struct { | |
40 int size; | |
41 uint8_t *block; | |
42 int64_t end_offset; | |
43 } cache_t; | |
44 KHASH_MAP_INIT_INT64(cache, cache_t) | |
45 | |
/*
 * File-offset helpers.  Outside Windows/MSVC the ftello/fseeko functions are
 * declared explicitly here; on Windows/MSVC they are mapped onto ftell/fseek.
 */
#if !defined(_WIN32) && !defined(_MSC_VER)
extern off_t ftello(FILE *stream);
extern int fseeko(FILE *stream, off_t offset, int whence);
#else
#define ftello(fp) ftell(fp)
#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
#endif
53 | |
/* Byte type used for the block buffers in this file (signed 8-bit). */
typedef int8_t bgzf_byte_t;

/*
 * Numeric parameters of the block format.
 *
 * These are enumeration constants rather than `static const int` objects:
 * in C a const-qualified object is not an integer constant expression, so
 * using one as an array bound silently creates a variable-length array and
 * it cannot appear in a static initializer.  Enumeration constants have
 * type int and the same values, so every existing use keeps its meaning.
 */
enum {
    DEFAULT_BLOCK_SIZE = 64 * 1024,
    MAX_BLOCK_SIZE = 64 * 1024,

    BLOCK_HEADER_LENGTH = 18,
    BLOCK_FOOTER_LENGTH = 8,

    GZIP_ID1 = 31,
    GZIP_ID2 = 139,
    CM_DEFLATE = 8,
    FLG_FEXTRA = 4,
    OS_UNKNOWN = 255,
    BGZF_ID1 = 66, // 'B'
    BGZF_ID2 = 67, // 'C'
    BGZF_LEN = 2,
    BGZF_XLEN = BGZF_LEN + 4, // == 6

    GZIP_WINDOW_BITS = -15, // no zlib header
    Z_DEFAULT_MEM_LEVEL = 8
};
74 | |
75 | |
/*
 * Store a 16-bit value into buffer[0..1], least-significant byte first.
 *
 * Declared `static inline`: a plain `inline` definition does not provide an
 * external definition in C99/C11, so any call the compiler chooses not to
 * inline would otherwise be an undefined reference at link time.
 */
static inline
void
packInt16(uint8_t* buffer, uint16_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)(value >> 8);
}
83 | |
/*
 * Read the 16-bit little-endian value stored at buffer[0..1].
 * The result is in the range 0..65535.
 *
 * Declared `static inline`: a plain `inline` definition does not provide an
 * external definition in C99/C11, so any call the compiler chooses not to
 * inline would otherwise be an undefined reference at link time.
 */
static inline
int
unpackInt16(const uint8_t* buffer)
{
    return (buffer[0] | (buffer[1] << 8));
}
90 | |
/*
 * Store a 32-bit value into buffer[0..3], least-significant byte first.
 *
 * Declared `static inline`: a plain `inline` definition does not provide an
 * external definition in C99/C11, so any call the compiler chooses not to
 * inline would otherwise be an undefined reference at link time.
 */
static inline
void
packInt32(uint8_t* buffer, uint32_t value)
{
    buffer[0] = (uint8_t)(value & 0xff);
    buffer[1] = (uint8_t)((value >> 8) & 0xff);
    buffer[2] = (uint8_t)((value >> 16) & 0xff);
    buffer[3] = (uint8_t)(value >> 24);
}
100 | |
/* Return the smaller of two ints. */
static inline
int
bgzf_min(int x, int y)
{
    if (y <= x) {
        return y;
    }
    return x;
}
107 | |
108 static | |
109 void | |
110 report_error(BGZF* fp, const char* message) { | |
111 fp->error = message; | |
112 } | |
113 | |
114 int bgzf_check_bgzf(const char *fn) | |
115 { | |
116 BGZF *fp; | |
117 uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377"; | |
118 int n; | |
119 | |
120 if ((fp = bgzf_open(fn, "r")) == 0) | |
121 { | |
122 fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn); | |
123 return -1; | |
124 } | |
125 | |
126 #ifdef _USE_KNETFILE | |
127 n = knet_read(fp->x.fpr, buf, 10); | |
128 #else | |
129 n = fread(buf, 1, 10, fp->file); | |
130 #endif | |
131 bgzf_close(fp); | |
132 | |
133 if ( n!=10 ) | |
134 return -1; | |
135 | |
136 if ( !memcmp(magic, buf, 10) ) return 1; | |
137 return 0; | |
138 } | |
139 | |
140 static BGZF *bgzf_read_init() | |
141 { | |
142 BGZF *fp; | |
143 fp = calloc(1, sizeof(BGZF)); | |
144 fp->uncompressed_block_size = MAX_BLOCK_SIZE; | |
145 fp->uncompressed_block = malloc(MAX_BLOCK_SIZE); | |
146 fp->compressed_block_size = MAX_BLOCK_SIZE; | |
147 fp->compressed_block = malloc(MAX_BLOCK_SIZE); | |
148 fp->cache_size = 0; | |
149 fp->cache = kh_init(cache); | |
150 return fp; | |
151 } | |
152 | |
153 static | |
154 BGZF* | |
155 open_read(int fd) | |
156 { | |
157 #ifdef _USE_KNETFILE | |
158 knetFile *file = knet_dopen(fd, "r"); | |
159 #else | |
160 FILE* file = fdopen(fd, "r"); | |
161 #endif | |
162 BGZF* fp; | |
163 if (file == 0) return 0; | |
164 fp = bgzf_read_init(); | |
165 fp->file_descriptor = fd; | |
166 fp->open_mode = 'r'; | |
167 #ifdef _USE_KNETFILE | |
168 fp->x.fpr = file; | |
169 #else | |
170 fp->file = file; | |
171 #endif | |
172 return fp; | |
173 } | |
174 | |
175 static | |
176 BGZF* | |
177 open_write(int fd, int compress_level) // compress_level==-1 for the default level | |
178 { | |
179 FILE* file = fdopen(fd, "w"); | |
180 BGZF* fp; | |
181 if (file == 0) return 0; | |
182 fp = malloc(sizeof(BGZF)); | |
183 fp->file_descriptor = fd; | |
184 fp->open_mode = 'w'; | |
185 fp->owned_file = 0; | |
186 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 | |
187 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; | |
188 #ifdef _USE_KNETFILE | |
189 fp->x.fpw = file; | |
190 #else | |
191 fp->file = file; | |
192 #endif | |
193 fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE; | |
194 fp->uncompressed_block = NULL; | |
195 fp->compressed_block_size = MAX_BLOCK_SIZE; | |
196 fp->compressed_block = malloc(MAX_BLOCK_SIZE); | |
197 fp->block_address = 0; | |
198 fp->block_offset = 0; | |
199 fp->block_length = 0; | |
200 fp->error = NULL; | |
201 return fp; | |
202 } | |
203 | |
204 BGZF* | |
205 bgzf_open(const char* __restrict path, const char* __restrict mode) | |
206 { | |
207 BGZF* fp = NULL; | |
208 if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */ | |
209 #ifdef _USE_KNETFILE | |
210 knetFile *file = knet_open(path, mode); | |
211 if (file == 0) return 0; | |
212 fp = bgzf_read_init(); | |
213 fp->file_descriptor = -1; | |
214 fp->open_mode = 'r'; | |
215 fp->x.fpr = file; | |
216 #else | |
217 int fd, oflag = O_RDONLY; | |
218 #ifdef _WIN32 | |
219 oflag |= O_BINARY; | |
220 #endif | |
221 fd = open(path, oflag); | |
222 if (fd == -1) return 0; | |
223 fp = open_read(fd); | |
224 #endif | |
225 } else if (strchr(mode, 'w') || strchr(mode, 'W')) { | |
226 int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC; | |
227 #ifdef _WIN32 | |
228 oflag |= O_BINARY; | |
229 #endif | |
230 fd = open(path, oflag, 0666); | |
231 if (fd == -1) return 0; | |
232 { // set compress_level | |
233 int i; | |
234 for (i = 0; mode[i]; ++i) | |
235 if (mode[i] >= '0' && mode[i] <= '9') break; | |
236 if (mode[i]) compress_level = (int)mode[i] - '0'; | |
237 if (strchr(mode, 'u')) compress_level = 0; | |
238 } | |
239 fp = open_write(fd, compress_level); | |
240 } | |
241 if (fp != NULL) fp->owned_file = 1; | |
242 return fp; | |
243 } | |
244 | |
245 BGZF* | |
246 bgzf_fdopen(int fd, const char * __restrict mode) | |
247 { | |
248 if (fd == -1) return 0; | |
249 if (mode[0] == 'r' || mode[0] == 'R') { | |
250 return open_read(fd); | |
251 } else if (mode[0] == 'w' || mode[0] == 'W') { | |
252 int i, compress_level = -1; | |
253 for (i = 0; mode[i]; ++i) | |
254 if (mode[i] >= '0' && mode[i] <= '9') break; | |
255 if (mode[i]) compress_level = (int)mode[i] - '0'; | |
256 if (strchr(mode, 'u')) compress_level = 0; | |
257 return open_write(fd, compress_level); | |
258 } else { | |
259 return NULL; | |
260 } | |
261 } | |
262 | |
263 static | |
264 int | |
265 deflate_block(BGZF* fp, int block_length) | |
266 { | |
267 // Deflate the block in fp->uncompressed_block into fp->compressed_block. | |
268 // Also adds an extra field that stores the compressed block length. | |
269 | |
270 bgzf_byte_t* buffer = fp->compressed_block; | |
271 int buffer_size = fp->compressed_block_size; | |
272 | |
273 // Init gzip header | |
274 buffer[0] = GZIP_ID1; | |
275 buffer[1] = GZIP_ID2; | |
276 buffer[2] = CM_DEFLATE; | |
277 buffer[3] = FLG_FEXTRA; | |
278 buffer[4] = 0; // mtime | |
279 buffer[5] = 0; | |
280 buffer[6] = 0; | |
281 buffer[7] = 0; | |
282 buffer[8] = 0; | |
283 buffer[9] = OS_UNKNOWN; | |
284 buffer[10] = BGZF_XLEN; | |
285 buffer[11] = 0; | |
286 buffer[12] = BGZF_ID1; | |
287 buffer[13] = BGZF_ID2; | |
288 buffer[14] = BGZF_LEN; | |
289 buffer[15] = 0; | |
290 buffer[16] = 0; // placeholder for block length | |
291 buffer[17] = 0; | |
292 | |
293 // loop to retry for blocks that do not compress enough | |
294 int input_length = block_length; | |
295 int compressed_length = 0; | |
296 while (1) { | |
297 z_stream zs; | |
298 zs.zalloc = NULL; | |
299 zs.zfree = NULL; | |
300 zs.next_in = fp->uncompressed_block; | |
301 zs.avail_in = input_length; | |
302 zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH]; | |
303 zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; | |
304 | |
305 int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, | |
306 GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY); | |
307 if (status != Z_OK) { | |
308 report_error(fp, "deflate init failed"); | |
309 return -1; | |
310 } | |
311 status = deflate(&zs, Z_FINISH); | |
312 if (status != Z_STREAM_END) { | |
313 deflateEnd(&zs); | |
314 if (status == Z_OK) { | |
315 // Not enough space in buffer. | |
316 // Can happen in the rare case the input doesn't compress enough. | |
317 // Reduce the amount of input until it fits. | |
318 input_length -= 1024; | |
319 if (input_length <= 0) { | |
320 // should never happen | |
321 report_error(fp, "input reduction failed"); | |
322 return -1; | |
323 } | |
324 continue; | |
325 } | |
326 report_error(fp, "deflate failed"); | |
327 return -1; | |
328 } | |
329 status = deflateEnd(&zs); | |
330 if (status != Z_OK) { | |
331 report_error(fp, "deflate end failed"); | |
332 return -1; | |
333 } | |
334 compressed_length = zs.total_out; | |
335 compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; | |
336 if (compressed_length > MAX_BLOCK_SIZE) { | |
337 // should never happen | |
338 report_error(fp, "deflate overflow"); | |
339 return -1; | |
340 } | |
341 break; | |
342 } | |
343 | |
344 packInt16((uint8_t*)&buffer[16], compressed_length-1); | |
345 uint32_t crc = crc32(0L, NULL, 0L); | |
346 crc = crc32(crc, fp->uncompressed_block, input_length); | |
347 packInt32((uint8_t*)&buffer[compressed_length-8], crc); | |
348 packInt32((uint8_t*)&buffer[compressed_length-4], input_length); | |
349 | |
350 int remaining = block_length - input_length; | |
351 if (remaining > 0) { | |
352 if (remaining > input_length) { | |
353 // should never happen (check so we can use memcpy) | |
354 report_error(fp, "remainder too large"); | |
355 return -1; | |
356 } | |
357 memcpy(fp->uncompressed_block, | |
358 fp->uncompressed_block + input_length, | |
359 remaining); | |
360 } | |
361 fp->block_offset = remaining; | |
362 return compressed_length; | |
363 } | |
364 | |
365 static | |
366 int | |
367 inflate_block(BGZF* fp, int block_length) | |
368 { | |
369 // Inflate the block in fp->compressed_block into fp->uncompressed_block | |
370 | |
371 z_stream zs; | |
372 int status; | |
373 zs.zalloc = NULL; | |
374 zs.zfree = NULL; | |
375 zs.next_in = fp->compressed_block + 18; | |
376 zs.avail_in = block_length - 16; | |
377 zs.next_out = fp->uncompressed_block; | |
378 zs.avail_out = fp->uncompressed_block_size; | |
379 | |
380 status = inflateInit2(&zs, GZIP_WINDOW_BITS); | |
381 if (status != Z_OK) { | |
382 report_error(fp, "inflate init failed"); | |
383 return -1; | |
384 } | |
385 status = inflate(&zs, Z_FINISH); | |
386 if (status != Z_STREAM_END) { | |
387 inflateEnd(&zs); | |
388 report_error(fp, "inflate failed"); | |
389 return -1; | |
390 } | |
391 status = inflateEnd(&zs); | |
392 if (status != Z_OK) { | |
393 report_error(fp, "inflate failed"); | |
394 return -1; | |
395 } | |
396 return zs.total_out; | |
397 } | |
398 | |
399 static | |
400 int | |
401 check_header(const bgzf_byte_t* header) | |
402 { | |
403 return (header[0] == GZIP_ID1 && | |
404 header[1] == (bgzf_byte_t) GZIP_ID2 && | |
405 header[2] == Z_DEFLATED && | |
406 (header[3] & FLG_FEXTRA) != 0 && | |
407 unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN && | |
408 header[12] == BGZF_ID1 && | |
409 header[13] == BGZF_ID2 && | |
410 unpackInt16((uint8_t*)&header[14]) == BGZF_LEN); | |
411 } | |
412 | |
413 static void free_cache(BGZF *fp) | |
414 { | |
415 khint_t k; | |
416 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
417 if (fp->open_mode != 'r') return; | |
418 for (k = kh_begin(h); k < kh_end(h); ++k) | |
419 if (kh_exist(h, k)) free(kh_val(h, k).block); | |
420 kh_destroy(cache, h); | |
421 } | |
422 | |
423 static int load_block_from_cache(BGZF *fp, int64_t block_address) | |
424 { | |
425 khint_t k; | |
426 cache_t *p; | |
427 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
428 k = kh_get(cache, h, block_address); | |
429 if (k == kh_end(h)) return 0; | |
430 p = &kh_val(h, k); | |
431 if (fp->block_length != 0) fp->block_offset = 0; | |
432 fp->block_address = block_address; | |
433 fp->block_length = p->size; | |
434 memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE); | |
435 #ifdef _USE_KNETFILE | |
436 knet_seek(fp->x.fpr, p->end_offset, SEEK_SET); | |
437 #else | |
438 fseeko(fp->file, p->end_offset, SEEK_SET); | |
439 #endif | |
440 return p->size; | |
441 } | |
442 | |
443 static void cache_block(BGZF *fp, int size) | |
444 { | |
445 int ret; | |
446 khint_t k; | |
447 cache_t *p; | |
448 khash_t(cache) *h = (khash_t(cache)*)fp->cache; | |
449 if (MAX_BLOCK_SIZE >= fp->cache_size) return; | |
450 if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) { | |
451 /* A better way would be to remove the oldest block in the | |
452 * cache, but here we remove a random one for simplicity. This | |
453 * should not have a big impact on performance. */ | |
454 for (k = kh_begin(h); k < kh_end(h); ++k) | |
455 if (kh_exist(h, k)) break; | |
456 if (k < kh_end(h)) { | |
457 free(kh_val(h, k).block); | |
458 kh_del(cache, h, k); | |
459 } | |
460 } | |
461 k = kh_put(cache, h, fp->block_address, &ret); | |
462 if (ret == 0) return; // if this happens, a bug! | |
463 p = &kh_val(h, k); | |
464 p->size = fp->block_length; | |
465 p->end_offset = fp->block_address + size; | |
466 p->block = malloc(MAX_BLOCK_SIZE); | |
467 memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE); | |
468 } | |
469 | |
470 int | |
471 bgzf_read_block(BGZF* fp) | |
472 { | |
473 bgzf_byte_t header[BLOCK_HEADER_LENGTH]; | |
474 int count, size = 0, block_length, remaining; | |
475 #ifdef _USE_KNETFILE | |
476 int64_t block_address = knet_tell(fp->x.fpr); | |
477 if (load_block_from_cache(fp, block_address)) return 0; | |
478 count = knet_read(fp->x.fpr, header, sizeof(header)); | |
479 #else | |
480 int64_t block_address = ftello(fp->file); | |
481 if (load_block_from_cache(fp, block_address)) return 0; | |
482 count = fread(header, 1, sizeof(header), fp->file); | |
483 #endif | |
484 if (count == 0) { | |
485 fp->block_length = 0; | |
486 return 0; | |
487 } | |
488 size = count; | |
489 if (count != sizeof(header)) { | |
490 report_error(fp, "read failed"); | |
491 return -1; | |
492 } | |
493 if (!check_header(header)) { | |
494 report_error(fp, "invalid block header"); | |
495 return -1; | |
496 } | |
497 block_length = unpackInt16((uint8_t*)&header[16]) + 1; | |
498 bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block; | |
499 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); | |
500 remaining = block_length - BLOCK_HEADER_LENGTH; | |
501 #ifdef _USE_KNETFILE | |
502 count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining); | |
503 #else | |
504 count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file); | |
505 #endif | |
506 if (count != remaining) { | |
507 report_error(fp, "read failed"); | |
508 return -1; | |
509 } | |
510 size += count; | |
511 count = inflate_block(fp, block_length); | |
512 if (count < 0) return -1; | |
513 if (fp->block_length != 0) { | |
514 // Do not reset offset if this read follows a seek. | |
515 fp->block_offset = 0; | |
516 } | |
517 fp->block_address = block_address; | |
518 fp->block_length = count; | |
519 cache_block(fp, size); | |
520 return 0; | |
521 } | |
522 | |
523 int | |
524 bgzf_read(BGZF* fp, void* data, int length) | |
525 { | |
526 if (length <= 0) { | |
527 return 0; | |
528 } | |
529 if (fp->open_mode != 'r') { | |
530 report_error(fp, "file not open for reading"); | |
531 return -1; | |
532 } | |
533 | |
534 int bytes_read = 0; | |
535 bgzf_byte_t* output = data; | |
536 while (bytes_read < length) { | |
537 int copy_length, available = fp->block_length - fp->block_offset; | |
538 bgzf_byte_t *buffer; | |
539 if (available <= 0) { | |
540 if (bgzf_read_block(fp) != 0) { | |
541 return -1; | |
542 } | |
543 available = fp->block_length - fp->block_offset; | |
544 if (available <= 0) { | |
545 break; | |
546 } | |
547 } | |
548 copy_length = bgzf_min(length-bytes_read, available); | |
549 buffer = fp->uncompressed_block; | |
550 memcpy(output, buffer + fp->block_offset, copy_length); | |
551 fp->block_offset += copy_length; | |
552 output += copy_length; | |
553 bytes_read += copy_length; | |
554 } | |
555 if (fp->block_offset == fp->block_length) { | |
556 #ifdef _USE_KNETFILE | |
557 fp->block_address = knet_tell(fp->x.fpr); | |
558 #else | |
559 fp->block_address = ftello(fp->file); | |
560 #endif | |
561 fp->block_offset = 0; | |
562 fp->block_length = 0; | |
563 } | |
564 return bytes_read; | |
565 } | |
566 | |
567 int bgzf_flush(BGZF* fp) | |
568 { | |
569 while (fp->block_offset > 0) { | |
570 int count, block_length; | |
571 block_length = deflate_block(fp, fp->block_offset); | |
572 if (block_length < 0) return -1; | |
573 #ifdef _USE_KNETFILE | |
574 count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); | |
575 #else | |
576 count = fwrite(fp->compressed_block, 1, block_length, fp->file); | |
577 #endif | |
578 if (count != block_length) { | |
579 report_error(fp, "write failed"); | |
580 return -1; | |
581 } | |
582 fp->block_address += block_length; | |
583 } | |
584 return 0; | |
585 } | |
586 | |
587 int bgzf_flush_try(BGZF *fp, int size) | |
588 { | |
589 if (fp->block_offset + size > fp->uncompressed_block_size) | |
590 return bgzf_flush(fp); | |
591 return -1; | |
592 } | |
593 | |
594 int bgzf_write(BGZF* fp, const void* data, int length) | |
595 { | |
596 const bgzf_byte_t *input = data; | |
597 int block_length, bytes_written; | |
598 if (fp->open_mode != 'w') { | |
599 report_error(fp, "file not open for writing"); | |
600 return -1; | |
601 } | |
602 | |
603 if (fp->uncompressed_block == NULL) | |
604 fp->uncompressed_block = malloc(fp->uncompressed_block_size); | |
605 | |
606 input = data; | |
607 block_length = fp->uncompressed_block_size; | |
608 bytes_written = 0; | |
609 while (bytes_written < length) { | |
610 int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written); | |
611 bgzf_byte_t* buffer = fp->uncompressed_block; | |
612 memcpy(buffer + fp->block_offset, input, copy_length); | |
613 fp->block_offset += copy_length; | |
614 input += copy_length; | |
615 bytes_written += copy_length; | |
616 if (fp->block_offset == block_length) { | |
617 if (bgzf_flush(fp) != 0) { | |
618 break; | |
619 } | |
620 } | |
621 } | |
622 return bytes_written; | |
623 } | |
624 | |
625 int bgzf_close(BGZF* fp) | |
626 { | |
627 if (fp->open_mode == 'w') { | |
628 if (bgzf_flush(fp) != 0) return -1; | |
629 { // add an empty block | |
630 int count, block_length = deflate_block(fp, 0); | |
631 #ifdef _USE_KNETFILE | |
632 count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); | |
633 #else | |
634 count = fwrite(fp->compressed_block, 1, block_length, fp->file); | |
635 #endif | |
636 } | |
637 #ifdef _USE_KNETFILE | |
638 if (fflush(fp->x.fpw) != 0) { | |
639 #else | |
640 if (fflush(fp->file) != 0) { | |
641 #endif | |
642 report_error(fp, "flush failed"); | |
643 return -1; | |
644 } | |
645 } | |
646 if (fp->owned_file) { | |
647 #ifdef _USE_KNETFILE | |
648 int ret; | |
649 if (fp->open_mode == 'w') ret = fclose(fp->x.fpw); | |
650 else ret = knet_close(fp->x.fpr); | |
651 if (ret != 0) return -1; | |
652 #else | |
653 if (fclose(fp->file) != 0) return -1; | |
654 #endif | |
655 } | |
656 free(fp->uncompressed_block); | |
657 free(fp->compressed_block); | |
658 free_cache(fp); | |
659 free(fp); | |
660 return 0; | |
661 } | |
662 | |
663 void bgzf_set_cache_size(BGZF *fp, int cache_size) | |
664 { | |
665 if (fp) fp->cache_size = cache_size; | |
666 } | |
667 | |
668 int bgzf_check_EOF(BGZF *fp) | |
669 { | |
670 static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; | |
671 uint8_t buf[28]; | |
672 off_t offset; | |
673 #ifdef _USE_KNETFILE | |
674 offset = knet_tell(fp->x.fpr); | |
675 if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1; | |
676 knet_read(fp->x.fpr, buf, 28); | |
677 knet_seek(fp->x.fpr, offset, SEEK_SET); | |
678 #else | |
679 offset = ftello(fp->file); | |
680 if (fseeko(fp->file, -28, SEEK_END) != 0) return -1; | |
681 fread(buf, 1, 28, fp->file); | |
682 fseeko(fp->file, offset, SEEK_SET); | |
683 #endif | |
684 return (memcmp(magic, buf, 28) == 0)? 1 : 0; | |
685 } | |
686 | |
687 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) | |
688 { | |
689 int block_offset; | |
690 int64_t block_address; | |
691 | |
692 if (fp->open_mode != 'r') { | |
693 report_error(fp, "file not open for read"); | |
694 return -1; | |
695 } | |
696 if (where != SEEK_SET) { | |
697 report_error(fp, "unimplemented seek option"); | |
698 return -1; | |
699 } | |
700 block_offset = pos & 0xFFFF; | |
701 block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; | |
702 #ifdef _USE_KNETFILE | |
703 if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) { | |
704 #else | |
705 if (fseeko(fp->file, block_address, SEEK_SET) != 0) { | |
706 #endif | |
707 report_error(fp, "seek failed"); | |
708 return -1; | |
709 } | |
710 fp->block_length = 0; // indicates current block is not loaded | |
711 fp->block_address = block_address; | |
712 fp->block_offset = block_offset; | |
713 return 0; | |
714 } |