annotate ezBAMQC/src/htslib/bgzf.c @ 3:ddfb71ec32ed

Uploaded
author cshl-bsr
date Tue, 29 Mar 2016 15:31:11 -0400
parents dfa3745e5fd8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /* The MIT License
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4 2011, 2012 Attractive Chaos <attractor@live.co.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Copyright (C) 2009, 2013, 2014 Genome Research Ltd
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7 Permission is hereby granted, free of charge, to any person obtaining a copy
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 of this software and associated documentation files (the "Software"), to deal
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 in the Software without restriction, including without limitation the rights
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 copies of the Software, and to permit persons to whom the Software is
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 furnished to do so, subject to the following conditions:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14 The above copyright notice and this permission notice shall be included in
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 all copies or substantial portions of the Software.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 THE SOFTWARE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 #include <stdio.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 #include <errno.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30 #include <unistd.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include <pthread.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #include <sys/types.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34 #include <inttypes.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include "htslib/hts.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37 #include "htslib/bgzf.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 #include "htslib/hfile.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/* Compile-time switches: code guarded by #ifdef BGZF_CACHE is enabled here. */
#define BGZF_CACHE
#define BGZF_MT

/* Sizes in bytes of the fixed BGZF block header and trailer. */
#define BLOCK_HEADER_LENGTH 18
#define BLOCK_FOOTER_LENGTH 8
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 /* BGZF/GZIP header (specialized from RFC 1952; little endian):
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN|
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 BGZF extension:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 ^ ^ ^ ^
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 | | | |
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 FLG.EXTRA XLEN B C
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 BGZF format is compatible with GZIP. It limits the size of each compressed
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 block to 2^16 bytes and adds an extra "BC" field in the gzip header which
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 records the size.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Template for the 18-byte BGZF block header (plus the string terminator).
 * The final two header bytes are a placeholder that bgzf_compress() overwrites
 * with the block length.
 */
static const uint8_t g_magic[19] =
    "\037\213\010\4"    /* gzip magic, deflate method, flags with the extra-field bit */
    "\0\0\0\0"          /* four zero bytes */
    "\0\377"            /* zero byte, then 255 */
    "\6\0"              /* XLEN = 6 (little endian) */
    "\102\103\2\0"      /* 'B', 'C', subfield length = 2 */
    "\0\0";             /* block-length placeholder */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
#ifdef BGZF_CACHE
/* One cached block, stored as the value of the 64-bit-keyed `cache` hash map. */
typedef struct {
    int size;            /* copied into fp->block_length when the entry is loaded */
    uint8_t *block;      /* heap buffer with the block data; released by free_cache() */
    int64_t end_offset;  /* file offset the reader seeks to after loading the entry */
} cache_t;

#include "htslib/khash.h"
KHASH_MAP_INIT_INT64(cache, cache_t)
#endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/* One index entry pairing an uncompressed offset with a compressed offset. */
typedef struct {
    uint64_t uaddr;  /* offset with respect to the uncompressed data */
    uint64_t caddr;  /* offset with respect to the compressed data */
} bgzidx1_t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 struct __bgzidx_t
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 int noffs, moffs; // the size of the index, n:used, m:allocated
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 bgzidx1_t *offs; // offsets
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 uint64_t ublock_addr; // offset of the current block (uncompressed data)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 void bgzf_index_destroy(BGZF *fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88 int bgzf_index_add_block(BGZF *fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/* Store a 16-bit value into buffer[0..1], least significant byte first. */
static inline void packInt16(uint8_t *buffer, uint16_t value)
{
    const uint8_t low  = (uint8_t)(value & 0xff);
    const uint8_t high = (uint8_t)(value >> 8);

    buffer[0] = low;
    buffer[1] = high;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/* Read a little-endian 16-bit value from buffer[0..1]; the result is in 0..65535. */
static inline int unpackInt16(const uint8_t *buffer)
{
    const int low  = buffer[0];
    const int high = buffer[1];

    return (high << 8) | low;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/* Store a 32-bit value into buffer[0..3], least significant byte first. */
static inline void packInt32(uint8_t *buffer, uint32_t value)
{
    int i;

    for (i = 0; i < 4; ++i)
        buffer[i] = (uint8_t)(value >> (8 * i));
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 static BGZF *bgzf_read_init(hFILE *hfpr)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 BGZF *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 uint8_t magic[18];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 ssize_t n = hpeek(hfpr, magic, 18);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114 if (n < 0) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 fp = (BGZF*)calloc(1, sizeof(BGZF));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 if (fp == NULL) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119 fp->is_write = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 fp->is_compressed = (n==2 && magic[0]==0x1f && magic[1]==0x8b);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 fp->is_compressed = (n==18 && magic[0]==0x1f && magic[1]==0x8b) ? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 fp->is_gzip = ( !fp->is_compressed || ((magic[3]&4) && memcmp(&magic[12], "BC\2\0",4)==0) ) ? 0 : 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 #ifdef BGZF_CACHE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126 fp->cache = kh_init(cache);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Extract the compression level from a mode string.
 *
 * Returns the value of the first decimal digit found in mode (0-9),
 * -1 (default level) when mode contains no digit, or -2 (plain uncompressed)
 * whenever mode contains 'u', regardless of any digit.
 */
static int mode2level(const char *__restrict mode)
{
    const char *p = mode;
    int level = -1;

    // advance to the first digit, or to the terminating NUL
    while (*p && !(*p >= '0' && *p <= '9')) ++p;
    if (*p) level = (int)*p - '0';

    if (strchr(mode, 'u') != NULL) level = -2;
    return level;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141 static BGZF *bgzf_write_init(const char *mode)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 BGZF *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144 fp = (BGZF*)calloc(1, sizeof(BGZF));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 fp->is_write = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 int compress_level = mode2level(mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 if ( compress_level==-2 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
148 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
149 fp->is_compressed = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
150 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
151 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
152 fp->is_compressed = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
153 fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
154 fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
155 fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
156 if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
157 if ( strchr(mode,'g') )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
158 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
159 // gzip output
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
160 fp->is_gzip = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
161 fp->gz_stream = (z_stream*)calloc(1,sizeof(z_stream));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
162 fp->gz_stream->zalloc = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
163 fp->gz_stream->zfree = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
164 if ( deflateInit2(fp->gz_stream, fp->compress_level, Z_DEFLATED, 15|16, 8, Z_DEFAULT_STRATEGY)!=Z_OK ) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
165 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
166 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
167 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
168
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
169 BGZF *bgzf_open(const char *path, const char *mode)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
170 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
171 BGZF *fp = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
172 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
173 if (strchr(mode, 'r')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
174 hFILE *fpr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
175 if ((fpr = hopen(path, mode)) == 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
176 fp = bgzf_read_init(fpr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
177 if (fp == 0) { hclose_abruptly(fpr); return NULL; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
178 fp->fp = fpr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
179 } else if (strchr(mode, 'w') || strchr(mode, 'a')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
180 hFILE *fpw;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
181 if ((fpw = hopen(path, mode)) == 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
182 fp = bgzf_write_init(mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
183 fp->fp = fpw;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
184 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
185 else { errno = EINVAL; return 0; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
186
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
187 fp->is_be = ed_is_big();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
188 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
189 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
190
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
191 BGZF *bgzf_dopen(int fd, const char *mode)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
192 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
193 BGZF *fp = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
194 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
195 if (strchr(mode, 'r')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
196 hFILE *fpr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
197 if ((fpr = hdopen(fd, mode)) == 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
198 fp = bgzf_read_init(fpr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
199 if (fp == 0) { hclose_abruptly(fpr); return NULL; } // FIXME this closes fd
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
200 fp->fp = fpr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
201 } else if (strchr(mode, 'w') || strchr(mode, 'a')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
202 hFILE *fpw;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
203 if ((fpw = hdopen(fd, mode)) == 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
204 fp = bgzf_write_init(mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
205 fp->fp = fpw;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
206 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
207 else { errno = EINVAL; return 0; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
208
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
209 fp->is_be = ed_is_big();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
210 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
211 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
212
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
213 BGZF *bgzf_hopen(hFILE *hfp, const char *mode)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
214 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
215 BGZF *fp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
216 assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
217 if (strchr(mode, 'r')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
218 fp = bgzf_read_init(hfp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
219 if (fp == NULL) return NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
220 } else if (strchr(mode, 'w') || strchr(mode, 'a')) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
221 fp = bgzf_write_init(mode);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
222 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
223 else { errno = EINVAL; return 0; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
224
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
225 fp->fp = hfp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
226 fp->is_be = ed_is_big();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
227 return fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
228 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
229
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
230 static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
231 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
232 uint32_t crc;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
233 z_stream zs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
234 uint8_t *dst = (uint8_t*)_dst;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
235
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
236 // compress the body
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
237 zs.zalloc = NULL; zs.zfree = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
238 zs.next_in = (Bytef*)src;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
239 zs.avail_in = slen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
240 zs.next_out = dst + BLOCK_HEADER_LENGTH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
241 zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
242 if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
243 if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
244 if (deflateEnd(&zs) != Z_OK) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
245 *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
246 // write the header
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
247 memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
248 packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
249 // write the footer
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
250 crc = crc32(crc32(0L, NULL, 0L), (Bytef*)src, slen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
251 packInt32((uint8_t*)&dst[*dlen - 8], crc);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
252 packInt32((uint8_t*)&dst[*dlen - 4], slen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
253 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
254 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
255
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
256 static int bgzf_gzip_compress(BGZF *fp, void *_dst, int *dlen, void *src, int slen, int level)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
257 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
258 uint8_t *dst = (uint8_t*)_dst;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
259 z_stream *zs = fp->gz_stream;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
260 int flush = slen ? Z_NO_FLUSH : Z_FINISH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
261 zs->next_in = (Bytef*)src;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
262 zs->avail_in = slen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
263 zs->next_out = dst;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
264 zs->avail_out = *dlen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
265 if ( deflate(zs, flush) == Z_STREAM_ERROR ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
266 *dlen = *dlen - zs->avail_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
267 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
268 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
269
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
270 // Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
271 static int deflate_block(BGZF *fp, int block_length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
272 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
273 int comp_size = BGZF_MAX_BLOCK_SIZE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
274 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
275 if ( !fp->is_gzip )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
276 ret = bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
277 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
278 ret = bgzf_gzip_compress(fp, fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
279
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
280 if ( ret != 0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
281 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
282 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
283 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
284 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
285 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
286 return comp_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
287 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
288
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
289 // Inflate the block in fp->compressed_block into fp->uncompressed_block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
290 static int inflate_block(BGZF* fp, int block_length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
291 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
292 z_stream zs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
293 zs.zalloc = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
294 zs.zfree = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
295 zs.next_in = (Bytef*)fp->compressed_block + 18;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
296 zs.avail_in = block_length - 16;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
297 zs.next_out = (Bytef*)fp->uncompressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
298 zs.avail_out = BGZF_MAX_BLOCK_SIZE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
299
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
300 if (inflateInit2(&zs, -15) != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
301 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
302 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
303 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
304 if (inflate(&zs, Z_FINISH) != Z_STREAM_END) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
305 inflateEnd(&zs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
306 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
307 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
308 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
309 if (inflateEnd(&zs) != Z_OK) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
310 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
311 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
312 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
313 return zs.total_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
314 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
315
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
316 static int inflate_gzip_block(BGZF *fp, int cached)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
317 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
318 int ret = Z_OK;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
319 do
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
320 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
321 if ( !cached && fp->gz_stream->avail_out!=0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
322 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
323 fp->gz_stream->avail_in = hread(fp->fp, fp->compressed_block, BGZF_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
324 if ( fp->gz_stream->avail_in<=0 ) return fp->gz_stream->avail_in;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
325 if ( fp->gz_stream->avail_in==0 ) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
326 fp->gz_stream->next_in = fp->compressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
327 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
328 else cached = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
329 do
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
330 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
331 fp->gz_stream->next_out = (Bytef*)fp->uncompressed_block + fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
332 fp->gz_stream->avail_out = BGZF_MAX_BLOCK_SIZE - fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
333 ret = inflate(fp->gz_stream, Z_NO_FLUSH);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
334 if ( ret==Z_BUF_ERROR ) continue; // non-critical error
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
335 if ( ret<0 ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
336 unsigned int have = BGZF_MAX_BLOCK_SIZE - fp->gz_stream->avail_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
337 if ( have ) return have;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
338 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
339 while ( fp->gz_stream->avail_out == 0 );
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
340 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
341 while (ret != Z_STREAM_END);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
342 return BGZF_MAX_BLOCK_SIZE - fp->gz_stream->avail_out;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
343 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
344
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
/*
 * Classify an 18-byte block header.
 *
 * Returns  0 for a BGZF header (gzip with the extra-field flag, XLEN == 6,
 *            subfield id "BC" and subfield length 2),
 *         -1 for a gzip header that is not BGZF,
 *         -2 when the bytes are not a gzip/deflate header at all.
 */
static int check_header(const uint8_t *header)
{
    int is_gzip = header[0] == 31 && header[1] == 139 && header[2] == 8;
    if ( !is_gzip ) return -2;

    if ( (header[3] & 4) == 0 ) return -1;                  // no extra field present
    if ( unpackInt16(&header[10]) != 6 ) return -1;         // XLEN
    if ( header[12] != 'B' || header[13] != 'C' ) return -1;
    if ( unpackInt16(&header[14]) != 2 ) return -1;         // subfield length
    return 0;
}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
354
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
355 #ifdef BGZF_CACHE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
356 static void free_cache(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
357 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
358 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
359 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
360 if (fp->is_write) return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
361 for (k = kh_begin(h); k < kh_end(h); ++k)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
362 if (kh_exist(h, k)) free(kh_val(h, k).block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
363 kh_destroy(cache, h);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
364 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
365
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
366 static int load_block_from_cache(BGZF *fp, int64_t block_address)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
367 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
368 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
369 cache_t *p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
370 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
371 k = kh_get(cache, h, block_address);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
372 if (k == kh_end(h)) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
373 p = &kh_val(h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
374 if (fp->block_length != 0) fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
375 fp->block_address = block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
376 fp->block_length = p->size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
377 memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
378 if ( hseek(fp->fp, p->end_offset, SEEK_SET) < 0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
379 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
380 // todo: move the error up
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
381 fprintf(stderr,"Could not hseek to %"PRId64"\n", p->end_offset);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
382 exit(1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
383 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
384 return p->size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
385 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
386
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
387 static void cache_block(BGZF *fp, int size)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
388 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
389 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
390 khint_t k;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
391 cache_t *p;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
392 khash_t(cache) *h = (khash_t(cache)*)fp->cache;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
393 if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
394 if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > (uint32_t)fp->cache_size) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
395 /* A better way would be to remove the oldest block in the
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
396 * cache, but here we remove a random one for simplicity. This
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
397 * should not have a big impact on performance. */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
398 for (k = kh_begin(h); k < kh_end(h); ++k)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
399 if (kh_exist(h, k)) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
400 if (k < kh_end(h)) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
401 free(kh_val(h, k).block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
402 kh_del(cache, h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
403 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
404 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
405 k = kh_put(cache, h, fp->block_address, &ret);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
406 if (ret == 0) return; // if this happens, a bug!
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
407 p = &kh_val(h, k);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
408 p->size = fp->block_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
409 p->end_offset = fp->block_address + size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
410 p->block = (uint8_t*)malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
411 memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
412 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
413 #else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
414 static void free_cache(BGZF *fp) {}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
415 static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
416 static void cache_block(BGZF *fp, int size) {}
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
417 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
418
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
419 int bgzf_read_block(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
420 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
421 uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
422 int count, size = 0, block_length, remaining;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
423
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
424 // Reading an uncompressed file
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
425 if ( !fp->is_compressed )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
426 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
427 count = hread(fp->fp, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
428 if ( count==0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
429 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
430 fp->block_length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
431 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
432 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
433 if (fp->block_length != 0) fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
434 fp->block_address += count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
435 fp->block_length = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
436 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
437 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
438
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
439 // Reading compressed file
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
440 int64_t block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
441 block_address = htell(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
442 if ( fp->is_gzip && fp->gz_stream ) // is this is a initialized gzip stream?
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
443 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
444 count = inflate_gzip_block(fp, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
445 if ( count<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
446 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
447 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
448 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
449 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
450 fp->block_length = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
451 fp->block_address = block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
452 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
453 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
454 if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
455 count = hread(fp->fp, header, sizeof(header));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
456 if (count == 0) { // no data read
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
457 fp->block_length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
458 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
459 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
460 int ret;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
461 if ( count != sizeof(header) || (ret=check_header(header))==-2 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
462 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
463 fp->errcode |= BGZF_ERR_HEADER;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
464 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
465 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
466 if ( ret==-1 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
467 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
468 // GZIP, not BGZF
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
469 uint8_t *cblock = (uint8_t*)fp->compressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
470 memcpy(cblock, header, sizeof(header));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
471 count = hread(fp->fp, cblock+sizeof(header), BGZF_BLOCK_SIZE - sizeof(header)) + sizeof(header);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
472 int nskip = 10;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
473
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
474 // Check optional fields to skip: FLG.FNAME,FLG.FCOMMENT,FLG.FHCRC,FLG.FEXTRA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
475 // Note: Some of these fields are untested, I did not have appropriate data available
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
476 if ( header[3] & 0x4 ) // FLG.FEXTRA
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
477 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
478 nskip += unpackInt16(&cblock[nskip]) + 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
479 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
480 if ( header[3] & 0x8 ) // FLG.FNAME
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
481 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
482 while ( nskip<BGZF_BLOCK_SIZE && cblock[nskip] ) nskip++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
483 if ( nskip==BGZF_BLOCK_SIZE )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
484 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
485 fp->errcode |= BGZF_ERR_HEADER;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
486 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
487 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
488 nskip++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
489 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
490 if ( header[3] & 0x10 ) // FLG.FCOMMENT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
491 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
492 while ( nskip<BGZF_BLOCK_SIZE && cblock[nskip] ) nskip++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
493 if ( nskip==BGZF_BLOCK_SIZE )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
494 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
495 fp->errcode |= BGZF_ERR_HEADER;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
496 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
497 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
498 nskip++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
499 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
500 if ( header[3] & 0x2 ) nskip += 2; // FLG.FHCRC
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
501
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
502 fp->is_gzip = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
503 fp->gz_stream = (z_stream*) calloc(1,sizeof(z_stream));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
504 int ret = inflateInit2(fp->gz_stream, -15);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
505 if (ret != Z_OK)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
506 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
507 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
508 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
509 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
510 fp->gz_stream->avail_in = count - nskip;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
511 fp->gz_stream->next_in = cblock + nskip;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
512 count = inflate_gzip_block(fp, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
513 if ( count<0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
514 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
515 fp->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
516 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
517 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
518 fp->block_length = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
519 fp->block_address = block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
520 if ( fp->idx_build_otf ) return -1; // cannot build index for gzip
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
521 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
522 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
523 size = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
524 block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
525 compressed_block = (uint8_t*)fp->compressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
526 memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
527 remaining = block_length - BLOCK_HEADER_LENGTH;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
528 count = hread(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
529 if (count != remaining) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
530 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
531 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
532 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
533 size += count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
534 if ((count = inflate_block(fp, block_length)) < 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
535 if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
536 fp->block_address = block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
537 fp->block_length = count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
538 if ( fp->idx_build_otf )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
539 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
540 bgzf_index_add_block(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
541 fp->idx->ublock_addr += count;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
542 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
543 cache_block(fp, size);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
544 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
545 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
546
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
547 ssize_t bgzf_read(BGZF *fp, void *data, size_t length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
548 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
549 ssize_t bytes_read = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
550 uint8_t *output = (uint8_t*)data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
551 if (length <= 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
552 assert(fp->is_write == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
553 while (bytes_read < length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
554 int copy_length, available = fp->block_length - fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
555 uint8_t *buffer;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
556 if (available <= 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
557 if (bgzf_read_block(fp) != 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
558 available = fp->block_length - fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
559 if (available <= 0) break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
560 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
561 copy_length = length - bytes_read < available? length - bytes_read : available;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
562 buffer = (uint8_t*)fp->uncompressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
563 memcpy(output, buffer + fp->block_offset, copy_length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
564 fp->block_offset += copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
565 output += copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
566 bytes_read += copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
567 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
568 if (fp->block_offset == fp->block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
569 fp->block_address = htell(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
570 fp->block_offset = fp->block_length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
571 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
572 fp->uncompressed_address += bytes_read;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
573 return bytes_read;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
574 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
575
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
576 ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
577 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
578 return hread(fp->fp, data, length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
579 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
580
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
581 #ifdef BGZF_MT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
582
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
// Per-thread state of one compression worker.
typedef struct {
    struct bgzf_mtaux_t *mt;    // shared multi-threading state this worker belongs to
    void *buf;                  // scratch buffer receiving the compressed output of one block
    int i;                      // worker index; also the first queue slot this worker handles
    int errcode;                // BGZF_ERR_* bits collected during the last round
    int toproc;                 // set to 1 by the master when there is work to do
    int compress_level;         // compression level passed to bgzf_compress()
} worker_t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
588
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
589 typedef struct bgzf_mtaux_t {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
590 int n_threads, n_blks, curr, done;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
591 volatile int proc_cnt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
592 void **blk;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
593 int *len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
594 worker_t *w;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
595 pthread_t *tid;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
596 pthread_mutex_t lock;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
597 pthread_cond_t cv;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
598 } mtaux_t;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
599
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
600 static int worker_aux(worker_t *w)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
601 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
602 int i, stop = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
603 // wait for condition: to process or all done
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
604 pthread_mutex_lock(&w->mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
605 while (!w->toproc && !w->mt->done)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
606 pthread_cond_wait(&w->mt->cv, &w->mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
607 if (w->mt->done) stop = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
608 w->toproc = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
609 pthread_mutex_unlock(&w->mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
610 if (stop) return 1; // to quit the thread
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
611 w->errcode = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
612 for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
613 int clen = BGZF_MAX_BLOCK_SIZE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
614 if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->compress_level) != 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
615 w->errcode |= BGZF_ERR_ZLIB;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
616 memcpy(w->mt->blk[i], w->buf, clen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
617 w->mt->len[i] = clen;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
618 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
619 __sync_fetch_and_add(&w->mt->proc_cnt, 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
620 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
621 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
622
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
623 static void *mt_worker(void *data)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
624 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
625 while (worker_aux((worker_t*)data) == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
626 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
627 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
628
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
629 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
630 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
631 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
632 mtaux_t *mt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
633 pthread_attr_t attr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
634 if (!fp->is_write || fp->mt || n_threads <= 1) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
635 mt = (mtaux_t*)calloc(1, sizeof(mtaux_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
636 mt->n_threads = n_threads;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
637 mt->n_blks = n_threads * n_sub_blks;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
638 mt->len = (int*)calloc(mt->n_blks, sizeof(int));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
639 mt->blk = (void**)calloc(mt->n_blks, sizeof(void*));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
640 for (i = 0; i < mt->n_blks; ++i)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
641 mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
642 mt->tid = (pthread_t*)calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
643 mt->w = (worker_t*)calloc(mt->n_threads, sizeof(worker_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
644 for (i = 0; i < mt->n_threads; ++i) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
645 mt->w[i].i = i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
646 mt->w[i].mt = mt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
647 mt->w[i].compress_level = fp->compress_level;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
648 mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
649 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
650 pthread_attr_init(&attr);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
651 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
652 pthread_mutex_init(&mt->lock, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
653 pthread_cond_init(&mt->cv, 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
654 for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
655 pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
656 fp->mt = mt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
657 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
658 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
659
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
660 static void mt_destroy(mtaux_t *mt)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
661 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
662 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
663 // signal all workers to quit
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
664 pthread_mutex_lock(&mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
665 mt->done = 1; mt->proc_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
666 pthread_cond_broadcast(&mt->cv);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
667 pthread_mutex_unlock(&mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
668 for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // worker 0 is effectively launched by the master thread
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
669 // free other data allocated on heap
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
670 for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
671 for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
672 free(mt->blk); free(mt->len); free(mt->w); free(mt->tid);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
673 pthread_cond_destroy(&mt->cv);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
674 pthread_mutex_destroy(&mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
675 free(mt);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
676 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
677
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
678 static void mt_queue(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
679 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
680 mtaux_t *mt = fp->mt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
681 assert(mt->curr < mt->n_blks); // guaranteed by the caller
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
682 memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
683 mt->len[mt->curr] = fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
684 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
685 ++mt->curr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
686 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
687
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
688 static int mt_flush_queue(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
689 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
690 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
691 mtaux_t *mt = fp->mt;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
692 // signal all the workers to compress
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
693 pthread_mutex_lock(&mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
694 for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
695 mt->proc_cnt = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
696 pthread_cond_broadcast(&mt->cv);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
697 pthread_mutex_unlock(&mt->lock);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
698 // worker 0 is doing things here
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
699 worker_aux(&mt->w[0]);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
700 // wait for all the threads to complete
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
701 while (mt->proc_cnt < mt->n_threads);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
702 // dump data to disk
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
703 for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
704 for (i = 0; i < mt->curr; ++i)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
705 if (hwrite(fp->fp, mt->blk[i], mt->len[i]) != mt->len[i]) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
706 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
707 break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
708 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
709 mt->curr = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
710 return (fp->errcode == 0)? 0 : -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
711 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
712
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
713 static int lazy_flush(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
714 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
715 if (fp->mt) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
716 if (fp->block_offset) mt_queue(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
717 return (fp->mt->curr < fp->mt->n_blks)? 0 : mt_flush_queue(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
718 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
719 else return bgzf_flush(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
720 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
721
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
722 #else // ~ #ifdef BGZF_MT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
723
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
724 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
725 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
726 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
727 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
728
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
729 static inline int lazy_flush(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
730 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
731 return bgzf_flush(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
732 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
733
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
734 #endif // ~ #ifdef BGZF_MT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
735
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
736 int bgzf_flush(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
737 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
738 if (!fp->is_write) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
739 #ifdef BGZF_MT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
740 if (fp->mt) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
741 if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
742 return mt_flush_queue(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
743 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
744 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
745 while (fp->block_offset > 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
746 if ( fp->idx_build_otf )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
747 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
748 bgzf_index_add_block(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
749 fp->idx->ublock_addr += fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
750 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
751 int block_length = deflate_block(fp, fp->block_offset);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
752 if (block_length < 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
753 if (hwrite(fp->fp, fp->compressed_block, block_length) != block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
754 fp->errcode |= BGZF_ERR_IO; // possibly truncated file
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
755 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
756 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
757 fp->block_address += block_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
758 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
759 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
760 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
761
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
762 int bgzf_flush_try(BGZF *fp, ssize_t size)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
763 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
764 if (fp->block_offset + size > BGZF_BLOCK_SIZE) return lazy_flush(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
765 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
766 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
767
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
768 ssize_t bgzf_write(BGZF *fp, const void *data, size_t length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
769 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
770 if ( !fp->is_compressed )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
771 return hwrite(fp->fp, data, length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
772
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
773 const uint8_t *input = (const uint8_t*)data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
774 ssize_t remaining = length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
775 assert(fp->is_write);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
776 while (remaining > 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
777 uint8_t* buffer = (uint8_t*)fp->uncompressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
778 int copy_length = BGZF_BLOCK_SIZE - fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
779 if (copy_length > remaining) copy_length = remaining;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
780 memcpy(buffer + fp->block_offset, input, copy_length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
781 fp->block_offset += copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
782 input += copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
783 remaining -= copy_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
784 if (fp->block_offset == BGZF_BLOCK_SIZE) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
785 if (lazy_flush(fp) != 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
786 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
787 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
788 return length - remaining;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
789 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
790
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
791 ssize_t bgzf_raw_write(BGZF *fp, const void *data, size_t length)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
792 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
793 return hwrite(fp->fp, data, length);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
794 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
795
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
796 int bgzf_close(BGZF* fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
797 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
798 int ret, block_length;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
799 if (fp == 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
800 if (fp->is_write && fp->is_compressed) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
801 if (bgzf_flush(fp) != 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
802 fp->compress_level = -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
803 block_length = deflate_block(fp, 0); // write an empty block
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
804 if (hwrite(fp->fp, fp->compressed_block, block_length) < 0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
805 || hflush(fp->fp) != 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
806 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
807 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
808 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
809 #ifdef BGZF_MT
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
810 if (fp->mt) mt_destroy(fp->mt);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
811 #endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
812 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
813 if ( fp->is_gzip )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
814 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
815 if (!fp->is_write) (void)inflateEnd(fp->gz_stream);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
816 else (void)deflateEnd(fp->gz_stream);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
817 free(fp->gz_stream);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
818 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
819 ret = hclose(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
820 if (ret != 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
821 bgzf_index_destroy(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
822 free(fp->uncompressed_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
823 free(fp->compressed_block);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
824 free_cache(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
825 free(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
826 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
827 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
828
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
829 void bgzf_set_cache_size(BGZF *fp, int cache_size)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
830 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
831 if (fp) fp->cache_size = cache_size;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
832 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
833
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
834 int bgzf_check_EOF(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
835 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
836 uint8_t buf[28];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
837 off_t offset = htell(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
838 if (hseek(fp->fp, -28, SEEK_END) < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
839 if (errno == ESPIPE) { hclearerr(fp->fp); return 2; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
840 else return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
841 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
842 if ( hread(fp->fp, buf, 28) != 28 ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
843 if ( hseek(fp->fp, offset, SEEK_SET) < 0 ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
844 return (memcmp("\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0", buf, 28) == 0)? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
845 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
846
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
847 int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
848 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
849 int block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
850 int64_t block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
851
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
852 if (fp->is_write || where != SEEK_SET) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
853 fp->errcode |= BGZF_ERR_MISUSE;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
854 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
855 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
856 block_offset = pos & 0xFFFF;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
857 block_address = pos >> 16;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
858 if (hseek(fp->fp, block_address, SEEK_SET) < 0) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
859 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
860 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
861 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
862 fp->block_length = 0; // indicates current block has not been loaded
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
863 fp->block_address = block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
864 fp->block_offset = block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
865 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
866 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
867
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
868 int bgzf_is_bgzf(const char *fn)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
869 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
870 uint8_t buf[16];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
871 int n;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
872 hFILE *fp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
873 if ((fp = hopen(fn, "r")) == 0) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
874 n = hread(fp, buf, 16);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
875 if ( hclose(fp) < 0 ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
876 if (n != 16) return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
877 return memcmp(g_magic, buf, 16) == 0? 1 : 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
878 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
879
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
880 int bgzf_getc(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
881 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
882 int c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
883 if (fp->block_offset >= fp->block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
884 if (bgzf_read_block(fp) != 0) return -2; /* error */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
885 if (fp->block_length == 0) return -1; /* end-of-file */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
886 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
887 c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
888 if (fp->block_offset == fp->block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
889 fp->block_address = htell(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
890 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
891 fp->block_length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
892 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
893 fp->uncompressed_address++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
894 return c;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
895 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
896
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
#ifndef kroundup32
/*
 * Round x up, in place, to the next power of two by smearing the highest
 * set bit of (x - 1) into all lower bits (shifts up to 16, i.e. 32-bit
 * values) and adding one. The expression evaluates to the updated x.
 * x is evaluated several times, so it must be a plain lvalue.
 */
#define kroundup32(x) \
    (--(x), \
     (x) |= (x) >> 1, \
     (x) |= (x) >> 2, \
     (x) |= (x) >> 4, \
     (x) |= (x) >> 8, \
     (x) |= (x) >> 16, \
     ++(x))
#endif
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
900
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
901 int bgzf_getline(BGZF *fp, int delim, kstring_t *str)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
902 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
903 int l, state = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
904 unsigned char *buf = (unsigned char*)fp->uncompressed_block;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
905 str->l = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
906 do {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
907 if (fp->block_offset >= fp->block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
908 if (bgzf_read_block(fp) != 0) { state = -2; break; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
909 if (fp->block_length == 0) { state = -1; break; }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
910 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
911 for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
912 if (l < fp->block_length) state = 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
913 l -= fp->block_offset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
914 if (str->l + l + 1 >= str->m) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
915 str->m = str->l + l + 2;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
916 kroundup32(str->m);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
917 str->s = (char*)realloc(str->s, str->m);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
918 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
919 memcpy(str->s + str->l, buf + fp->block_offset, l);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
920 str->l += l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
921 fp->block_offset += l + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
922 if (fp->block_offset >= fp->block_length) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
923 fp->block_address = htell(fp->fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
924 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
925 fp->block_length = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
926 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
927 } while (state == 0);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
928 if (str->l == 0 && state < 0) return state;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
929 fp->uncompressed_address += str->l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
930 if ( delim=='\n' && str->l>0 && str->s[str->l-1]=='\r' ) str->l--;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
931 str->s[str->l] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
932 return str->l;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
933 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
934
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
935 void bgzf_index_destroy(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
936 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
937 if ( !fp->idx ) return;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
938 free(fp->idx->offs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
939 free(fp->idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
940 fp->idx = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
941 fp->idx_build_otf = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
942 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
943
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
944 int bgzf_index_build_init(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
945 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
946 bgzf_index_destroy(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
947 fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
948 if ( !fp->idx ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
949 fp->idx_build_otf = 1; // build index on the fly
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
950 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
951 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
952
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
953 int bgzf_index_add_block(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
954 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
955 fp->idx->noffs++;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
956 if ( fp->idx->noffs > fp->idx->moffs )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
957 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
958 fp->idx->moffs = fp->idx->noffs;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
959 kroundup32(fp->idx->moffs);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
960 fp->idx->offs = (bgzidx1_t*) realloc(fp->idx->offs, fp->idx->moffs*sizeof(bgzidx1_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
961 if ( !fp->idx->offs ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
962 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
963 fp->idx->offs[ fp->idx->noffs-1 ].uaddr = fp->idx->ublock_addr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
964 fp->idx->offs[ fp->idx->noffs-1 ].caddr = fp->block_address;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
965 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
966 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
967
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
968 int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
969 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
970 if (bgzf_flush(fp) != 0) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
971
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
972 assert(fp->idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
973 char *tmp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
974 if ( suffix )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
975 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
976 int blen = strlen(bname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
977 int slen = strlen(suffix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
978 tmp = (char*) malloc(blen + slen + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
979 if ( !tmp ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
980 memcpy(tmp,bname,blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
981 memcpy(tmp+blen,suffix,slen+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
982 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
983
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
984 FILE *idx = fopen(tmp?tmp:bname,"wb");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
985 if ( tmp ) free(tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
986 if ( !idx ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
987
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
988 // Note that the index contains one extra record when indexing files opened
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
989 // for reading. The terminating record is not present when opened for writing.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
990 // This is not a bug.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
991
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
992 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
993 if ( fp->is_be )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
994 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
995 uint64_t x = fp->idx->noffs - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
996 fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
997 for (i=1; i<fp->idx->noffs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
998 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
999 x = fp->idx->offs[i].caddr; fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1000 x = fp->idx->offs[i].uaddr; fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1001 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1002 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1003 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1004 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1005 uint64_t x = fp->idx->noffs - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1006 fwrite(&x, 1, sizeof(x), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1007 for (i=1; i<fp->idx->noffs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1008 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1009 fwrite(&fp->idx->offs[i].caddr, 1, sizeof(fp->idx->offs[i].caddr), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1010 fwrite(&fp->idx->offs[i].uaddr, 1, sizeof(fp->idx->offs[i].uaddr), idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1011 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1012 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1013 fclose(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1014 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1015 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1016
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1017
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1018 int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1019 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1020 char *tmp = NULL;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1021 if ( suffix )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1022 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1023 int blen = strlen(bname);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1024 int slen = strlen(suffix);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1025 tmp = (char*) malloc(blen + slen + 1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1026 if ( !tmp ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1027 memcpy(tmp,bname,blen);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1028 memcpy(tmp+blen,suffix,slen+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1029 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1030
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1031 FILE *idx = fopen(tmp?tmp:bname,"rb");
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1032 if ( tmp ) free(tmp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1033 if ( !idx ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1034
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1035 fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1036 uint64_t x;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1037 if ( fread(&x, 1, sizeof(x), idx) != sizeof(x) ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1038
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1039 fp->idx->noffs = fp->idx->moffs = 1 + (fp->is_be ? ed_swap_8(x) : x);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1040 fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1041 fp->idx->offs[0].caddr = fp->idx->offs[0].uaddr = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1042
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1043 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1044 if ( fp->is_be )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1045 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1046 int ret = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1047 for (i=1; i<fp->idx->noffs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1048 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1049 ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].caddr = ed_swap_8(x);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1050 ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].uaddr = ed_swap_8(x);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1051 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1052 if ( ret != sizeof(x)*2*(fp->idx->noffs-1) ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1053 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1054 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1055 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1056 int ret = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1057 for (i=1; i<fp->idx->noffs; i++)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1058 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1059 ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].caddr = x;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1060 ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].uaddr = x;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1061 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1062 if ( ret != sizeof(x)*2*(fp->idx->noffs-1) ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1063 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1064 fclose(idx);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1065 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1066
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1067 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1068
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1069 int bgzf_useek(BGZF *fp, long uoffset, int where)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1070 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1071 if ( !fp->is_compressed )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1072 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1073 if (hseek(fp->fp, uoffset, SEEK_SET) < 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1074 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1075 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1076 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1077 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1078 fp->block_length = 0; // indicates current block has not been loaded
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1079 fp->block_address = uoffset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1080 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1081 bgzf_read_block(fp);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1082 fp->uncompressed_address = uoffset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1083 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1084 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1085
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1086 if ( !fp->idx )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1087 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1088 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1089 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1090 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1091
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1092 // binary search
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1093 int ilo = 0, ihi = fp->idx->noffs - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1094 while ( ilo<=ihi )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1095 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1096 int i = (ilo+ihi)*0.5;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1097 if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1098 else if ( uoffset >= fp->idx->offs[i].uaddr ) ilo = i + 1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1099 else break;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1100 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1101 int i = ilo-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1102 if (hseek(fp->fp, fp->idx->offs[i].caddr, SEEK_SET) < 0)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1103 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1104 fp->errcode |= BGZF_ERR_IO;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1105 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1106 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1107 fp->block_length = 0; // indicates current block has not been loaded
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1108 fp->block_address = fp->idx->offs[i].caddr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1109 fp->block_offset = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1110 if ( bgzf_read_block(fp) < 0 ) return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1111 if ( uoffset - fp->idx->offs[i].uaddr > 0 )
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1112 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1113 fp->block_offset = uoffset - fp->idx->offs[i].uaddr;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1114 assert( fp->block_offset <= fp->block_length ); // todo: skipped, unindexed, blocks
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1115 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1116 fp->uncompressed_address = uoffset;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1117 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1118 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1119
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1120 long bgzf_utell(BGZF *fp)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1121 {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1122 return fp->uncompressed_address; // currently maintained only when reading
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1123 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1124