0
|
1 /*
|
|
2 * RAZF : Random Access compressed(Z) File
|
|
3 * Version: 1.0
|
|
4 * Release Date: 2008-10-27
|
|
5 *
|
|
6 * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>
|
|
7 *
|
|
8 * All rights reserved.
|
|
9 *
|
|
10 * Redistribution and use in source and binary forms, with or without
|
|
11 * modification, are permitted provided that the following conditions
|
|
12 * are met:
|
|
13 * 1. Redistributions of source code must retain the above copyright
|
|
14 * notice, this list of conditions and the following disclaimer.
|
|
15 * 2. Redistributions in binary form must reproduce the above copyright
|
|
16 * notice, this list of conditions and the following disclaimer in the
|
|
17 * documentation and/or other materials provided with the distribution.
|
|
18 *
|
|
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
29 * SUCH DAMAGE.
|
|
30 */
|
|
31
|
|
32 #ifndef _NO_RAZF
|
|
33
|
|
34 #include <fcntl.h>
|
|
35 #include <stdio.h>
|
|
36 #include <stdlib.h>
|
|
37 #include <string.h>
|
|
38 #include <unistd.h>
|
|
39 #include "razf.h"
|
|
40
|
|
41
|
|
42 #if ZLIB_VERNUM < 0x1221
|
|
43 struct _gz_header_s {
|
|
44 int text;
|
|
45 uLong time;
|
|
46 int xflags;
|
|
47 int os;
|
|
48 Bytef *extra;
|
|
49 uInt extra_len;
|
|
50 uInt extra_max;
|
|
51 Bytef *name;
|
|
52 uInt name_max;
|
|
53 Bytef *comment;
|
|
54 uInt comm_max;
|
|
55 int hcrc;
|
|
56 int done;
|
|
57 };
|
|
58 #warning "zlib < 1.2.2.1; RAZF writing is disabled."
|
|
59 #endif
|
|
60
|
|
61 #define DEF_MEM_LEVEL 8
|
|
62
|
|
/* Reverse the byte order of a 32-bit value (0xAABBCCDD -> 0xDDCCBBAA). */
static inline uint32_t byte_swap_4(uint32_t v){
	const uint32_t b0 = v & 0x000000FFU;
	const uint32_t b1 = v & 0x0000FF00U;
	const uint32_t b2 = v & 0x00FF0000U;
	const uint32_t b3 = v & 0xFF000000U;
	return (b0 << 24) | (b1 << 8) | (b2 >> 8) | (b3 >> 24);
}
|
|
67
|
|
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t reversed = 0;
	int k;
	/* Peel bytes off the low end of v and push them onto the low end of
	 * the result, so the first byte taken ends up most significant. */
	for(k = 0; k < 8; k++){
		reversed = (reversed << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return reversed;
}
|
|
73
|
|
/*
 * Report the host byte order: returns non-zero when the first byte in memory
 * of the int value 1 is not 0x01 (big-endian host), and 0 otherwise.
 */
static inline int is_big_endian(){
	const int probe = 0x01;
	const char *first_byte = (const char *)&probe;
	return *first_byte != 0x01;
}
|
|
79
|
|
80 #ifndef _RZ_READONLY
|
|
81 static void add_zindex(RAZF *rz, int64_t in, int64_t out){
|
|
82 if(rz->index->size == rz->index->cap){
|
|
83 rz->index->cap = rz->index->cap * 1.5 + 2;
|
|
84 rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
|
|
85 rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
|
|
86 }
|
|
87 if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
|
|
88 rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
|
|
89 rz->index->size ++;
|
|
90 }
|
|
91
|
|
92 static void save_zindex(RAZF *rz, int fd){
|
|
93 int32_t i, v32;
|
|
94 int is_be;
|
|
95 is_be = is_big_endian();
|
|
96 if(is_be) write(fd, &rz->index->size, sizeof(int));
|
|
97 else {
|
|
98 v32 = byte_swap_4((uint32_t)rz->index->size);
|
|
99 write(fd, &v32, sizeof(uint32_t));
|
|
100 }
|
|
101 v32 = rz->index->size / RZ_BIN_SIZE + 1;
|
|
102 if(!is_be){
|
|
103 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
|
|
104 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
|
|
105 }
|
|
106 write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
|
|
107 write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);
|
|
108 }
|
|
109 #endif
|
|
110
|
|
111 #ifdef _USE_KNETFILE
|
|
112 static void load_zindex(RAZF *rz, knetFile *fp){
|
|
113 #else
|
|
114 static void load_zindex(RAZF *rz, int fd){
|
|
115 #endif
|
|
116 int32_t i, v32;
|
|
117 int is_be;
|
|
118 if(!rz->load_index) return;
|
|
119 if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
|
|
120 is_be = is_big_endian();
|
|
121 #ifdef _USE_KNETFILE
|
|
122 knet_read(fp, &rz->index->size, sizeof(int));
|
|
123 #else
|
|
124 read(fd, &rz->index->size, sizeof(int));
|
|
125 #endif
|
|
126 if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
|
|
127 rz->index->cap = rz->index->size;
|
|
128 v32 = rz->index->size / RZ_BIN_SIZE + 1;
|
|
129 rz->index->bin_offsets = malloc(sizeof(int64_t) * v32);
|
|
130 #ifdef _USE_KNETFILE
|
|
131 knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
|
|
132 #else
|
|
133 read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
|
|
134 #endif
|
|
135 rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
|
|
136 #ifdef _USE_KNETFILE
|
|
137 knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
|
|
138 #else
|
|
139 read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
|
|
140 #endif
|
|
141 if(!is_be){
|
|
142 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
|
|
143 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
|
|
144 }
|
|
145 }
|
|
146
|
|
147 #ifdef _RZ_READONLY
|
|
148 static RAZF* razf_open_w(int fd)
|
|
149 {
|
|
150 fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
|
|
151 return 0;
|
|
152 }
|
|
153 #else
|
|
154 static RAZF* razf_open_w(int fd){
|
|
155 RAZF *rz;
|
|
156 #ifdef _WIN32
|
|
157 setmode(fd, O_BINARY);
|
|
158 #endif
|
|
159 rz = calloc(1, sizeof(RAZF));
|
|
160 rz->mode = 'w';
|
|
161 #ifdef _USE_KNETFILE
|
|
162 rz->x.fpw = fd;
|
|
163 #else
|
|
164 rz->filedes = fd;
|
|
165 #endif
|
|
166 rz->stream = calloc(sizeof(z_stream), 1);
|
|
167 rz->inbuf = malloc(RZ_BUFFER_SIZE);
|
|
168 rz->outbuf = malloc(RZ_BUFFER_SIZE);
|
|
169 rz->index = calloc(sizeof(ZBlockIndex), 1);
|
|
170 deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
|
|
171 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
172 rz->stream->next_out = rz->outbuf;
|
|
173 rz->header = calloc(sizeof(gz_header), 1);
|
|
174 rz->header->os = 0x03; //Unix
|
|
175 rz->header->text = 0;
|
|
176 rz->header->time = 0;
|
|
177 rz->header->extra = malloc(7);
|
|
178 strncpy((char*)rz->header->extra, "RAZF", 4);
|
|
179 rz->header->extra[4] = 1; // obsolete field
|
|
180 // block size = RZ_BLOCK_SIZE, Big-Endian
|
|
181 rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
|
|
182 rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
|
|
183 rz->header->extra_len = 7;
|
|
184 rz->header->name = rz->header->comment = 0;
|
|
185 rz->header->hcrc = 0;
|
|
186 deflateSetHeader(rz->stream, rz->header);
|
|
187 rz->block_pos = rz->block_off = 0;
|
|
188 return rz;
|
|
189 }
|
|
190
|
|
191 static void _razf_write(RAZF* rz, const void *data, int size){
|
|
192 int tout;
|
|
193 rz->stream->avail_in = size;
|
|
194 rz->stream->next_in = (void*)data;
|
|
195 while(1){
|
|
196 tout = rz->stream->avail_out;
|
|
197 deflate(rz->stream, Z_NO_FLUSH);
|
|
198 rz->out += tout - rz->stream->avail_out;
|
|
199 if(rz->stream->avail_out) break;
|
|
200 #ifdef _USE_KNETFILE
|
|
201 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
202 #else
|
|
203 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
204 #endif
|
|
205 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
206 rz->stream->next_out = rz->outbuf;
|
|
207 if(rz->stream->avail_in == 0) break;
|
|
208 };
|
|
209 rz->in += size - rz->stream->avail_in;
|
|
210 rz->block_off += size - rz->stream->avail_in;
|
|
211 }
|
|
212
|
|
213 static void razf_flush(RAZF *rz){
|
|
214 uint32_t tout;
|
|
215 if(rz->buf_len){
|
|
216 _razf_write(rz, rz->inbuf, rz->buf_len);
|
|
217 rz->buf_off = rz->buf_len = 0;
|
|
218 }
|
|
219 if(rz->stream->avail_out){
|
|
220 #ifdef _USE_KNETFILE
|
|
221 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
222 #else
|
|
223 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
224 #endif
|
|
225 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
226 rz->stream->next_out = rz->outbuf;
|
|
227 }
|
|
228 while(1){
|
|
229 tout = rz->stream->avail_out;
|
|
230 deflate(rz->stream, Z_FULL_FLUSH);
|
|
231 rz->out += tout - rz->stream->avail_out;
|
|
232 if(rz->stream->avail_out == 0){
|
|
233 #ifdef _USE_KNETFILE
|
|
234 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
235 #else
|
|
236 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
237 #endif
|
|
238 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
239 rz->stream->next_out = rz->outbuf;
|
|
240 } else break;
|
|
241 }
|
|
242 rz->block_pos = rz->out;
|
|
243 rz->block_off = 0;
|
|
244 }
|
|
245
|
|
246 static void razf_end_flush(RAZF *rz){
|
|
247 uint32_t tout;
|
|
248 if(rz->buf_len){
|
|
249 _razf_write(rz, rz->inbuf, rz->buf_len);
|
|
250 rz->buf_off = rz->buf_len = 0;
|
|
251 }
|
|
252 while(1){
|
|
253 tout = rz->stream->avail_out;
|
|
254 deflate(rz->stream, Z_FINISH);
|
|
255 rz->out += tout - rz->stream->avail_out;
|
|
256 if(rz->stream->avail_out < RZ_BUFFER_SIZE){
|
|
257 #ifdef _USE_KNETFILE
|
|
258 write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
259 #else
|
|
260 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
|
|
261 #endif
|
|
262 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
263 rz->stream->next_out = rz->outbuf;
|
|
264 } else break;
|
|
265 }
|
|
266 }
|
|
267
|
|
268 static void _razf_buffered_write(RAZF *rz, const void *data, int size){
|
|
269 int i, n;
|
|
270 while(1){
|
|
271 if(rz->buf_len == RZ_BUFFER_SIZE){
|
|
272 _razf_write(rz, rz->inbuf, rz->buf_len);
|
|
273 rz->buf_len = 0;
|
|
274 }
|
|
275 if(size + rz->buf_len < RZ_BUFFER_SIZE){
|
|
276 for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
|
|
277 rz->buf_len += size;
|
|
278 return;
|
|
279 } else {
|
|
280 n = RZ_BUFFER_SIZE - rz->buf_len;
|
|
281 for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
|
|
282 size -= n;
|
|
283 data += n;
|
|
284 rz->buf_len += n;
|
|
285 }
|
|
286 }
|
|
287 }
|
|
288
|
|
289 int razf_write(RAZF* rz, const void *data, int size){
|
|
290 int ori_size, n;
|
|
291 int64_t next_block;
|
|
292 ori_size = size;
|
|
293 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
|
|
294 while(rz->in + rz->buf_len + size >= next_block){
|
|
295 n = next_block - rz->in - rz->buf_len;
|
|
296 _razf_buffered_write(rz, data, n);
|
|
297 data += n;
|
|
298 size -= n;
|
|
299 razf_flush(rz);
|
|
300 add_zindex(rz, rz->in, rz->out);
|
|
301 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
|
|
302 }
|
|
303 _razf_buffered_write(rz, data, size);
|
|
304 return ori_size;
|
|
305 }
|
|
306 #endif
|
|
307
|
|
308 /* gzip flag byte */
|
|
309 #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
|
|
310 #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
|
|
311 #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
|
|
312 #define ORIG_NAME 0x08 /* bit 3 set: original file name present */
|
|
313 #define COMMENT 0x10 /* bit 4 set: file comment present */
|
|
314 #define RESERVED 0xE0 /* bits 5..7: reserved */
|
|
315
|
|
316 static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
|
|
317 int method, flags, n, len;
|
|
318 if(size < 2) return 0;
|
|
319 if(data[0] != 0x1f || data[1] != 0x8b) return 0;
|
|
320 if(size < 4) return 0;
|
|
321 method = data[2];
|
|
322 flags = data[3];
|
|
323 if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
|
|
324 n = 4 + 6; // Skip 6 bytes
|
|
325 *extra_off = n + 2;
|
|
326 *extra_len = 0;
|
|
327 if(flags & EXTRA_FIELD){
|
|
328 if(size < n + 2) return 0;
|
|
329 len = ((int)data[n + 1] << 8) | data[n];
|
|
330 n += 2;
|
|
331 *extra_off = n;
|
|
332 while(len){
|
|
333 if(n >= size) return 0;
|
|
334 n ++;
|
|
335 len --;
|
|
336 }
|
|
337 *extra_len = n - (*extra_off);
|
|
338 }
|
|
339 if(flags & ORIG_NAME) while(n < size && data[n++]);
|
|
340 if(flags & COMMENT) while(n < size && data[n++]);
|
|
341 if(flags & HEAD_CRC){
|
|
342 if(n + 2 > size) return 0;
|
|
343 n += 2;
|
|
344 }
|
|
345 return n;
|
|
346 }
|
|
347
|
|
348 #ifdef _USE_KNETFILE
|
|
349 static RAZF* razf_open_r(knetFile *fp, int _load_index){
|
|
350 #else
|
|
351 static RAZF* razf_open_r(int fd, int _load_index){
|
|
352 #endif
|
|
353 RAZF *rz;
|
|
354 int ext_off, ext_len;
|
|
355 int n, is_be, ret;
|
|
356 int64_t end;
|
|
357 unsigned char c[] = "RAZF";
|
|
358 rz = calloc(1, sizeof(RAZF));
|
|
359 rz->mode = 'r';
|
|
360 #ifdef _USE_KNETFILE
|
|
361 rz->x.fpr = fp;
|
|
362 #else
|
|
363 #ifdef _WIN32
|
|
364 setmode(fd, O_BINARY);
|
|
365 #endif
|
|
366 rz->filedes = fd;
|
|
367 #endif
|
|
368 rz->stream = calloc(sizeof(z_stream), 1);
|
|
369 rz->inbuf = malloc(RZ_BUFFER_SIZE);
|
|
370 rz->outbuf = malloc(RZ_BUFFER_SIZE);
|
|
371 rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
|
|
372 #ifdef _USE_KNETFILE
|
|
373 n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
|
|
374 #else
|
|
375 n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
|
|
376 #endif
|
|
377 ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
|
|
378 if(ret == 0){
|
|
379 PLAIN_FILE:
|
|
380 rz->in = n;
|
|
381 rz->file_type = FILE_TYPE_PLAIN;
|
|
382 memcpy(rz->outbuf, rz->inbuf, n);
|
|
383 rz->buf_len = n;
|
|
384 free(rz->stream);
|
|
385 rz->stream = NULL;
|
|
386 return rz;
|
|
387 }
|
|
388 rz->header_size = ret;
|
|
389 ret = inflateInit2(rz->stream, -WINDOW_BITS);
|
|
390 if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
|
|
391 rz->stream->avail_in = n - rz->header_size;
|
|
392 rz->stream->next_in = rz->inbuf + rz->header_size;
|
|
393 rz->stream->avail_out = RZ_BUFFER_SIZE;
|
|
394 rz->stream->next_out = rz->outbuf;
|
|
395 rz->file_type = FILE_TYPE_GZ;
|
|
396 rz->in = rz->header_size;
|
|
397 rz->block_pos = rz->header_size;
|
|
398 rz->next_block_pos = rz->header_size;
|
|
399 rz->block_off = 0;
|
|
400 if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
|
|
401 if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
|
|
402 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file. in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
|
|
403 return rz;
|
|
404 }
|
|
405 rz->load_index = _load_index;
|
|
406 rz->file_type = FILE_TYPE_RZ;
|
|
407 #ifdef _USE_KNETFILE
|
|
408 if(knet_seek(fp, -16, SEEK_END) == -1){
|
|
409 #else
|
|
410 if(lseek(fd, -16, SEEK_END) == -1){
|
|
411 #endif
|
|
412 UNSEEKABLE:
|
|
413 rz->seekable = 0;
|
|
414 rz->index = NULL;
|
|
415 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
|
|
416 } else {
|
|
417 is_be = is_big_endian();
|
|
418 rz->seekable = 1;
|
|
419 #ifdef _USE_KNETFILE
|
|
420 knet_read(fp, &end, sizeof(int64_t));
|
|
421 #else
|
|
422 read(fd, &end, sizeof(int64_t));
|
|
423 #endif
|
|
424 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
|
|
425 else rz->src_end = end;
|
|
426
|
|
427 #ifdef _USE_KNETFILE
|
|
428 knet_read(fp, &end, sizeof(int64_t));
|
|
429 #else
|
|
430 read(fd, &end, sizeof(int64_t));
|
|
431 #endif
|
|
432 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
|
|
433 else rz->end = end;
|
|
434 if(n > rz->end){
|
|
435 rz->stream->avail_in -= n - rz->end;
|
|
436 n = rz->end;
|
|
437 }
|
|
438 if(rz->end > rz->src_end){
|
|
439 #ifdef _USE_KNETFILE
|
|
440 knet_seek(fp, rz->in, SEEK_SET);
|
|
441 #else
|
|
442 lseek(fd, rz->in, SEEK_SET);
|
|
443 #endif
|
|
444 goto UNSEEKABLE;
|
|
445 }
|
|
446 #ifdef _USE_KNETFILE
|
|
447 knet_seek(fp, rz->end, SEEK_SET);
|
|
448 if(knet_tell(fp) != rz->end){
|
|
449 knet_seek(fp, rz->in, SEEK_SET);
|
|
450 #else
|
|
451 if(lseek(fd, rz->end, SEEK_SET) != rz->end){
|
|
452 lseek(fd, rz->in, SEEK_SET);
|
|
453 #endif
|
|
454 goto UNSEEKABLE;
|
|
455 }
|
|
456 #ifdef _USE_KNETFILE
|
|
457 load_zindex(rz, fp);
|
|
458 knet_seek(fp, n, SEEK_SET);
|
|
459 #else
|
|
460 load_zindex(rz, fd);
|
|
461 lseek(fd, n, SEEK_SET);
|
|
462 #endif
|
|
463 }
|
|
464 return rz;
|
|
465 }
|
|
466
|
|
467 #ifdef _USE_KNETFILE
|
|
468 RAZF* razf_dopen(int fd, const char *mode){
|
|
469 if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
|
|
470 else if(strstr(mode, "w")) return razf_open_w(fd);
|
|
471 return NULL;
|
|
472 }
|
|
473
|
|
474 RAZF* razf_dopen2(int fd, const char *mode)
|
|
475 {
|
|
476 fprintf(stderr,"[razf_dopen2] implement me\n");
|
|
477 return NULL;
|
|
478 }
|
|
479 #else
|
|
480 RAZF* razf_dopen(int fd, const char *mode){
|
|
481 if(strstr(mode, "r")) return razf_open_r(fd, 1);
|
|
482 else if(strstr(mode, "w")) return razf_open_w(fd);
|
|
483 else return NULL;
|
|
484 }
|
|
485
|
|
486 RAZF* razf_dopen2(int fd, const char *mode)
|
|
487 {
|
|
488 if(strstr(mode, "r")) return razf_open_r(fd, 0);
|
|
489 else if(strstr(mode, "w")) return razf_open_w(fd);
|
|
490 else return NULL;
|
|
491 }
|
|
492 #endif
|
|
493
|
|
494 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
|
|
495 int fd;
|
|
496 RAZF *rz;
|
|
497 if(strstr(mode, "r")){
|
|
498 #ifdef _USE_KNETFILE
|
|
499 knetFile *fd = knet_open(filename, "r");
|
|
500 if (fd == 0) {
|
|
501 fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
|
|
502 return NULL;
|
|
503 }
|
|
504 #else
|
|
505 #ifdef _WIN32
|
|
506 fd = open(filename, O_RDONLY | O_BINARY);
|
|
507 #else
|
|
508 fd = open(filename, O_RDONLY);
|
|
509 #endif
|
|
510 #endif
|
|
511 if(fd < 0) return NULL;
|
|
512 rz = razf_open_r(fd, _load_index);
|
|
513 } else if(strstr(mode, "w")){
|
|
514 #ifdef _WIN32
|
|
515 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
|
|
516 #else
|
|
517 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
|
|
518 #endif
|
|
519 if(fd < 0) return NULL;
|
|
520 rz = razf_open_w(fd);
|
|
521 } else return NULL;
|
|
522 return rz;
|
|
523 }
|
|
524
|
|
525 RAZF* razf_open(const char *filename, const char *mode){
|
|
526 return _razf_open(filename, mode, 1);
|
|
527 }
|
|
528
|
|
529 RAZF* razf_open2(const char *filename, const char *mode){
|
|
530 return _razf_open(filename, mode, 0);
|
|
531 }
|
|
532
|
|
533 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
|
|
534 int64_t n;
|
|
535 if(rz->mode != 'r' && rz->mode != 'R') return 0;
|
|
536 switch(rz->file_type){
|
|
537 case FILE_TYPE_PLAIN:
|
|
538 if(rz->end == 0x7fffffffffffffffLL){
|
|
539 #ifdef _USE_KNETFILE
|
|
540 if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
|
|
541 n = knet_tell(rz->x.fpr);
|
|
542 knet_seek(rz->x.fpr, 0, SEEK_END);
|
|
543 rz->end = knet_tell(rz->x.fpr);
|
|
544 knet_seek(rz->x.fpr, n, SEEK_SET);
|
|
545 #else
|
|
546 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
|
|
547 rz->end = lseek(rz->filedes, 0, SEEK_END);
|
|
548 lseek(rz->filedes, n, SEEK_SET);
|
|
549 #endif
|
|
550 }
|
|
551 *u_size = *c_size = rz->end;
|
|
552 return 1;
|
|
553 case FILE_TYPE_GZ:
|
|
554 return 0;
|
|
555 case FILE_TYPE_RZ:
|
|
556 if(rz->src_end == rz->end) return 0;
|
|
557 *u_size = rz->src_end;
|
|
558 *c_size = rz->end;
|
|
559 return 1;
|
|
560 default:
|
|
561 return 0;
|
|
562 }
|
|
563 }
|
|
564
|
|
565 static int _razf_read(RAZF* rz, void *data, int size){
|
|
566 int ret, tin;
|
|
567 if(rz->z_eof || rz->z_err) return 0;
|
|
568 if (rz->file_type == FILE_TYPE_PLAIN) {
|
|
569 #ifdef _USE_KNETFILE
|
|
570 ret = knet_read(rz->x.fpr, data, size);
|
|
571 #else
|
|
572 ret = read(rz->filedes, data, size);
|
|
573 #endif
|
|
574 if (ret == 0) rz->z_eof = 1;
|
|
575 return ret;
|
|
576 }
|
|
577 rz->stream->avail_out = size;
|
|
578 rz->stream->next_out = data;
|
|
579 while(rz->stream->avail_out){
|
|
580 if(rz->stream->avail_in == 0){
|
|
581 if(rz->in >= rz->end){ rz->z_eof = 1; break; }
|
|
582 if(rz->end - rz->in < RZ_BUFFER_SIZE){
|
|
583 #ifdef _USE_KNETFILE
|
|
584 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
|
|
585 #else
|
|
586 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
|
|
587 #endif
|
|
588 } else {
|
|
589 #ifdef _USE_KNETFILE
|
|
590 rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
|
|
591 #else
|
|
592 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
|
|
593 #endif
|
|
594 }
|
|
595 if(rz->stream->avail_in == 0){
|
|
596 rz->z_eof = 1;
|
|
597 break;
|
|
598 }
|
|
599 rz->stream->next_in = rz->inbuf;
|
|
600 }
|
|
601 tin = rz->stream->avail_in;
|
|
602 ret = inflate(rz->stream, Z_BLOCK);
|
|
603 rz->in += tin - rz->stream->avail_in;
|
|
604 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
|
|
605 fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
|
|
606 rz->z_err = 1;
|
|
607 break;
|
|
608 }
|
|
609 if(ret == Z_STREAM_END){
|
|
610 rz->z_eof = 1;
|
|
611 break;
|
|
612 }
|
|
613 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
|
|
614 rz->buf_flush = 1;
|
|
615 rz->next_block_pos = rz->in;
|
|
616 break;
|
|
617 }
|
|
618 }
|
|
619 return size - rz->stream->avail_out;
|
|
620 }
|
|
621
|
|
622 int razf_read(RAZF *rz, void *data, int size){
|
|
623 int ori_size, i;
|
|
624 ori_size = size;
|
|
625 while(size > 0){
|
|
626 if(rz->buf_len){
|
|
627 if(size < rz->buf_len){
|
|
628 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
|
|
629 rz->buf_off += size;
|
|
630 rz->buf_len -= size;
|
|
631 data += size;
|
|
632 rz->block_off += size;
|
|
633 size = 0;
|
|
634 break;
|
|
635 } else {
|
|
636 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
|
|
637 data += rz->buf_len;
|
|
638 size -= rz->buf_len;
|
|
639 rz->block_off += rz->buf_len;
|
|
640 rz->buf_off = 0;
|
|
641 rz->buf_len = 0;
|
|
642 if(rz->buf_flush){
|
|
643 rz->block_pos = rz->next_block_pos;
|
|
644 rz->block_off = 0;
|
|
645 rz->buf_flush = 0;
|
|
646 }
|
|
647 }
|
|
648 } else if(rz->buf_flush){
|
|
649 rz->block_pos = rz->next_block_pos;
|
|
650 rz->block_off = 0;
|
|
651 rz->buf_flush = 0;
|
|
652 }
|
|
653 if(rz->buf_flush) continue;
|
|
654 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
|
|
655 if(rz->z_eof && rz->buf_len == 0) break;
|
|
656 }
|
|
657 rz->out += ori_size - size;
|
|
658 return ori_size - size;
|
|
659 }
|
|
660
|
|
661 int razf_skip(RAZF* rz, int size){
|
|
662 int ori_size;
|
|
663 ori_size = size;
|
|
664 while(size > 0){
|
|
665 if(rz->buf_len){
|
|
666 if(size < rz->buf_len){
|
|
667 rz->buf_off += size;
|
|
668 rz->buf_len -= size;
|
|
669 rz->block_off += size;
|
|
670 size = 0;
|
|
671 break;
|
|
672 } else {
|
|
673 size -= rz->buf_len;
|
|
674 rz->buf_off = 0;
|
|
675 rz->buf_len = 0;
|
|
676 rz->block_off += rz->buf_len;
|
|
677 if(rz->buf_flush){
|
|
678 rz->block_pos = rz->next_block_pos;
|
|
679 rz->block_off = 0;
|
|
680 rz->buf_flush = 0;
|
|
681 }
|
|
682 }
|
|
683 } else if(rz->buf_flush){
|
|
684 rz->block_pos = rz->next_block_pos;
|
|
685 rz->block_off = 0;
|
|
686 rz->buf_flush = 0;
|
|
687 }
|
|
688 if(rz->buf_flush) continue;
|
|
689 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
|
|
690 if(rz->z_eof || rz->z_err) break;
|
|
691 }
|
|
692 rz->out += ori_size - size;
|
|
693 return ori_size - size;
|
|
694 }
|
|
695
|
|
696 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
|
|
697 #ifdef _USE_KNETFILE
|
|
698 knet_seek(rz->x.fpr, in, SEEK_SET);
|
|
699 #else
|
|
700 lseek(rz->filedes, in, SEEK_SET);
|
|
701 #endif
|
|
702 rz->in = in;
|
|
703 rz->out = out;
|
|
704 rz->block_pos = in;
|
|
705 rz->next_block_pos = in;
|
|
706 rz->block_off = 0;
|
|
707 rz->buf_flush = 0;
|
|
708 rz->z_eof = rz->z_err = 0;
|
|
709 inflateReset(rz->stream);
|
|
710 rz->stream->avail_in = 0;
|
|
711 rz->buf_off = rz->buf_len = 0;
|
|
712 }
|
|
713
|
|
714 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
|
|
715 int64_t pos;
|
|
716 rz->z_eof = 0;
|
|
717 if(rz->file_type == FILE_TYPE_PLAIN){
|
|
718 rz->buf_off = rz->buf_len = 0;
|
|
719 pos = block_start + block_offset;
|
|
720 #ifdef _USE_KNETFILE
|
|
721 knet_seek(rz->x.fpr, pos, SEEK_SET);
|
|
722 pos = knet_tell(rz->x.fpr);
|
|
723 #else
|
|
724 pos = lseek(rz->filedes, pos, SEEK_SET);
|
|
725 #endif
|
|
726 rz->out = rz->in = pos;
|
|
727 return pos;
|
|
728 }
|
|
729 if(block_start == rz->block_pos && block_offset >= rz->block_off) {
|
|
730 block_offset -= rz->block_off;
|
|
731 goto SKIP; // Needn't reset inflate
|
|
732 }
|
|
733 if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
|
|
734 _razf_reset_read(rz, block_start, 0);
|
|
735 SKIP:
|
|
736 if(block_offset) razf_skip(rz, block_offset);
|
|
737 return rz->block_off;
|
|
738 }
|
|
739
|
|
740 int64_t razf_seek(RAZF* rz, int64_t pos, int where){
|
|
741 int64_t idx;
|
|
742 int64_t seek_pos, new_out;
|
|
743 rz->z_eof = 0;
|
|
744 if (where == SEEK_CUR) pos += rz->out;
|
|
745 else if (where == SEEK_END) pos += rz->src_end;
|
|
746 if(rz->file_type == FILE_TYPE_PLAIN){
|
|
747 #ifdef _USE_KNETFILE
|
|
748 knet_seek(rz->x.fpr, pos, SEEK_SET);
|
|
749 seek_pos = knet_tell(rz->x.fpr);
|
|
750 #else
|
|
751 seek_pos = lseek(rz->filedes, pos, SEEK_SET);
|
|
752 #endif
|
|
753 rz->buf_off = rz->buf_len = 0;
|
|
754 rz->out = rz->in = seek_pos;
|
|
755 return seek_pos;
|
|
756 } else if(rz->file_type == FILE_TYPE_GZ){
|
|
757 if(pos >= rz->out) goto SKIP;
|
|
758 return rz->out;
|
|
759 }
|
|
760 if(pos == rz->out) return pos;
|
|
761 if(pos > rz->src_end) return rz->out;
|
|
762 if(!rz->seekable || !rz->load_index){
|
|
763 if(pos >= rz->out) goto SKIP;
|
|
764 }
|
|
765 idx = pos / RZ_BLOCK_SIZE - 1;
|
|
766 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
|
|
767 new_out = (idx + 1) * RZ_BLOCK_SIZE;
|
|
768 if(pos > rz->out && new_out <= rz->out) goto SKIP;
|
|
769 _razf_reset_read(rz, seek_pos, new_out);
|
|
770 SKIP:
|
|
771 razf_skip(rz, (int)(pos - rz->out));
|
|
772 return rz->out;
|
|
773 }
|
|
774
|
|
775 uint64_t razf_tell2(RAZF *rz)
|
|
776 {
|
|
777 /*
|
|
778 if (rz->load_index) {
|
|
779 int64_t idx, seek_pos;
|
|
780 idx = rz->out / RZ_BLOCK_SIZE - 1;
|
|
781 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
|
|
782 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
|
|
783 fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
|
|
784 (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
|
|
785 }
|
|
786 */
|
|
787 return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
|
|
788 }
|
|
789
|
|
790 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
|
|
791 {
|
|
792 if (where != SEEK_SET) return -1;
|
|
793 return razf_jump(rz, voffset>>16, voffset&0xffff);
|
|
794 }
|
|
795
|
|
796 void razf_close(RAZF *rz){
|
|
797 if(rz->mode == 'w'){
|
|
798 #ifndef _RZ_READONLY
|
|
799 razf_end_flush(rz);
|
|
800 deflateEnd(rz->stream);
|
|
801 #ifdef _USE_KNETFILE
|
|
802 save_zindex(rz, rz->x.fpw);
|
|
803 if(is_big_endian()){
|
|
804 write(rz->x.fpw, &rz->in, sizeof(int64_t));
|
|
805 write(rz->x.fpw, &rz->out, sizeof(int64_t));
|
|
806 } else {
|
|
807 uint64_t v64 = byte_swap_8((uint64_t)rz->in);
|
|
808 write(rz->x.fpw, &v64, sizeof(int64_t));
|
|
809 v64 = byte_swap_8((uint64_t)rz->out);
|
|
810 write(rz->x.fpw, &v64, sizeof(int64_t));
|
|
811 }
|
|
812 #else
|
|
813 save_zindex(rz, rz->filedes);
|
|
814 if(is_big_endian()){
|
|
815 write(rz->filedes, &rz->in, sizeof(int64_t));
|
|
816 write(rz->filedes, &rz->out, sizeof(int64_t));
|
|
817 } else {
|
|
818 uint64_t v64 = byte_swap_8((uint64_t)rz->in);
|
|
819 write(rz->filedes, &v64, sizeof(int64_t));
|
|
820 v64 = byte_swap_8((uint64_t)rz->out);
|
|
821 write(rz->filedes, &v64, sizeof(int64_t));
|
|
822 }
|
|
823 #endif
|
|
824 #endif
|
|
825 } else if(rz->mode == 'r'){
|
|
826 if(rz->stream) inflateEnd(rz->stream);
|
|
827 }
|
|
828 if(rz->inbuf) free(rz->inbuf);
|
|
829 if(rz->outbuf) free(rz->outbuf);
|
|
830 if(rz->header){
|
|
831 free(rz->header->extra);
|
|
832 free(rz->header->name);
|
|
833 free(rz->header->comment);
|
|
834 free(rz->header);
|
|
835 }
|
|
836 if(rz->index){
|
|
837 free(rz->index->bin_offsets);
|
|
838 free(rz->index->cell_offsets);
|
|
839 free(rz->index);
|
|
840 }
|
|
841 free(rz->stream);
|
|
842 #ifdef _USE_KNETFILE
|
|
843 if (rz->mode == 'r')
|
|
844 knet_close(rz->x.fpr);
|
|
845 if (rz->mode == 'w')
|
|
846 close(rz->x.fpw);
|
|
847 #else
|
|
848 close(rz->filedes);
|
|
849 #endif
|
|
850 free(rz);
|
|
851 }
|
|
852
|
|
853 #endif
|