Mercurial > repos > ryanmorin > nextgen_variant_identification
comparison SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/samtools-0.1.6/razf.c @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:74f5ea818cea |
---|---|
1 /* | |
2 * RAZF : Random Access compressed(Z) File | |
3 * Version: 1.0 | |
4 * Release Date: 2008-10-27 | |
5 * | |
6 * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk> | |
7 * | |
8 * All rights reserved. | |
9 * | |
10 * Redistribution and use in source and binary forms, with or without | |
11 * modification, are permitted provided that the following conditions | |
12 * are met: | |
13 * 1. Redistributions of source code must retain the above copyright | |
14 * notice, this list of conditions and the following disclaimer. | |
15 * 2. Redistributions in binary form must reproduce the above copyright | |
16 * notice, this list of conditions and the following disclaimer in the | |
17 * documentation and/or other materials provided with the distribution. | |
18 * | |
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | |
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE | |
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
29 * SUCH DAMAGE. | |
30 */ | |
31 | |
32 #ifndef _NO_RAZF | |
33 | |
34 #include <fcntl.h> | |
35 #include <stdio.h> | |
36 #include <stdlib.h> | |
37 #include <string.h> | |
38 #include <unistd.h> | |
39 #include "razf.h" | |
40 | |
41 #if ZLIB_VERNUM < 0x1221 | |
42 struct _gz_header_s { | |
43 int text; | |
44 uLong time; | |
45 int xflags; | |
46 int os; | |
47 Bytef *extra; | |
48 uInt extra_len; | |
49 uInt extra_max; | |
50 Bytef *name; | |
51 uInt name_max; | |
52 Bytef *comment; | |
53 uInt comm_max; | |
54 int hcrc; | |
55 int done; | |
56 }; | |
57 #warning "zlib < 1.2.2.1; RAZF writing is disabled." | |
58 #endif | |
59 | |
/* memLevel argument handed to deflateInit2() when a writer is opened. */
#define DEF_MEM_LEVEL 8
61 | |
/* Reverse the byte order of a 32-bit value (b3 b2 b1 b0 -> b0 b1 b2 b3). */
static inline uint32_t byte_swap_4(uint32_t v){
	return  (v >> 24)
	     | ((v >>  8) & 0x0000FF00U)
	     | ((v <<  8) & 0x00FF0000U)
	     |  (v << 24);
}
66 | |
/* Reverse the byte order of a 64-bit value. */
static inline uint64_t byte_swap_8(uint64_t v){
	uint64_t swapped = 0;
	int k;
	/* Peel bytes off the low end of v and push them onto the low end of
	 * the result, so the first byte taken ends up most significant. */
	for(k = 0; k < 8; k++){
		swapped = (swapped << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return swapped;
}
72 | |
/* Return non-zero when the first byte in memory of the int 1 is not 1,
 * i.e. when the host stores the least significant byte last. */
static inline int is_big_endian(){
	const int probe = 0x01;
	const char *first_byte = (const char*)&probe;
	return *first_byte != 0x01;
}
78 | |
79 #ifndef _RZ_READONLY | |
80 static void add_zindex(RAZF *rz, int64_t in, int64_t out){ | |
81 if(rz->index->size == rz->index->cap){ | |
82 rz->index->cap = rz->index->cap * 1.5 + 2; | |
83 rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap); | |
84 rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1)); | |
85 } | |
86 if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out; | |
87 rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE]; | |
88 rz->index->size ++; | |
89 } | |
90 | |
91 static void save_zindex(RAZF *rz, int fd){ | |
92 int32_t i, v32; | |
93 int is_be; | |
94 is_be = is_big_endian(); | |
95 if(is_be) write(fd, &rz->index->size, sizeof(int)); | |
96 else { | |
97 v32 = byte_swap_4((uint32_t)rz->index->size); | |
98 write(fd, &v32, sizeof(uint32_t)); | |
99 } | |
100 v32 = rz->index->size / RZ_BIN_SIZE + 1; | |
101 if(!is_be){ | |
102 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); | |
103 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); | |
104 } | |
105 write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); | |
106 write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size); | |
107 } | |
108 #endif | |
109 | |
110 static void load_zindex(RAZF *rz, int fd){ | |
111 int32_t i, v32; | |
112 int is_be; | |
113 if(!rz->load_index) return; | |
114 if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex)); | |
115 is_be = is_big_endian(); | |
116 read(fd, &rz->index->size, sizeof(int)); | |
117 if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size); | |
118 rz->index->cap = rz->index->size; | |
119 v32 = rz->index->size / RZ_BIN_SIZE + 1; | |
120 rz->index->bin_offsets = malloc(sizeof(int64_t) * v32); | |
121 read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32); | |
122 rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size); | |
123 read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size); | |
124 if(!is_be){ | |
125 for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]); | |
126 for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]); | |
127 } | |
128 } | |
129 | |
130 #ifdef _RZ_READONLY | |
131 static RAZF* razf_open_w(int fd) | |
132 { | |
133 fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n"); | |
134 return 0; | |
135 } | |
136 #else | |
137 static RAZF* razf_open_w(int fd){ | |
138 RAZF *rz; | |
139 #ifdef _WIN32 | |
140 setmode(fd, O_BINARY); | |
141 #endif | |
142 rz = calloc(1, sizeof(RAZF)); | |
143 rz->mode = 'w'; | |
144 rz->filedes = fd; | |
145 rz->stream = calloc(sizeof(z_stream), 1); | |
146 rz->inbuf = malloc(RZ_BUFFER_SIZE); | |
147 rz->outbuf = malloc(RZ_BUFFER_SIZE); | |
148 rz->index = calloc(sizeof(ZBlockIndex), 1); | |
149 deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); | |
150 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
151 rz->stream->next_out = rz->outbuf; | |
152 rz->header = calloc(sizeof(gz_header), 1); | |
153 rz->header->os = 0x03; //Unix | |
154 rz->header->text = 0; | |
155 rz->header->time = 0; | |
156 rz->header->extra = malloc(7); | |
157 strncpy((char*)rz->header->extra, "RAZF", 4); | |
158 rz->header->extra[4] = 1; // obsolete field | |
159 // block size = RZ_BLOCK_SIZE, Big-Endian | |
160 rz->header->extra[5] = RZ_BLOCK_SIZE >> 8; | |
161 rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF; | |
162 rz->header->extra_len = 7; | |
163 rz->header->name = rz->header->comment = 0; | |
164 rz->header->hcrc = 0; | |
165 deflateSetHeader(rz->stream, rz->header); | |
166 rz->block_pos = rz->block_off = 0; | |
167 return rz; | |
168 } | |
169 | |
170 static void _razf_write(RAZF* rz, const void *data, int size){ | |
171 int tout; | |
172 rz->stream->avail_in = size; | |
173 rz->stream->next_in = (void*)data; | |
174 while(1){ | |
175 tout = rz->stream->avail_out; | |
176 deflate(rz->stream, Z_NO_FLUSH); | |
177 rz->out += tout - rz->stream->avail_out; | |
178 if(rz->stream->avail_out) break; | |
179 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
180 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
181 rz->stream->next_out = rz->outbuf; | |
182 if(rz->stream->avail_in == 0) break; | |
183 }; | |
184 rz->in += size - rz->stream->avail_in; | |
185 rz->block_off += size - rz->stream->avail_in; | |
186 } | |
187 | |
188 static void razf_flush(RAZF *rz){ | |
189 uint32_t tout; | |
190 if(rz->buf_len){ | |
191 _razf_write(rz, rz->inbuf, rz->buf_len); | |
192 rz->buf_off = rz->buf_len = 0; | |
193 } | |
194 if(rz->stream->avail_out){ | |
195 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
196 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
197 rz->stream->next_out = rz->outbuf; | |
198 } | |
199 while(1){ | |
200 tout = rz->stream->avail_out; | |
201 deflate(rz->stream, Z_FULL_FLUSH); | |
202 rz->out += tout - rz->stream->avail_out; | |
203 if(rz->stream->avail_out == 0){ | |
204 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
205 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
206 rz->stream->next_out = rz->outbuf; | |
207 } else break; | |
208 } | |
209 rz->block_pos = rz->out; | |
210 rz->block_off = 0; | |
211 } | |
212 | |
213 static void razf_end_flush(RAZF *rz){ | |
214 uint32_t tout; | |
215 if(rz->buf_len){ | |
216 _razf_write(rz, rz->inbuf, rz->buf_len); | |
217 rz->buf_off = rz->buf_len = 0; | |
218 } | |
219 while(1){ | |
220 tout = rz->stream->avail_out; | |
221 deflate(rz->stream, Z_FINISH); | |
222 rz->out += tout - rz->stream->avail_out; | |
223 if(rz->stream->avail_out < RZ_BUFFER_SIZE){ | |
224 write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out); | |
225 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
226 rz->stream->next_out = rz->outbuf; | |
227 } else break; | |
228 } | |
229 } | |
230 | |
231 static void _razf_buffered_write(RAZF *rz, const void *data, int size){ | |
232 int i, n; | |
233 while(1){ | |
234 if(rz->buf_len == RZ_BUFFER_SIZE){ | |
235 _razf_write(rz, rz->inbuf, rz->buf_len); | |
236 rz->buf_len = 0; | |
237 } | |
238 if(size + rz->buf_len < RZ_BUFFER_SIZE){ | |
239 for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i]; | |
240 rz->buf_len += size; | |
241 return; | |
242 } else { | |
243 n = RZ_BUFFER_SIZE - rz->buf_len; | |
244 for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i]; | |
245 size -= n; | |
246 data += n; | |
247 rz->buf_len += n; | |
248 } | |
249 } | |
250 } | |
251 | |
252 int razf_write(RAZF* rz, const void *data, int size){ | |
253 int ori_size, n; | |
254 int64_t next_block; | |
255 ori_size = size; | |
256 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; | |
257 while(rz->in + rz->buf_len + size >= next_block){ | |
258 n = next_block - rz->in - rz->buf_len; | |
259 _razf_buffered_write(rz, data, n); | |
260 data += n; | |
261 size -= n; | |
262 razf_flush(rz); | |
263 add_zindex(rz, rz->in, rz->out); | |
264 next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE; | |
265 } | |
266 _razf_buffered_write(rz, data, size); | |
267 return ori_size; | |
268 } | |
269 #endif | |
270 | |
/* Bits of the gzip header FLG byte */
#define ASCII_FLAG  (1 << 0) /* payload is probably ASCII text */
#define HEAD_CRC    (1 << 1) /* a header CRC follows the header */
#define EXTRA_FIELD (1 << 2) /* an extra field is present */
#define ORIG_NAME   (1 << 3) /* the original file name is present */
#define COMMENT     (1 << 4) /* a file comment is present */
#define RESERVED    0xE0     /* bits 5..7 are reserved */
278 | |
279 static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){ | |
280 int method, flags, n, len; | |
281 if(size < 2) return 0; | |
282 if(data[0] != 0x1f || data[1] != 0x8b) return 0; | |
283 if(size < 4) return 0; | |
284 method = data[2]; | |
285 flags = data[3]; | |
286 if(method != Z_DEFLATED || (flags & RESERVED)) return 0; | |
287 n = 4 + 6; // Skip 6 bytes | |
288 *extra_off = n + 2; | |
289 *extra_len = 0; | |
290 if(flags & EXTRA_FIELD){ | |
291 if(size < n + 2) return 0; | |
292 len = ((int)data[n + 1] << 8) | data[n]; | |
293 n += 2; | |
294 *extra_off = n; | |
295 while(len){ | |
296 if(n >= size) return 0; | |
297 n ++; | |
298 len --; | |
299 } | |
300 *extra_len = n - (*extra_off); | |
301 } | |
302 if(flags & ORIG_NAME) while(n < size && data[n++]); | |
303 if(flags & COMMENT) while(n < size && data[n++]); | |
304 if(flags & HEAD_CRC){ | |
305 if(n + 2 > size) return 0; | |
306 n += 2; | |
307 } | |
308 return n; | |
309 } | |
310 | |
311 static RAZF* razf_open_r(int fd, int _load_index){ | |
312 RAZF *rz; | |
313 int ext_off, ext_len; | |
314 int n, is_be, ret; | |
315 int64_t end; | |
316 unsigned char c[] = "RAZF"; | |
317 #ifdef _WIN32 | |
318 setmode(fd, O_BINARY); | |
319 #endif | |
320 rz = calloc(1, sizeof(RAZF)); | |
321 rz->mode = 'r'; | |
322 rz->filedes = fd; | |
323 rz->stream = calloc(sizeof(z_stream), 1); | |
324 rz->inbuf = malloc(RZ_BUFFER_SIZE); | |
325 rz->outbuf = malloc(RZ_BUFFER_SIZE); | |
326 rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL; | |
327 n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); | |
328 ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len); | |
329 if(ret == 0){ | |
330 PLAIN_FILE: | |
331 rz->in = n; | |
332 rz->file_type = FILE_TYPE_PLAIN; | |
333 memcpy(rz->outbuf, rz->inbuf, n); | |
334 rz->buf_len = n; | |
335 free(rz->stream); | |
336 rz->stream = NULL; | |
337 return rz; | |
338 } | |
339 rz->header_size = ret; | |
340 ret = inflateInit2(rz->stream, -WINDOW_BITS); | |
341 if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;} | |
342 rz->stream->avail_in = n - rz->header_size; | |
343 rz->stream->next_in = rz->inbuf + rz->header_size; | |
344 rz->stream->avail_out = RZ_BUFFER_SIZE; | |
345 rz->stream->next_out = rz->outbuf; | |
346 rz->file_type = FILE_TYPE_GZ; | |
347 rz->in = rz->header_size; | |
348 rz->block_pos = rz->header_size; | |
349 rz->next_block_pos = rz->header_size; | |
350 rz->block_off = 0; | |
351 if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz; | |
352 if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){ | |
353 fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file. in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__); | |
354 return rz; | |
355 } | |
356 rz->load_index = _load_index; | |
357 rz->file_type = FILE_TYPE_RZ; | |
358 if(lseek(fd, -16, SEEK_END) == -1){ | |
359 UNSEEKABLE: | |
360 rz->seekable = 0; | |
361 rz->index = NULL; | |
362 rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL; | |
363 } else { | |
364 is_be = is_big_endian(); | |
365 rz->seekable = 1; | |
366 read(fd, &end, sizeof(int64_t)); | |
367 if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end); | |
368 else rz->src_end = end; | |
369 read(fd, &end, sizeof(int64_t)); | |
370 if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end); | |
371 else rz->end = end; | |
372 if(n > rz->end){ | |
373 rz->stream->avail_in -= n - rz->end; | |
374 n = rz->end; | |
375 } | |
376 if(rz->end > rz->src_end){ | |
377 lseek(fd, rz->in, SEEK_SET); | |
378 goto UNSEEKABLE; | |
379 } | |
380 if(lseek(fd, rz->end, SEEK_SET) != rz->end){ | |
381 lseek(fd, rz->in, SEEK_SET); | |
382 goto UNSEEKABLE; | |
383 } | |
384 load_zindex(rz, fd); | |
385 lseek(fd, n, SEEK_SET); | |
386 } | |
387 return rz; | |
388 } | |
389 | |
390 RAZF* razf_dopen(int fd, const char *mode){ | |
391 if(strstr(mode, "r")) return razf_open_r(fd, 1); | |
392 else if(strstr(mode, "w")) return razf_open_w(fd); | |
393 else return NULL; | |
394 } | |
395 | |
396 RAZF* razf_dopen2(int fd, const char *mode) | |
397 { | |
398 if(strstr(mode, "r")) return razf_open_r(fd, 0); | |
399 else if(strstr(mode, "w")) return razf_open_w(fd); | |
400 else return NULL; | |
401 } | |
402 | |
403 static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){ | |
404 int fd; | |
405 RAZF *rz; | |
406 if(strstr(mode, "r")){ | |
407 #ifdef _WIN32 | |
408 fd = open(filename, O_RDONLY | O_BINARY); | |
409 #else | |
410 fd = open(filename, O_RDONLY); | |
411 #endif | |
412 rz = razf_open_r(fd, _load_index); | |
413 } else if(strstr(mode, "w")){ | |
414 #ifdef _WIN32 | |
415 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644); | |
416 #else | |
417 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); | |
418 #endif | |
419 rz = razf_open_w(fd); | |
420 } else return NULL; | |
421 return rz; | |
422 } | |
423 | |
424 RAZF* razf_open(const char *filename, const char *mode){ | |
425 return _razf_open(filename, mode, 1); | |
426 } | |
427 | |
428 RAZF* razf_open2(const char *filename, const char *mode){ | |
429 return _razf_open(filename, mode, 0); | |
430 } | |
431 | |
432 int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){ | |
433 int64_t n; | |
434 if(rz->mode != 'r' && rz->mode != 'R') return 0; | |
435 switch(rz->file_type){ | |
436 case FILE_TYPE_PLAIN: | |
437 if(rz->end == 0x7fffffffffffffffLL){ | |
438 if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0; | |
439 rz->end = lseek(rz->filedes, 0, SEEK_END); | |
440 lseek(rz->filedes, n, SEEK_SET); | |
441 } | |
442 *u_size = *c_size = rz->end; | |
443 return 1; | |
444 case FILE_TYPE_GZ: | |
445 return 0; | |
446 case FILE_TYPE_RZ: | |
447 if(rz->src_end == rz->end) return 0; | |
448 *u_size = rz->src_end; | |
449 *c_size = rz->end; | |
450 return 1; | |
451 default: | |
452 return 0; | |
453 } | |
454 } | |
455 | |
456 static int _razf_read(RAZF* rz, void *data, int size){ | |
457 int ret, tin; | |
458 if(rz->z_eof || rz->z_err) return 0; | |
459 if (rz->file_type == FILE_TYPE_PLAIN) { | |
460 ret = read(rz->filedes, data, size); | |
461 if (ret == 0) rz->z_eof = 1; | |
462 return ret; | |
463 } | |
464 rz->stream->avail_out = size; | |
465 rz->stream->next_out = data; | |
466 while(rz->stream->avail_out){ | |
467 if(rz->stream->avail_in == 0){ | |
468 if(rz->in >= rz->end){ rz->z_eof = 1; break; } | |
469 if(rz->end - rz->in < RZ_BUFFER_SIZE){ | |
470 rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in); | |
471 } else { | |
472 rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE); | |
473 } | |
474 if(rz->stream->avail_in == 0){ | |
475 rz->z_eof = 1; | |
476 break; | |
477 } | |
478 rz->stream->next_in = rz->inbuf; | |
479 } | |
480 tin = rz->stream->avail_in; | |
481 ret = inflate(rz->stream, Z_BLOCK); | |
482 rz->in += tin - rz->stream->avail_in; | |
483 if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){ | |
484 fprintf(stderr, "[_razf_read] inflate error: %d (at %s:%d)\n", ret, __FILE__, __LINE__); | |
485 rz->z_err = 1; | |
486 break; | |
487 } | |
488 if(ret == Z_STREAM_END){ | |
489 rz->z_eof = 1; | |
490 break; | |
491 } | |
492 if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){ | |
493 rz->buf_flush = 1; | |
494 rz->next_block_pos = rz->in; | |
495 break; | |
496 } | |
497 } | |
498 return size - rz->stream->avail_out; | |
499 } | |
500 | |
501 int razf_read(RAZF *rz, void *data, int size){ | |
502 int ori_size, i; | |
503 ori_size = size; | |
504 while(size > 0){ | |
505 if(rz->buf_len){ | |
506 if(size < rz->buf_len){ | |
507 for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; | |
508 rz->buf_off += size; | |
509 rz->buf_len -= size; | |
510 data += size; | |
511 rz->block_off += size; | |
512 size = 0; | |
513 break; | |
514 } else { | |
515 for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i]; | |
516 data += rz->buf_len; | |
517 size -= rz->buf_len; | |
518 rz->block_off += rz->buf_len; | |
519 rz->buf_off = 0; | |
520 rz->buf_len = 0; | |
521 if(rz->buf_flush){ | |
522 rz->block_pos = rz->next_block_pos; | |
523 rz->block_off = 0; | |
524 rz->buf_flush = 0; | |
525 } | |
526 } | |
527 } else if(rz->buf_flush){ | |
528 rz->block_pos = rz->next_block_pos; | |
529 rz->block_off = 0; | |
530 rz->buf_flush = 0; | |
531 } | |
532 if(rz->buf_flush) continue; | |
533 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); | |
534 if(rz->z_eof && rz->buf_len == 0) break; | |
535 } | |
536 rz->out += ori_size - size; | |
537 return ori_size - size; | |
538 } | |
539 | |
540 int razf_skip(RAZF* rz, int size){ | |
541 int ori_size; | |
542 ori_size = size; | |
543 while(size > 0){ | |
544 if(rz->buf_len){ | |
545 if(size < rz->buf_len){ | |
546 rz->buf_off += size; | |
547 rz->buf_len -= size; | |
548 rz->block_off += size; | |
549 size = 0; | |
550 break; | |
551 } else { | |
552 size -= rz->buf_len; | |
553 rz->buf_off = 0; | |
554 rz->buf_len = 0; | |
555 rz->block_off += rz->buf_len; | |
556 if(rz->buf_flush){ | |
557 rz->block_pos = rz->next_block_pos; | |
558 rz->block_off = 0; | |
559 rz->buf_flush = 0; | |
560 } | |
561 } | |
562 } else if(rz->buf_flush){ | |
563 rz->block_pos = rz->next_block_pos; | |
564 rz->block_off = 0; | |
565 rz->buf_flush = 0; | |
566 } | |
567 if(rz->buf_flush) continue; | |
568 rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE); | |
569 if(rz->z_eof) break; | |
570 } | |
571 rz->out += ori_size - size; | |
572 return ori_size - size; | |
573 } | |
574 | |
575 static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){ | |
576 lseek(rz->filedes, in, SEEK_SET); | |
577 rz->in = in; | |
578 rz->out = out; | |
579 rz->block_pos = in; | |
580 rz->next_block_pos = in; | |
581 rz->block_off = 0; | |
582 rz->buf_flush = 0; | |
583 rz->z_eof = rz->z_err = 0; | |
584 inflateReset(rz->stream); | |
585 rz->stream->avail_in = 0; | |
586 rz->buf_off = rz->buf_len = 0; | |
587 } | |
588 | |
589 int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){ | |
590 int64_t pos; | |
591 rz->z_eof = 0; | |
592 if(rz->file_type == FILE_TYPE_PLAIN){ | |
593 rz->buf_off = rz->buf_len = 0; | |
594 pos = block_start + block_offset; | |
595 pos = lseek(rz->filedes, pos, SEEK_SET); | |
596 rz->out = rz->in = pos; | |
597 return pos; | |
598 } | |
599 if(block_start == rz->block_pos && block_offset >= rz->block_off) { | |
600 block_offset -= rz->block_off; | |
601 goto SKIP; // Needn't reset inflate | |
602 } | |
603 if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start | |
604 _razf_reset_read(rz, block_start, 0); | |
605 SKIP: | |
606 if(block_offset) razf_skip(rz, block_offset); | |
607 return rz->block_off; | |
608 } | |
609 | |
610 int64_t razf_seek(RAZF* rz, int64_t pos, int where){ | |
611 int64_t idx; | |
612 int64_t seek_pos, new_out; | |
613 rz->z_eof = 0; | |
614 if (where == SEEK_CUR) pos += rz->out; | |
615 else if (where == SEEK_END) pos += rz->src_end; | |
616 if(rz->file_type == FILE_TYPE_PLAIN){ | |
617 seek_pos = lseek(rz->filedes, pos, SEEK_SET); | |
618 rz->buf_off = rz->buf_len = 0; | |
619 rz->out = rz->in = seek_pos; | |
620 return seek_pos; | |
621 } else if(rz->file_type == FILE_TYPE_GZ){ | |
622 if(pos >= rz->out) goto SKIP; | |
623 return rz->out; | |
624 } | |
625 if(pos == rz->out) return pos; | |
626 if(pos > rz->src_end) return rz->out; | |
627 if(!rz->seekable || !rz->load_index){ | |
628 if(pos >= rz->out) goto SKIP; | |
629 } | |
630 idx = pos / RZ_BLOCK_SIZE - 1; | |
631 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); | |
632 new_out = (idx + 1) * RZ_BLOCK_SIZE; | |
633 if(pos > rz->out && new_out <= rz->out) goto SKIP; | |
634 _razf_reset_read(rz, seek_pos, new_out); | |
635 SKIP: | |
636 razf_skip(rz, (int)(pos - rz->out)); | |
637 return rz->out; | |
638 } | |
639 | |
640 uint64_t razf_tell2(RAZF *rz) | |
641 { | |
642 /* | |
643 if (rz->load_index) { | |
644 int64_t idx, seek_pos; | |
645 idx = rz->out / RZ_BLOCK_SIZE - 1; | |
646 seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]); | |
647 if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off) | |
648 fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n", | |
649 (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off); | |
650 } | |
651 */ | |
652 return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff); | |
653 } | |
654 | |
655 int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where) | |
656 { | |
657 if (where != SEEK_SET) return -1; | |
658 return razf_jump(rz, voffset>>16, voffset&0xffff); | |
659 } | |
660 | |
661 void razf_close(RAZF *rz){ | |
662 if(rz->mode == 'w'){ | |
663 #ifndef _RZ_READONLY | |
664 razf_end_flush(rz); | |
665 deflateEnd(rz->stream); | |
666 save_zindex(rz, rz->filedes); | |
667 if(is_big_endian()){ | |
668 write(rz->filedes, &rz->in, sizeof(int64_t)); | |
669 write(rz->filedes, &rz->out, sizeof(int64_t)); | |
670 } else { | |
671 uint64_t v64 = byte_swap_8((uint64_t)rz->in); | |
672 write(rz->filedes, &v64, sizeof(int64_t)); | |
673 v64 = byte_swap_8((uint64_t)rz->out); | |
674 write(rz->filedes, &v64, sizeof(int64_t)); | |
675 } | |
676 #endif | |
677 } else if(rz->mode == 'r'){ | |
678 if(rz->stream) inflateEnd(rz->stream); | |
679 } | |
680 if(rz->inbuf) free(rz->inbuf); | |
681 if(rz->outbuf) free(rz->outbuf); | |
682 if(rz->header){ | |
683 free(rz->header->extra); | |
684 free(rz->header->name); | |
685 free(rz->header->comment); | |
686 free(rz->header); | |
687 } | |
688 if(rz->index){ | |
689 free(rz->index->bin_offsets); | |
690 free(rz->index->cell_offsets); | |
691 free(rz->index); | |
692 } | |
693 free(rz->stream); | |
694 close(rz->filedes); | |
695 free(rz); | |
696 } | |
697 | |
698 #endif |