Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/cram/mFILE.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfa3745e5fd8 |
---|---|
1 /* | |
2 Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd. | |
3 Author: James Bonfield <jkb@sanger.ac.uk> | |
4 | |
5 Redistribution and use in source and binary forms, with or without | |
6 modification, are permitted provided that the following conditions are met: | |
7 | |
8 1. Redistributions of source code must retain the above copyright notice, | |
9 this list of conditions and the following disclaimer. | |
10 | |
11 2. Redistributions in binary form must reproduce the above copyright notice, | |
12 this list of conditions and the following disclaimer in the documentation | |
13 and/or other materials provided with the distribution. | |
14 | |
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger | |
16 Institute nor the names of its contributors may be used to endorse or promote | |
17 products derived from this software without specific prior written permission. | |
18 | |
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND | |
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE | |
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
29 */ | |
30 | |
31 | |
32 #ifdef HAVE_CONFIG_H | |
33 #include "io_lib_config.h" | |
34 #endif | |
35 | |
36 #include <stdio.h> | |
37 #include <stdlib.h> | |
38 #include <errno.h> | |
39 #include <string.h> | |
40 #include <sys/types.h> | |
41 #include <sys/stat.h> | |
42 #include <fcntl.h> | |
43 #include <unistd.h> | |
44 #include <stdarg.h> | |
45 | |
46 #include "cram/os.h" | |
47 #include "cram/mFILE.h" | |
48 #include "cram/vlen.h" | |
49 | |
50 /* | |
51 * This file contains memory-based versions of the most commonly used | |
52 * (by io_lib) stdio functions. | |
53 * | |
54 * Actual file IO takes place either on opening or closing an mFILE. | |
55 * | |
56 * Coupled to this are a bunch of rather scary macros which can be obtained | |
57 * by including stdio_hack.h. It is recommended though that you use mFILE.h | |
58 * instead and replace fopen with mfopen (etc). This is more or less | |
59 * mandatory if you wish to use both FILE and mFILE structs in a single file. | |
60 */ | |
61 | |
62 static mFILE *m_channel[3]; /* Fake mFILEs for stdin [0], stdout [1] and stderr [2]; each is created on first use */ | |
63 | |
/*
 * Reads the remainder of fp into a freshly allocated buffer.
 *
 * fp     - stream to read from; it is neither closed nor rewound here.
 * fn     - optional filename associated with fp. When stat() reports a
 *          non-zero size for it, the buffer is allocated in one go and
 *          reading stops once that many bytes have arrived. Otherwise
 *          (no name, stat failure, or a reported size of 0) the stream is
 *          read in 8192 byte steps until EOF.
 * size   - receives the number of bytes read; set to 0 on failure.
 * binary - only used on Windows, where it selects binary or text mode for
 *          the underlying file descriptor.
 *
 * Returns a malloced buffer holding *size bytes on success (caller frees)
 *         NULL on allocation failure or read error
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;
    size_t expected = 0;

    *size = 0;

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#else
    (void)binary;
#endif

    /*
     * A reported size of 0 is treated as "unknown" so that such files are
     * still read to EOF rather than being assumed empty.
     */
    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        expected = (size_t)sb.st_size;
        bufsize = expected;
        data = malloc(expected);
        if (NULL == data)
            return NULL;
        allocated = expected;
    } else {
        fn = NULL;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            /* Grow via a temporary so the old buffer is not lost on failure */
            char *grown = realloc(data, allocated + bufsize);
            if (NULL == grown) {
                free(data);
                return NULL;
            }
            data = grown;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        if (len > 0)
            used += len;

        /* A read error never sets EOF, so it must end the loop explicitly */
        if (ferror(fp)) {
            free(data);
            return NULL;
        }
    } while (!feof(fp) && (fn == NULL || used < expected));

    *size = used;

    return data;
}
108 | |
109 /* | |
110 * Creates and returns m_channel[0]. | |
111 * We initialise this on the first attempted read, which then slurps in | |
112 * all of stdin until EOF is met. | |
113 */ | |
114 mFILE *mstdin(void) { | |
115 if (m_channel[0]) | |
116 return m_channel[0]; | |
117 | |
118 m_channel[0] = mfcreate(NULL, 0); | |
119 if (NULL == m_channel[0]) return NULL; | |
120 m_channel[0]->fp = stdin; | |
121 return m_channel[0]; | |
122 } | |
123 | |
124 static void init_mstdin(void) { | |
125 static int done_stdin = 0; | |
126 if (done_stdin) | |
127 return; | |
128 | |
129 m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); | |
130 m_channel[0]->mode = MF_READ; | |
131 done_stdin = 1; | |
132 } | |
133 | |
134 /* | |
135 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as | |
136 * an empty buffer which is physically written out only when mfflush or | |
137 * mfclose are called. | |
138 */ | |
139 mFILE *mstdout(void) { | |
140 if (m_channel[1]) | |
141 return m_channel[1]; | |
142 | |
143 m_channel[1] = mfcreate(NULL, 0); | |
144 if (NULL == m_channel[1]) return NULL; | |
145 m_channel[1]->fp = stdout; | |
146 m_channel[1]->mode = MF_WRITE; | |
147 return m_channel[1]; | |
148 } | |
149 | |
150 /* | |
151 * Stderr as an mFILE. | |
152 * The code handles stderr by returning m_channel[2], but also checking | |
153 * for stderr in fprintf (the common usage of it) to auto-flush. | |
154 */ | |
155 mFILE *mstderr(void) { | |
156 if (m_channel[2]) | |
157 return m_channel[2]; | |
158 | |
159 m_channel[2] = mfcreate(NULL, 0); | |
160 if (NULL == m_channel[2]) return NULL; | |
161 m_channel[2]->fp = stderr; | |
162 m_channel[2]->mode = MF_WRITE; | |
163 return m_channel[2]; | |
164 } | |
165 | |
166 | |
167 /* | |
168 * For creating existing mFILE pointers directly from memory buffers. | |
169 */ | |
170 mFILE *mfcreate(char *data, int size) { | |
171 mFILE *mf = (mFILE *)malloc(sizeof(*mf)); | |
172 if (NULL == mf) return NULL; | |
173 mf->fp = NULL; | |
174 mf->data = data; | |
175 mf->alloced = size; | |
176 mf->size = size; | |
177 mf->eof = 0; | |
178 mf->offset = 0; | |
179 mf->flush_pos = 0; | |
180 mf->mode = MF_READ | MF_WRITE; | |
181 return mf; | |
182 } | |
183 | |
184 /* | |
185 * Recreate an existing mFILE to house new data/size. | |
186 * It also rewinds the file. | |
187 */ | |
188 void mfrecreate(mFILE *mf, char *data, int size) { | |
189 if (mf->data) | |
190 free(mf->data); | |
191 mf->data = data; | |
192 mf->size = size; | |
193 mf->alloced = size; | |
194 mf->eof = 0; | |
195 mf->offset = 0; | |
196 mf->flush_pos = 0; | |
197 } | |
198 | |
199 | |
200 /* | |
201 * Creates a new mFILE to contain the contents of the FILE pointer. | |
202 * This mFILE is purely for in-memory operations and has no links to the | |
203 * original FILE* it came from. It also doesn't close the FILE pointer. | |
204 * Consider using mfreopen() is you need different behaviour. | |
205 * | |
206 * Returns mFILE * on success | |
207 * NULL on failure. | |
208 */ | |
209 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { | |
210 mFILE *mf; | |
211 | |
212 /* Open using mfreopen() */ | |
213 if (NULL == (mf = mfreopen(path, mode_str, fp))) | |
214 return NULL; | |
215 | |
216 /* Disassociate from the input stream */ | |
217 mf->fp = NULL; | |
218 | |
219 return mf; | |
220 } | |
221 | |
222 /* | |
223 * Converts a FILE * to an mFILE *. | |
224 * Use this for wrapper functions to turn external prototypes requring | |
225 * FILE * as an argument into internal code using mFILE *. | |
226 */ | |
227 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { | |
228 mFILE *mf; | |
229 int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; | |
230 | |
231 /* Parse mode: | |
232 * r = read file contents (if truncated => don't read) | |
233 * w = write on close | |
234 * a = position at end of buffer | |
235 * x = position at same location as the original fp, don't seek on flush | |
236 */ | |
237 if (strchr(mode_str, 'r')) | |
238 r = 1, mode |= MF_READ; | |
239 if (strchr(mode_str, 'w')) | |
240 w = 1, mode |= MF_WRITE | MF_TRUNC; | |
241 if (strchr(mode_str, 'a')) | |
242 w = a = 1, mode |= MF_WRITE | MF_APPEND; | |
243 if (strchr(mode_str, 'b')) | |
244 b = 1, mode |= MF_BINARY; | |
245 if (strchr(mode_str, 'x')) | |
246 x = 1; | |
247 if (strchr(mode_str, '+')) { | |
248 w = 1, mode |= MF_READ | MF_WRITE; | |
249 if (a) | |
250 r = 1; | |
251 } | |
252 | |
253 if (r) { | |
254 mf = mfcreate(NULL, 0); | |
255 if (NULL == mf) return NULL; | |
256 if (!(mode & MF_TRUNC)) { | |
257 mf->data = mfload(fp, path, &mf->size, b); | |
258 mf->alloced = mf->size; | |
259 if (!a) | |
260 fseek(fp, 0, SEEK_SET); | |
261 } | |
262 } else if (w) { | |
263 /* Write - initialise the data structures */ | |
264 mf = mfcreate(NULL, 0); | |
265 if (NULL == mf) return NULL; | |
266 } else { | |
267 fprintf(stderr, "Must specify either r, w or a for mode\n"); | |
268 return NULL; | |
269 } | |
270 mf->fp = fp; | |
271 mf->mode = mode; | |
272 | |
273 if (x) { | |
274 mf->mode |= MF_MODEX; | |
275 } | |
276 | |
277 if (a) { | |
278 mf->flush_pos = mf->size; | |
279 fseek(fp, 0, SEEK_END); | |
280 } | |
281 | |
282 return mf; | |
283 } | |
284 | |
285 /* | |
286 * Opens a file. If we have read access (r or a+) then it loads the entire | |
287 * file into memory. If We have write access then the pathname is stored. | |
288 * We do not actually write until an mfclose, which then checks this pathname. | |
289 */ | |
290 mFILE *mfopen(const char *path, const char *mode) { | |
291 FILE *fp; | |
292 | |
293 if (NULL == (fp = fopen(path, mode))) | |
294 return NULL; | |
295 return mfreopen(path, mode, fp); | |
296 } | |
297 | |
298 /* | |
299 * Closes an mFILE. If the filename is known (implying write access) then this | |
300 * also writes the data to disk. | |
301 * | |
302 * Stdout is handled by calling mfflush which writes to stdout if appropriate. | |
303 */ | |
304 int mfclose(mFILE *mf) { | |
305 if (!mf) | |
306 return -1; | |
307 | |
308 mfflush(mf); | |
309 | |
310 if (mf->fp) | |
311 fclose(mf->fp); | |
312 | |
313 mfdestroy(mf); | |
314 | |
315 return 0; | |
316 } | |
317 | |
318 /* | |
319 * Closes the file pointer contained within the mFILE without destroying | |
320 * the in-memory data. | |
321 */ | |
322 int mfdetach(mFILE *mf) { | |
323 if (!mf) | |
324 return -1; | |
325 | |
326 mfflush(mf); | |
327 | |
328 if (mf->fp) { | |
329 fclose(mf->fp); | |
330 mf->fp = NULL; | |
331 } | |
332 | |
333 return 0; | |
334 } | |
335 | |
336 /* | |
337 * Destroys an mFILE structure but does not flush or close it | |
338 */ | |
339 int mfdestroy(mFILE *mf) { | |
340 if (!mf) | |
341 return -1; | |
342 | |
343 if (mf->data) | |
344 free(mf->data); | |
345 free(mf); | |
346 | |
347 return 0; | |
348 } | |
349 | |
350 /* | |
351 * Steals that data out of an mFILE. The mFILE itself will be closed. | |
352 * It is up to the caller to free the stolen buffer. If size_out is | |
353 * not NULL, mf->size will be stored in it. | |
354 * This is more-or-less the opposite of mfcreate(). | |
355 */ | |
356 | |
357 void *mfsteal(mFILE *mf, size_t *size_out) { | |
358 void *data; | |
359 | |
360 if (!mf) return NULL; | |
361 | |
362 data = mf->data; | |
363 | |
364 if (NULL != size_out) *size_out = mf->size; | |
365 | |
366 mfdetach(mf); | |
367 mf->data = NULL; | |
368 mfdestroy(mf); | |
369 | |
370 return data; | |
371 } | |
372 | |
373 /* | |
374 * Seek/tell functions. Nothing more than updating and reporting an | |
375 * in-memory index. NB we can seek on stdin or stdout even provided we | |
376 * haven't been flushing. | |
377 */ | |
378 int mfseek(mFILE *mf, long offset, int whence) { | |
379 switch (whence) { | |
380 case SEEK_SET: | |
381 mf->offset = offset; | |
382 break; | |
383 case SEEK_CUR: | |
384 mf->offset += offset; | |
385 break; | |
386 case SEEK_END: | |
387 mf->offset = mf->size + offset; | |
388 break; | |
389 default: | |
390 errno = EINVAL; | |
391 return -1; | |
392 } | |
393 | |
394 mf->eof = 0; | |
395 return 0; | |
396 } | |
397 | |
398 long mftell(mFILE *mf) { | |
399 return mf->offset; | |
400 } | |
401 | |
402 void mrewind(mFILE *mf) { | |
403 mf->offset = 0; | |
404 mf->eof = 0; | |
405 } | |
406 | |
407 /* | |
408 * mftruncate is not directly a translation of ftruncate as the latter | |
409 * takes a file descriptor instead of a FILE *. It performs the analogous | |
410 * role though. | |
411 * | |
412 * If offset is -1 then the file is truncated to be the current file | |
413 * offset. | |
414 */ | |
415 void mftruncate(mFILE *mf, long offset) { | |
416 mf->size = offset != -1 ? offset : mf->offset; | |
417 if (mf->offset > mf->size) | |
418 mf->offset = mf->size; | |
419 } | |
420 | |
421 int mfeof(mFILE *mf) { | |
422 return mf->eof; | |
423 } | |
424 | |
425 /* | |
426 * mFILE read/write functions. Basically these turn fread/fwrite syntax | |
427 * into memcpy statements, with appropriate memory handling for writing. | |
428 */ | |
429 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
430 size_t len; | |
431 char *cptr = (char *)ptr; | |
432 | |
433 if (mf == m_channel[0]) init_mstdin(); | |
434 | |
435 if (mf->size <= mf->offset) | |
436 return 0; | |
437 | |
438 len = size * nmemb <= mf->size - mf->offset | |
439 ? size * nmemb | |
440 : mf->size - mf->offset; | |
441 if (!size) | |
442 return 0; | |
443 | |
444 memcpy(cptr, &mf->data[mf->offset], len); | |
445 mf->offset += len; | |
446 | |
447 if (len != size * nmemb) { | |
448 mf->eof = 1; | |
449 } | |
450 | |
451 return len / size; | |
452 } | |
453 | |
454 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
455 if (!(mf->mode & MF_WRITE)) | |
456 return 0; | |
457 | |
458 /* Append mode => forced all writes to end of file */ | |
459 if (mf->mode & MF_APPEND) | |
460 mf->offset = mf->size; | |
461 | |
462 /* Make sure we have enough room */ | |
463 while (size * nmemb + mf->offset > mf->alloced) { | |
464 size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
465 void * new_data = realloc(mf->data, new_alloced); | |
466 if (NULL == new_data) return 0; | |
467 mf->alloced = new_alloced; | |
468 mf->data = new_data; | |
469 } | |
470 | |
471 /* Record where we need to reflush from */ | |
472 if (mf->offset < mf->flush_pos) | |
473 mf->flush_pos = mf->offset; | |
474 | |
475 /* Copy the data over */ | |
476 memcpy(&mf->data[mf->offset], ptr, size * nmemb); | |
477 mf->offset += size * nmemb; | |
478 if (mf->size < mf->offset) | |
479 mf->size = mf->offset; | |
480 | |
481 return nmemb; | |
482 } | |
483 | |
484 int mfgetc(mFILE *mf) { | |
485 if (mf == m_channel[0]) init_mstdin(); | |
486 if (mf->offset < mf->size) { | |
487 return (unsigned char)mf->data[mf->offset++]; | |
488 } | |
489 | |
490 mf->eof = 1; | |
491 return -1; | |
492 } | |
493 | |
494 int mungetc(int c, mFILE *mf) { | |
495 if (mf->offset > 0) { | |
496 mf->data[--mf->offset] = c; | |
497 return c; | |
498 } | |
499 | |
500 mf->eof = 1; | |
501 return -1; | |
502 } | |
503 | |
504 char *mfgets(char *s, int size, mFILE *mf) { | |
505 int i; | |
506 | |
507 if (mf == m_channel[0]) init_mstdin(); | |
508 *s = 0; | |
509 for (i = 0; i < size-1;) { | |
510 if (mf->offset < mf->size) { | |
511 s[i] = mf->data[mf->offset++]; | |
512 if (s[i++] == '\n') | |
513 break; | |
514 } else { | |
515 mf->eof = 1; | |
516 break; | |
517 } | |
518 } | |
519 | |
520 s[i] = 0; | |
521 return i ? s : NULL; | |
522 } | |
523 | |
524 /* | |
525 * Flushes an mFILE. If this is a real open of a file in write mode then | |
526 * mFILE->fp will be set. We then write out any new data in mFILE since the | |
527 * last flush. We cannot tell what may have been modified as we don't keep | |
528 * track of that, so we typically rewrite out the entire file contents between | |
529 * the last flush_pos and the end of file. | |
530 * | |
531 * For stderr/stdout we also reset the offsets so we cannot modify things | |
532 * we've already output. | |
533 */ | |
534 int mfflush(mFILE *mf) { | |
535 if (!mf->fp) | |
536 return 0; | |
537 | |
538 /* FIXME: only do this when opened in write mode */ | |
539 if (mf == m_channel[1] || mf == m_channel[2]) { | |
540 if (mf->flush_pos < mf->size) { | |
541 size_t bytes = mf->size - mf->flush_pos; | |
542 if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) | |
543 return -1; | |
544 if (0 != fflush(mf->fp)) | |
545 return -1; | |
546 } | |
547 | |
548 /* Stdout & stderr are non-seekable streams so throw away the data */ | |
549 mf->offset = mf->size = mf->flush_pos = 0; | |
550 } | |
551 | |
552 /* only flush when opened in write mode */ | |
553 if (mf->mode & MF_WRITE) { | |
554 if (mf->flush_pos < mf->size) { | |
555 size_t bytes = mf->size - mf->flush_pos; | |
556 if (!(mf->mode & MF_MODEX)) { | |
557 fseek(mf->fp, mf->flush_pos, SEEK_SET); | |
558 } | |
559 if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) | |
560 return -1; | |
561 if (0 != fflush(mf->fp)) | |
562 return -1; | |
563 } | |
564 if (ftell(mf->fp) != -1 && | |
565 ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1) | |
566 return -1; | |
567 mf->flush_pos = mf->size; | |
568 } | |
569 | |
570 return 0; | |
571 } | |
572 | |
573 /* | |
574 * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to | |
575 * estimate how many additional bytes of storage will be required for the | |
576 * vsprintf to work. | |
577 */ | |
578 int mfprintf(mFILE *mf, char *fmt, ...) { | |
579 int ret; | |
580 size_t est_length; | |
581 va_list args; | |
582 | |
583 va_start(args, fmt); | |
584 est_length = vflen(fmt, args); | |
585 va_end(args); | |
586 while (est_length + mf->offset > mf->alloced) { | |
587 size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
588 void * new_data = realloc(mf->data, new_alloced); | |
589 if (NULL == new_data) return -1; | |
590 mf->alloced = new_alloced; | |
591 mf->data = new_data; | |
592 } | |
593 | |
594 va_start(args, fmt); | |
595 ret = vsprintf(&mf->data[mf->offset], fmt, args); | |
596 va_end(args); | |
597 | |
598 if (ret > 0) { | |
599 mf->offset += ret; | |
600 if (mf->size < mf->offset) | |
601 mf->size = mf->offset; | |
602 } | |
603 | |
604 if (mf->fp == stderr) { | |
605 /* Auto-flush for stderr */ | |
606 if (0 != mfflush(mf)) return -1; | |
607 } | |
608 | |
609 return ret; | |
610 } | |
611 | |
612 /* | |
613 * Converts an mFILE from binary to ascii mode by replacing all | |
614 * cr-nl with nl. | |
615 * | |
616 * Primarily used on windows when we've uncompressed a binary file which | |
617 * happens to be a text file (eg Experiment File). Previously we would have | |
618 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). | |
619 * | |
620 * Side effect: resets offset and flush_pos back to the start. | |
621 */ | |
622 void mfascii(mFILE *mf) { | |
623 size_t p1, p2; | |
624 | |
625 for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { | |
626 if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { | |
627 p2--; /* delete the \r */ | |
628 } | |
629 mf->data[p2] = mf->data[p1]; | |
630 } | |
631 mf->size = p2; | |
632 | |
633 mf->offset = mf->flush_pos = 0; | |
634 } |