comparison ezBAMQC/src/htslib/cram/mFILE.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:dfa3745e5fd8
1 /*
2 Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd.
3 Author: James Bonfield <jkb@sanger.ac.uk>
4
5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions are met:
7
8 1. Redistributions of source code must retain the above copyright notice,
9 this list of conditions and the following disclaimer.
10
11 2. Redistributions in binary form must reproduce the above copyright notice,
12 this list of conditions and the following disclaimer in the documentation
13 and/or other materials provided with the distribution.
14
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16 Institute nor the names of its contributors may be used to endorse or promote
17 products derived from this software without specific prior written permission.
18
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31
32 #ifdef HAVE_CONFIG_H
33 #include "io_lib_config.h"
34 #endif
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <errno.h>
39 #include <string.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <unistd.h>
44 #include <stdarg.h>
45
46 #include "cram/os.h"
47 #include "cram/mFILE.h"
48 #include "cram/vlen.h"
49
50 /*
51 * This file contains memory-based versions of the most commonly used
52 * (by io_lib) stdio functions.
53 *
54 * Actual file IO takes place either on opening or closing an mFILE.
55 *
56 * Coupled to this are a bunch of rather scary macros which can be obtained
57 * by including stdio_hack.h. It is recommended though that you use mFILE.h
58 * instead and replace fopen with mfopen (etc). This is more or less
59 * mandatory if you wish to use both FILE and mFILE structs in a single file.
60 */
61
62 static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */
63
/*
 * Reads the remainder of fp into a freshly allocated buffer.
 *
 * If 'fn' is non-NULL and can be stat()ed, the reported file size is used to
 * allocate the buffer up front and to stop reading once that many bytes have
 * arrived. Otherwise the stream is read in 8192 byte steps until EOF.
 *
 * 'binary' only has an effect on Windows, where it selects the translation
 * mode of the underlying descriptor before reading.
 *
 * Returns a malloced buffer on success, with its length stored in *size
 *         (the buffer is non-NULL even when nothing was read);
 *         NULL on allocation failure or read error, with *size set to 0.
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0, len;
    size_t bufsize = 8192;

    *size = 0;

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#endif

    if (fn && -1 != stat(fn, &sb)) {
        bufsize = (size_t)sb.st_size;
        allocated = bufsize;
        /* Ask for at least one byte so an empty file is not mistaken for
         * an allocation failure on platforms where malloc(0) is NULL. */
        data = malloc(allocated ? allocated : 1);
        if (NULL == data)
            return NULL;
    } else {
        fn = NULL;
    }

    do {
        if (used + bufsize > allocated) {
            /* Keep the old pointer until realloc succeeds to avoid a leak */
            char *grown = realloc(data, allocated + bufsize);
            if (NULL == grown) {
                free(data);
                return NULL;
            }
            data = grown;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        used += len;

        /* feof() never becomes true after a read error, so bail out here
         * rather than spinning forever. */
        if (ferror(fp)) {
            free(data);
            return NULL;
        }
    } while (len > 0 && !feof(fp) &&
             (fn == NULL || used < (size_t)sb.st_size));

    *size = used;

    return data;
}
108
109 /*
110 * Creates and returns m_channel[0].
111 * We initialise this on the first attempted read, which then slurps in
112 * all of stdin until EOF is met.
113 */
114 mFILE *mstdin(void) {
115 if (m_channel[0])
116 return m_channel[0];
117
118 m_channel[0] = mfcreate(NULL, 0);
119 if (NULL == m_channel[0]) return NULL;
120 m_channel[0]->fp = stdin;
121 return m_channel[0];
122 }
123
124 static void init_mstdin(void) {
125 static int done_stdin = 0;
126 if (done_stdin)
127 return;
128
129 m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
130 m_channel[0]->mode = MF_READ;
131 done_stdin = 1;
132 }
133
134 /*
135 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
136 * an empty buffer which is physically written out only when mfflush or
137 * mfclose are called.
138 */
139 mFILE *mstdout(void) {
140 if (m_channel[1])
141 return m_channel[1];
142
143 m_channel[1] = mfcreate(NULL, 0);
144 if (NULL == m_channel[1]) return NULL;
145 m_channel[1]->fp = stdout;
146 m_channel[1]->mode = MF_WRITE;
147 return m_channel[1];
148 }
149
150 /*
151 * Stderr as an mFILE.
152 * The code handles stderr by returning m_channel[2], but also checking
153 * for stderr in fprintf (the common usage of it) to auto-flush.
154 */
155 mFILE *mstderr(void) {
156 if (m_channel[2])
157 return m_channel[2];
158
159 m_channel[2] = mfcreate(NULL, 0);
160 if (NULL == m_channel[2]) return NULL;
161 m_channel[2]->fp = stderr;
162 m_channel[2]->mode = MF_WRITE;
163 return m_channel[2];
164 }
165
166
167 /*
168 * For creating existing mFILE pointers directly from memory buffers.
169 */
170 mFILE *mfcreate(char *data, int size) {
171 mFILE *mf = (mFILE *)malloc(sizeof(*mf));
172 if (NULL == mf) return NULL;
173 mf->fp = NULL;
174 mf->data = data;
175 mf->alloced = size;
176 mf->size = size;
177 mf->eof = 0;
178 mf->offset = 0;
179 mf->flush_pos = 0;
180 mf->mode = MF_READ | MF_WRITE;
181 return mf;
182 }
183
184 /*
185 * Recreate an existing mFILE to house new data/size.
186 * It also rewinds the file.
187 */
188 void mfrecreate(mFILE *mf, char *data, int size) {
189 if (mf->data)
190 free(mf->data);
191 mf->data = data;
192 mf->size = size;
193 mf->alloced = size;
194 mf->eof = 0;
195 mf->offset = 0;
196 mf->flush_pos = 0;
197 }
198
199
200 /*
201 * Creates a new mFILE to contain the contents of the FILE pointer.
202 * This mFILE is purely for in-memory operations and has no links to the
203 * original FILE* it came from. It also doesn't close the FILE pointer.
204 * Consider using mfreopen() is you need different behaviour.
205 *
206 * Returns mFILE * on success
207 * NULL on failure.
208 */
209 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
210 mFILE *mf;
211
212 /* Open using mfreopen() */
213 if (NULL == (mf = mfreopen(path, mode_str, fp)))
214 return NULL;
215
216 /* Disassociate from the input stream */
217 mf->fp = NULL;
218
219 return mf;
220 }
221
222 /*
223 * Converts a FILE * to an mFILE *.
224 * Use this for wrapper functions to turn external prototypes requring
225 * FILE * as an argument into internal code using mFILE *.
226 */
227 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
228 mFILE *mf;
229 int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
230
231 /* Parse mode:
232 * r = read file contents (if truncated => don't read)
233 * w = write on close
234 * a = position at end of buffer
235 * x = position at same location as the original fp, don't seek on flush
236 */
237 if (strchr(mode_str, 'r'))
238 r = 1, mode |= MF_READ;
239 if (strchr(mode_str, 'w'))
240 w = 1, mode |= MF_WRITE | MF_TRUNC;
241 if (strchr(mode_str, 'a'))
242 w = a = 1, mode |= MF_WRITE | MF_APPEND;
243 if (strchr(mode_str, 'b'))
244 b = 1, mode |= MF_BINARY;
245 if (strchr(mode_str, 'x'))
246 x = 1;
247 if (strchr(mode_str, '+')) {
248 w = 1, mode |= MF_READ | MF_WRITE;
249 if (a)
250 r = 1;
251 }
252
253 if (r) {
254 mf = mfcreate(NULL, 0);
255 if (NULL == mf) return NULL;
256 if (!(mode & MF_TRUNC)) {
257 mf->data = mfload(fp, path, &mf->size, b);
258 mf->alloced = mf->size;
259 if (!a)
260 fseek(fp, 0, SEEK_SET);
261 }
262 } else if (w) {
263 /* Write - initialise the data structures */
264 mf = mfcreate(NULL, 0);
265 if (NULL == mf) return NULL;
266 } else {
267 fprintf(stderr, "Must specify either r, w or a for mode\n");
268 return NULL;
269 }
270 mf->fp = fp;
271 mf->mode = mode;
272
273 if (x) {
274 mf->mode |= MF_MODEX;
275 }
276
277 if (a) {
278 mf->flush_pos = mf->size;
279 fseek(fp, 0, SEEK_END);
280 }
281
282 return mf;
283 }
284
285 /*
286 * Opens a file. If we have read access (r or a+) then it loads the entire
287 * file into memory. If We have write access then the pathname is stored.
288 * We do not actually write until an mfclose, which then checks this pathname.
289 */
290 mFILE *mfopen(const char *path, const char *mode) {
291 FILE *fp;
292
293 if (NULL == (fp = fopen(path, mode)))
294 return NULL;
295 return mfreopen(path, mode, fp);
296 }
297
298 /*
299 * Closes an mFILE. If the filename is known (implying write access) then this
300 * also writes the data to disk.
301 *
302 * Stdout is handled by calling mfflush which writes to stdout if appropriate.
303 */
304 int mfclose(mFILE *mf) {
305 if (!mf)
306 return -1;
307
308 mfflush(mf);
309
310 if (mf->fp)
311 fclose(mf->fp);
312
313 mfdestroy(mf);
314
315 return 0;
316 }
317
318 /*
319 * Closes the file pointer contained within the mFILE without destroying
320 * the in-memory data.
321 */
322 int mfdetach(mFILE *mf) {
323 if (!mf)
324 return -1;
325
326 mfflush(mf);
327
328 if (mf->fp) {
329 fclose(mf->fp);
330 mf->fp = NULL;
331 }
332
333 return 0;
334 }
335
336 /*
337 * Destroys an mFILE structure but does not flush or close it
338 */
339 int mfdestroy(mFILE *mf) {
340 if (!mf)
341 return -1;
342
343 if (mf->data)
344 free(mf->data);
345 free(mf);
346
347 return 0;
348 }
349
350 /*
351 * Steals that data out of an mFILE. The mFILE itself will be closed.
352 * It is up to the caller to free the stolen buffer. If size_out is
353 * not NULL, mf->size will be stored in it.
354 * This is more-or-less the opposite of mfcreate().
355 */
356
357 void *mfsteal(mFILE *mf, size_t *size_out) {
358 void *data;
359
360 if (!mf) return NULL;
361
362 data = mf->data;
363
364 if (NULL != size_out) *size_out = mf->size;
365
366 mfdetach(mf);
367 mf->data = NULL;
368 mfdestroy(mf);
369
370 return data;
371 }
372
373 /*
374 * Seek/tell functions. Nothing more than updating and reporting an
375 * in-memory index. NB we can seek on stdin or stdout even provided we
376 * haven't been flushing.
377 */
378 int mfseek(mFILE *mf, long offset, int whence) {
379 switch (whence) {
380 case SEEK_SET:
381 mf->offset = offset;
382 break;
383 case SEEK_CUR:
384 mf->offset += offset;
385 break;
386 case SEEK_END:
387 mf->offset = mf->size + offset;
388 break;
389 default:
390 errno = EINVAL;
391 return -1;
392 }
393
394 mf->eof = 0;
395 return 0;
396 }
397
398 long mftell(mFILE *mf) {
399 return mf->offset;
400 }
401
402 void mrewind(mFILE *mf) {
403 mf->offset = 0;
404 mf->eof = 0;
405 }
406
407 /*
408 * mftruncate is not directly a translation of ftruncate as the latter
409 * takes a file descriptor instead of a FILE *. It performs the analogous
410 * role though.
411 *
412 * If offset is -1 then the file is truncated to be the current file
413 * offset.
414 */
415 void mftruncate(mFILE *mf, long offset) {
416 mf->size = offset != -1 ? offset : mf->offset;
417 if (mf->offset > mf->size)
418 mf->offset = mf->size;
419 }
420
421 int mfeof(mFILE *mf) {
422 return mf->eof;
423 }
424
425 /*
426 * mFILE read/write functions. Basically these turn fread/fwrite syntax
427 * into memcpy statements, with appropriate memory handling for writing.
428 */
429 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
430 size_t len;
431 char *cptr = (char *)ptr;
432
433 if (mf == m_channel[0]) init_mstdin();
434
435 if (mf->size <= mf->offset)
436 return 0;
437
438 len = size * nmemb <= mf->size - mf->offset
439 ? size * nmemb
440 : mf->size - mf->offset;
441 if (!size)
442 return 0;
443
444 memcpy(cptr, &mf->data[mf->offset], len);
445 mf->offset += len;
446
447 if (len != size * nmemb) {
448 mf->eof = 1;
449 }
450
451 return len / size;
452 }
453
454 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
455 if (!(mf->mode & MF_WRITE))
456 return 0;
457
458 /* Append mode => forced all writes to end of file */
459 if (mf->mode & MF_APPEND)
460 mf->offset = mf->size;
461
462 /* Make sure we have enough room */
463 while (size * nmemb + mf->offset > mf->alloced) {
464 size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
465 void * new_data = realloc(mf->data, new_alloced);
466 if (NULL == new_data) return 0;
467 mf->alloced = new_alloced;
468 mf->data = new_data;
469 }
470
471 /* Record where we need to reflush from */
472 if (mf->offset < mf->flush_pos)
473 mf->flush_pos = mf->offset;
474
475 /* Copy the data over */
476 memcpy(&mf->data[mf->offset], ptr, size * nmemb);
477 mf->offset += size * nmemb;
478 if (mf->size < mf->offset)
479 mf->size = mf->offset;
480
481 return nmemb;
482 }
483
484 int mfgetc(mFILE *mf) {
485 if (mf == m_channel[0]) init_mstdin();
486 if (mf->offset < mf->size) {
487 return (unsigned char)mf->data[mf->offset++];
488 }
489
490 mf->eof = 1;
491 return -1;
492 }
493
494 int mungetc(int c, mFILE *mf) {
495 if (mf->offset > 0) {
496 mf->data[--mf->offset] = c;
497 return c;
498 }
499
500 mf->eof = 1;
501 return -1;
502 }
503
504 char *mfgets(char *s, int size, mFILE *mf) {
505 int i;
506
507 if (mf == m_channel[0]) init_mstdin();
508 *s = 0;
509 for (i = 0; i < size-1;) {
510 if (mf->offset < mf->size) {
511 s[i] = mf->data[mf->offset++];
512 if (s[i++] == '\n')
513 break;
514 } else {
515 mf->eof = 1;
516 break;
517 }
518 }
519
520 s[i] = 0;
521 return i ? s : NULL;
522 }
523
524 /*
525 * Flushes an mFILE. If this is a real open of a file in write mode then
526 * mFILE->fp will be set. We then write out any new data in mFILE since the
527 * last flush. We cannot tell what may have been modified as we don't keep
528 * track of that, so we typically rewrite out the entire file contents between
529 * the last flush_pos and the end of file.
530 *
531 * For stderr/stdout we also reset the offsets so we cannot modify things
532 * we've already output.
533 */
534 int mfflush(mFILE *mf) {
535 if (!mf->fp)
536 return 0;
537
538 /* FIXME: only do this when opened in write mode */
539 if (mf == m_channel[1] || mf == m_channel[2]) {
540 if (mf->flush_pos < mf->size) {
541 size_t bytes = mf->size - mf->flush_pos;
542 if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
543 return -1;
544 if (0 != fflush(mf->fp))
545 return -1;
546 }
547
548 /* Stdout & stderr are non-seekable streams so throw away the data */
549 mf->offset = mf->size = mf->flush_pos = 0;
550 }
551
552 /* only flush when opened in write mode */
553 if (mf->mode & MF_WRITE) {
554 if (mf->flush_pos < mf->size) {
555 size_t bytes = mf->size - mf->flush_pos;
556 if (!(mf->mode & MF_MODEX)) {
557 fseek(mf->fp, mf->flush_pos, SEEK_SET);
558 }
559 if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
560 return -1;
561 if (0 != fflush(mf->fp))
562 return -1;
563 }
564 if (ftell(mf->fp) != -1 &&
565 ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
566 return -1;
567 mf->flush_pos = mf->size;
568 }
569
570 return 0;
571 }
572
573 /*
574 * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to
575 * estimate how many additional bytes of storage will be required for the
576 * vsprintf to work.
577 */
578 int mfprintf(mFILE *mf, char *fmt, ...) {
579 int ret;
580 size_t est_length;
581 va_list args;
582
583 va_start(args, fmt);
584 est_length = vflen(fmt, args);
585 va_end(args);
586 while (est_length + mf->offset > mf->alloced) {
587 size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
588 void * new_data = realloc(mf->data, new_alloced);
589 if (NULL == new_data) return -1;
590 mf->alloced = new_alloced;
591 mf->data = new_data;
592 }
593
594 va_start(args, fmt);
595 ret = vsprintf(&mf->data[mf->offset], fmt, args);
596 va_end(args);
597
598 if (ret > 0) {
599 mf->offset += ret;
600 if (mf->size < mf->offset)
601 mf->size = mf->offset;
602 }
603
604 if (mf->fp == stderr) {
605 /* Auto-flush for stderr */
606 if (0 != mfflush(mf)) return -1;
607 }
608
609 return ret;
610 }
611
612 /*
613 * Converts an mFILE from binary to ascii mode by replacing all
614 * cr-nl with nl.
615 *
616 * Primarily used on windows when we've uncompressed a binary file which
617 * happens to be a text file (eg Experiment File). Previously we would have
618 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
619 *
620 * Side effect: resets offset and flush_pos back to the start.
621 */
622 void mfascii(mFILE *mf) {
623 size_t p1, p2;
624
625 for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
626 if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
627 p2--; /* delete the \r */
628 }
629 mf->data[p2] = mf->data[p1];
630 }
631 mf->size = p2;
632
633 mf->offset = mf->flush_pos = 0;
634 }