Mercurial > repos > youngkim > ezbamqc
diff ezBAMQC/src/htslib/cram/mFILE.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/mFILE.c Thu Mar 24 17:12:52 2016 -0400 @@ -0,0 +1,634 @@ +/* +Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#ifdef HAVE_CONFIG_H +#include "io_lib_config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdarg.h> + +#include "cram/os.h" +#include "cram/mFILE.h" +#include "cram/vlen.h" + +/* + * This file contains memory-based versions of the most commonly used + * (by io_lib) stdio functions. + * + * Actual file IO takes place either on opening or closing an mFILE. + * + * Coupled to this are a bunch of rather scary macros which can be obtained + * by including stdio_hack.h. It is recommended though that you use mFILE.h + * instead and replace fopen with mfopen (etc). This is more or less + * mandatory if you wish to use both FILE and mFILE structs in a single file. + */ + +static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */ + +/* + * Reads the entirety of fp into memory. If 'fn' exists it is the filename + * associated with fp. This will be used for more optimal reading (via a + * stat to identify the size and a single read). Otherwise we use successive + * reads until EOF. + * + * Returns a malloced buffer on success of length *size + * NULL on failure + */ +static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) { + struct stat sb; + char *data = NULL; + size_t allocated = 0, used = 0; + int bufsize = 8192; + +#ifdef _WIN32 + if (binary) + _setmode(_fileno(fp), _O_BINARY); + else + _setmode(_fileno(fp), _O_TEXT); +#endif + + if (fn && -1 != stat(fn, &sb)) { + data = malloc(allocated = sb.st_size); + bufsize = sb.st_size; + } else { + fn = NULL; + } + + do { + size_t len; + if (used + bufsize > allocated) { + allocated += bufsize; + data = realloc(data, allocated); + } + len = fread(data + used, 1, allocated - used, fp); + if (len > 0) + used += len; + } while (!feof(fp) && (fn == NULL || used < sb.st_size)); + + *size = used; + + return data; +} + +/* + * Creates and returns m_channel[0]. + * We initialise this on the first attempted read, which then slurps in + * all of stdin until EOF is met. + */ +mFILE *mstdin(void) { + if (m_channel[0]) + return m_channel[0]; + + m_channel[0] = mfcreate(NULL, 0); + if (NULL == m_channel[0]) return NULL; + m_channel[0]->fp = stdin; + return m_channel[0]; +} + +static void init_mstdin(void) { + static int done_stdin = 0; + if (done_stdin) + return; + + m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); + m_channel[0]->mode = MF_READ; + done_stdin = 1; +} + +/* + * Creates and returns m_channel[1]. This is the fake for stdout. It starts as + * an empty buffer which is physically written out only when mfflush or + * mfclose are called. + */ +mFILE *mstdout(void) { + if (m_channel[1]) + return m_channel[1]; + + m_channel[1] = mfcreate(NULL, 0); + if (NULL == m_channel[1]) return NULL; + m_channel[1]->fp = stdout; + m_channel[1]->mode = MF_WRITE; + return m_channel[1]; +} + +/* + * Stderr as an mFILE. + * The code handles stderr by returning m_channel[2], but also checking + * for stderr in fprintf (the common usage of it) to auto-flush. + */ +mFILE *mstderr(void) { + if (m_channel[2]) + return m_channel[2]; + + m_channel[2] = mfcreate(NULL, 0); + if (NULL == m_channel[2]) return NULL; + m_channel[2]->fp = stderr; + m_channel[2]->mode = MF_WRITE; + return m_channel[2]; +} + + +/* + * For creating existing mFILE pointers directly from memory buffers. + */ +mFILE *mfcreate(char *data, int size) { + mFILE *mf = (mFILE *)malloc(sizeof(*mf)); + if (NULL == mf) return NULL; + mf->fp = NULL; + mf->data = data; + mf->alloced = size; + mf->size = size; + mf->eof = 0; + mf->offset = 0; + mf->flush_pos = 0; + mf->mode = MF_READ | MF_WRITE; + return mf; +} + +/* + * Recreate an existing mFILE to house new data/size. + * It also rewinds the file. + */ +void mfrecreate(mFILE *mf, char *data, int size) { + if (mf->data) + free(mf->data); + mf->data = data; + mf->size = size; + mf->alloced = size; + mf->eof = 0; + mf->offset = 0; + mf->flush_pos = 0; +} + + +/* + * Creates a new mFILE to contain the contents of the FILE pointer. + * This mFILE is purely for in-memory operations and has no links to the + * original FILE* it came from. It also doesn't close the FILE pointer. + * Consider using mfreopen() is you need different behaviour. + * + * Returns mFILE * on success + * NULL on failure. + */ +mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { + mFILE *mf; + + /* Open using mfreopen() */ + if (NULL == (mf = mfreopen(path, mode_str, fp))) + return NULL; + + /* Disassociate from the input stream */ + mf->fp = NULL; + + return mf; +} + +/* + * Converts a FILE * to an mFILE *. + * Use this for wrapper functions to turn external prototypes requring + * FILE * as an argument into internal code using mFILE *. + */ +mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { + mFILE *mf; + int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; + + /* Parse mode: + * r = read file contents (if truncated => don't read) + * w = write on close + * a = position at end of buffer + * x = position at same location as the original fp, don't seek on flush + */ + if (strchr(mode_str, 'r')) + r = 1, mode |= MF_READ; + if (strchr(mode_str, 'w')) + w = 1, mode |= MF_WRITE | MF_TRUNC; + if (strchr(mode_str, 'a')) + w = a = 1, mode |= MF_WRITE | MF_APPEND; + if (strchr(mode_str, 'b')) + b = 1, mode |= MF_BINARY; + if (strchr(mode_str, 'x')) + x = 1; + if (strchr(mode_str, '+')) { + w = 1, mode |= MF_READ | MF_WRITE; + if (a) + r = 1; + } + + if (r) { + mf = mfcreate(NULL, 0); + if (NULL == mf) return NULL; + if (!(mode & MF_TRUNC)) { + mf->data = mfload(fp, path, &mf->size, b); + mf->alloced = mf->size; + if (!a) + fseek(fp, 0, SEEK_SET); + } + } else if (w) { + /* Write - initialise the data structures */ + mf = mfcreate(NULL, 0); + if (NULL == mf) return NULL; + } else { + fprintf(stderr, "Must specify either r, w or a for mode\n"); + return NULL; + } + mf->fp = fp; + mf->mode = mode; + + if (x) { + mf->mode |= MF_MODEX; + } + + if (a) { + mf->flush_pos = mf->size; + fseek(fp, 0, SEEK_END); + } + + return mf; +} + +/* + * Opens a file. If we have read access (r or a+) then it loads the entire + * file into memory. If We have write access then the pathname is stored. + * We do not actually write until an mfclose, which then checks this pathname. + */ +mFILE *mfopen(const char *path, const char *mode) { + FILE *fp; + + if (NULL == (fp = fopen(path, mode))) + return NULL; + return mfreopen(path, mode, fp); +} + +/* + * Closes an mFILE. If the filename is known (implying write access) then this + * also writes the data to disk. + * + * Stdout is handled by calling mfflush which writes to stdout if appropriate. + */ +int mfclose(mFILE *mf) { + if (!mf) + return -1; + + mfflush(mf); + + if (mf->fp) + fclose(mf->fp); + + mfdestroy(mf); + + return 0; +} + +/* + * Closes the file pointer contained within the mFILE without destroying + * the in-memory data. + */ +int mfdetach(mFILE *mf) { + if (!mf) + return -1; + + mfflush(mf); + + if (mf->fp) { + fclose(mf->fp); + mf->fp = NULL; + } + + return 0; +} + +/* + * Destroys an mFILE structure but does not flush or close it + */ +int mfdestroy(mFILE *mf) { + if (!mf) + return -1; + + if (mf->data) + free(mf->data); + free(mf); + + return 0; +} + +/* + * Steals that data out of an mFILE. The mFILE itself will be closed. + * It is up to the caller to free the stolen buffer. If size_out is + * not NULL, mf->size will be stored in it. + * This is more-or-less the opposite of mfcreate(). + */ + +void *mfsteal(mFILE *mf, size_t *size_out) { + void *data; + + if (!mf) return NULL; + + data = mf->data; + + if (NULL != size_out) *size_out = mf->size; + + mfdetach(mf); + mf->data = NULL; + mfdestroy(mf); + + return data; +} + +/* + * Seek/tell functions. Nothing more than updating and reporting an + * in-memory index. NB we can seek on stdin or stdout even provided we + * haven't been flushing. + */ +int mfseek(mFILE *mf, long offset, int whence) { + switch (whence) { + case SEEK_SET: + mf->offset = offset; + break; + case SEEK_CUR: + mf->offset += offset; + break; + case SEEK_END: + mf->offset = mf->size + offset; + break; + default: + errno = EINVAL; + return -1; + } + + mf->eof = 0; + return 0; +} + +long mftell(mFILE *mf) { + return mf->offset; +} + +void mrewind(mFILE *mf) { + mf->offset = 0; + mf->eof = 0; +} + +/* + * mftruncate is not directly a translation of ftruncate as the latter + * takes a file descriptor instead of a FILE *. It performs the analogous + * role though. + * + * If offset is -1 then the file is truncated to be the current file + * offset. + */ +void mftruncate(mFILE *mf, long offset) { + mf->size = offset != -1 ? offset : mf->offset; + if (mf->offset > mf->size) + mf->offset = mf->size; +} + +int mfeof(mFILE *mf) { + return mf->eof; +} + +/* + * mFILE read/write functions. Basically these turn fread/fwrite syntax + * into memcpy statements, with appropriate memory handling for writing. + */ +size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { + size_t len; + char *cptr = (char *)ptr; + + if (mf == m_channel[0]) init_mstdin(); + + if (mf->size <= mf->offset) + return 0; + + len = size * nmemb <= mf->size - mf->offset + ? size * nmemb + : mf->size - mf->offset; + if (!size) + return 0; + + memcpy(cptr, &mf->data[mf->offset], len); + mf->offset += len; + + if (len != size * nmemb) { + mf->eof = 1; + } + + return len / size; +} + +size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { + if (!(mf->mode & MF_WRITE)) + return 0; + + /* Append mode => forced all writes to end of file */ + if (mf->mode & MF_APPEND) + mf->offset = mf->size; + + /* Make sure we have enough room */ + while (size * nmemb + mf->offset > mf->alloced) { + size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; + void * new_data = realloc(mf->data, new_alloced); + if (NULL == new_data) return 0; + mf->alloced = new_alloced; + mf->data = new_data; + } + + /* Record where we need to reflush from */ + if (mf->offset < mf->flush_pos) + mf->flush_pos = mf->offset; + + /* Copy the data over */ + memcpy(&mf->data[mf->offset], ptr, size * nmemb); + mf->offset += size * nmemb; + if (mf->size < mf->offset) + mf->size = mf->offset; + + return nmemb; +} + +int mfgetc(mFILE *mf) { + if (mf == m_channel[0]) init_mstdin(); + if (mf->offset < mf->size) { + return (unsigned char)mf->data[mf->offset++]; + } + + mf->eof = 1; + return -1; +} + +int mungetc(int c, mFILE *mf) { + if (mf->offset > 0) { + mf->data[--mf->offset] = c; + return c; + } + + mf->eof = 1; + return -1; +} + +char *mfgets(char *s, int size, mFILE *mf) { + int i; + + if (mf == m_channel[0]) init_mstdin(); + *s = 0; + for (i = 0; i < size-1;) { + if (mf->offset < mf->size) { + s[i] = mf->data[mf->offset++]; + if (s[i++] == '\n') + break; + } else { + mf->eof = 1; + break; + } + } + + s[i] = 0; + return i ? s : NULL; +} + +/* + * Flushes an mFILE. If this is a real open of a file in write mode then + * mFILE->fp will be set. We then write out any new data in mFILE since the + * last flush. We cannot tell what may have been modified as we don't keep + * track of that, so we typically rewrite out the entire file contents between + * the last flush_pos and the end of file. + * + * For stderr/stdout we also reset the offsets so we cannot modify things + * we've already output. + */ +int mfflush(mFILE *mf) { + if (!mf->fp) + return 0; + + /* FIXME: only do this when opened in write mode */ + if (mf == m_channel[1] || mf == m_channel[2]) { + if (mf->flush_pos < mf->size) { + size_t bytes = mf->size - mf->flush_pos; + if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) + return -1; + if (0 != fflush(mf->fp)) + return -1; + } + + /* Stdout & stderr are non-seekable streams so throw away the data */ + mf->offset = mf->size = mf->flush_pos = 0; + } + + /* only flush when opened in write mode */ + if (mf->mode & MF_WRITE) { + if (mf->flush_pos < mf->size) { + size_t bytes = mf->size - mf->flush_pos; + if (!(mf->mode & MF_MODEX)) { + fseek(mf->fp, mf->flush_pos, SEEK_SET); + } + if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes) + return -1; + if (0 != fflush(mf->fp)) + return -1; + } + if (ftell(mf->fp) != -1 && + ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1) + return -1; + mf->flush_pos = mf->size; + } + + return 0; +} + +/* + * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to + * estimate how many additional bytes of storage will be required for the + * vsprintf to work. + */ +int mfprintf(mFILE *mf, char *fmt, ...) { + int ret; + size_t est_length; + va_list args; + + va_start(args, fmt); + est_length = vflen(fmt, args); + va_end(args); + while (est_length + mf->offset > mf->alloced) { + size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024; + void * new_data = realloc(mf->data, new_alloced); + if (NULL == new_data) return -1; + mf->alloced = new_alloced; + mf->data = new_data; + } + + va_start(args, fmt); + ret = vsprintf(&mf->data[mf->offset], fmt, args); + va_end(args); + + if (ret > 0) { + mf->offset += ret; + if (mf->size < mf->offset) + mf->size = mf->offset; + } + + if (mf->fp == stderr) { + /* Auto-flush for stderr */ + if (0 != mfflush(mf)) return -1; + } + + return ret; +} + +/* + * Converts an mFILE from binary to ascii mode by replacing all + * cr-nl with nl. + * + * Primarily used on windows when we've uncompressed a binary file which + * happens to be a text file (eg Experiment File). Previously we would have + * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). + * + * Side effect: resets offset and flush_pos back to the start. + */ +void mfascii(mFILE *mf) { + size_t p1, p2; + + for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { + if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { + p2--; /* delete the \r */ + } + mf->data[p2] = mf->data[p1]; + } + mf->size = p2; + + mf->offset = mf->flush_pos = 0; +}