Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/mFILE.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
| author | dawe |
|---|---|
| date | Tue, 07 Jun 2011 17:48:05 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d901c9f41a6a |
|---|---|
| 1 #ifdef HAVE_CONFIG_H | |
| 2 #include "io_lib_config.h" | |
| 3 #endif | |
| 4 | |
| 5 #include <stdio.h> | |
| 6 #include <stdlib.h> | |
| 7 #include <errno.h> | |
| 8 #include <string.h> | |
| 9 #include <sys/types.h> | |
| 10 #include <sys/stat.h> | |
| 11 #include <fcntl.h> | |
| 12 #include <unistd.h> | |
| 13 #include <stdarg.h> | |
| 14 | |
| 15 #include "io_lib/os.h" | |
| 16 #include "io_lib/mFILE.h" | |
| 17 #include "io_lib/vlen.h" | |
| 18 | |
| 19 /* | |
| 20 * This file contains memory-based versions of the most commonly used | |
| 21 * (by io_lib) stdio functions. | |
| 22 * | |
| 23 * Actual file IO takes place either on opening or closing an mFILE. | |
| 24 * | |
| 25 * Coupled to this are a bunch of rather scary macros which can be obtained | |
| 26 * by including stdio_hack.h. It is recommended though that you use mFILE.h | |
| 27 * instead and replace fopen with mfopen (etc). This is more or less | |
| 28 * mandatory if you wish to use both FILE and mFILE structs in a single file. | |
| 29 */ | |
| 30 | |
| 31 static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */ | |
| 32 | |
/*
 * Reads the remainder of fp into a freshly allocated buffer.
 *
 * If 'fn' is non-NULL it is taken to be the filename associated with fp. It
 * is stat()ed and, when a non-zero size is reported, that size is used to
 * allocate the buffer once and fetch the contents with a single read.
 * Otherwise (no name, stat failure, or a reported size of zero as seen for
 * pipes and similar special files) we use successive 8Kb reads until EOF.
 *
 * 'binary' is only used on Windows, where it selects binary or text mode
 * for the underlying descriptor.
 *
 * Returns a malloced buffer on success, with the number of bytes read
 *         stored in *size. The caller must free() it.
 *         NULL on failure (out of memory or read error); *size is set to 0.
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;
    size_t expected = 0;        /* 0 => size unknown, read until EOF */

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#else
    (void)binary;
#endif

    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        /* Known size: one allocation, one read */
        expected = (size_t)sb.st_size;
        bufsize = expected;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            /* Keep the old pointer until we know realloc worked */
            char *tmp = realloc(data, allocated + bufsize);
            if (!tmp)
                goto fail;
            data = tmp;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        used += len;

        /* A read error never sets feof(), so bail out explicitly */
        if (ferror(fp))
            goto fail;
    } while (!feof(fp) && (expected == 0 || used < expected));

    *size = used;
    return data;

 fail:
    free(data);
    *size = 0;
    return NULL;
}
| 77 | |
| 78 /* | |
| 79 * Creates and returns m_channel[0]. | |
| 80 * We initialise this on the first attempted read, which then slurps in | |
| 81 * all of stdin until EOF is met. | |
| 82 */ | |
| 83 mFILE *mstdin(void) { | |
| 84 if (m_channel[0]) | |
| 85 return m_channel[0]; | |
| 86 | |
| 87 m_channel[0] = mfcreate(NULL, 0); | |
| 88 m_channel[0]->fp = stdin; | |
| 89 return m_channel[0]; | |
| 90 } | |
| 91 | |
| 92 static void init_mstdin(void) { | |
| 93 static int done_stdin = 0; | |
| 94 if (done_stdin) | |
| 95 return; | |
| 96 | |
| 97 m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); | |
| 98 m_channel[0]->mode = MF_READ; | |
| 99 done_stdin = 1; | |
| 100 } | |
| 101 | |
| 102 /* | |
| 103 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as | |
| 104 * an empty buffer which is physically written out only when mfflush or | |
| 105 * mfclose are called. | |
| 106 */ | |
| 107 mFILE *mstdout(void) { | |
| 108 if (m_channel[1]) | |
| 109 return m_channel[1]; | |
| 110 | |
| 111 m_channel[1] = mfcreate(NULL, 0); | |
| 112 m_channel[1]->fp = stdout; | |
| 113 m_channel[1]->mode = MF_WRITE; | |
| 114 return m_channel[1]; | |
| 115 } | |
| 116 | |
| 117 /* | |
| 118 * Stderr as an mFILE. | |
| 119 * The code handles stderr by returning m_channel[2], but also checking | |
| 120 * for stderr in fprintf (the common usage of it) to auto-flush. | |
| 121 */ | |
| 122 mFILE *mstderr(void) { | |
| 123 if (m_channel[2]) | |
| 124 return m_channel[2]; | |
| 125 | |
| 126 m_channel[2] = mfcreate(NULL, 0); | |
| 127 m_channel[2]->fp = stderr; | |
| 128 m_channel[2]->mode = MF_WRITE; | |
| 129 return m_channel[2]; | |
| 130 } | |
| 131 | |
| 132 | |
| 133 /* | |
| 134 * For creating existing mFILE pointers directly from memory buffers. | |
| 135 */ | |
| 136 mFILE *mfcreate(char *data, int size) { | |
| 137 mFILE *mf = (mFILE *)malloc(sizeof(*mf)); | |
| 138 mf->fp = NULL; | |
| 139 mf->data = data; | |
| 140 mf->alloced = size; | |
| 141 mf->size = size; | |
| 142 mf->eof = 0; | |
| 143 mf->offset = 0; | |
| 144 mf->flush_pos = 0; | |
| 145 mf->mode = MF_READ | MF_WRITE; | |
| 146 return mf; | |
| 147 } | |
| 148 | |
| 149 /* | |
| 150 * Recreate an existing mFILE to house new data/size. | |
| 151 * It also rewinds the file. | |
| 152 */ | |
| 153 void mfrecreate(mFILE *mf, char *data, int size) { | |
| 154 if (mf->data) | |
| 155 free(mf->data); | |
| 156 mf->data = data; | |
| 157 mf->size = size; | |
| 158 mf->alloced = size; | |
| 159 mf->eof = 0; | |
| 160 mf->offset = 0; | |
| 161 mf->flush_pos = 0; | |
| 162 } | |
| 163 | |
| 164 | |
| 165 /* | |
| 166 * Creates a new mFILE to contain the contents of the FILE pointer. | |
| 167 * This mFILE is purely for in-memory operations and has no links to the | |
| 168 * original FILE* it came from. It also doesn't close the FILE pointer. | |
| 169 * Consider using mfreopen() is you need different behaviour. | |
| 170 * | |
| 171 * Returns mFILE * on success | |
| 172 * NULL on failure. | |
| 173 */ | |
| 174 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { | |
| 175 mFILE *mf; | |
| 176 | |
| 177 /* Open using mfreopen() */ | |
| 178 if (NULL == (mf = mfreopen(path, mode_str, fp))) | |
| 179 return NULL; | |
| 180 | |
| 181 /* Disassociate from the input stream */ | |
| 182 mf->fp = NULL; | |
| 183 | |
| 184 return mf; | |
| 185 } | |
| 186 | |
| 187 /* | |
| 188 * Converts a FILE * to an mFILE *. | |
| 189 * Use this for wrapper functions to turn external prototypes requring | |
| 190 * FILE * as an argument into internal code using mFILE *. | |
| 191 */ | |
| 192 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { | |
| 193 mFILE *mf; | |
| 194 int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; | |
| 195 | |
| 196 /* Parse mode: | |
| 197 * r = read file contents (if truncated => don't read) | |
| 198 * w = write on close | |
| 199 * a = position at end of buffer | |
| 200 * x = position at same location as the original fp | |
| 201 */ | |
| 202 if (strchr(mode_str, 'r')) | |
| 203 r = 1, mode |= MF_READ; | |
| 204 if (strchr(mode_str, 'w')) | |
| 205 w = 1, mode |= MF_WRITE | MF_TRUNC; | |
| 206 if (strchr(mode_str, 'a')) | |
| 207 w = a = 1, mode |= MF_WRITE | MF_APPEND; | |
| 208 if (strchr(mode_str, 'b')) | |
| 209 b = 1, mode |= MF_BINARY; | |
| 210 if (strchr(mode_str, 'x')) | |
| 211 x = 1; | |
| 212 if (strchr(mode_str, '+')) { | |
| 213 w = 1, mode |= MF_READ | MF_WRITE; | |
| 214 if (a) | |
| 215 r = 1; | |
| 216 } | |
| 217 | |
| 218 if (r) { | |
| 219 mf = mfcreate(NULL, 0); | |
| 220 if (!(mode & MF_TRUNC)) { | |
| 221 mf->data = mfload(fp, path, &mf->size, b); | |
| 222 mf->alloced = mf->size; | |
| 223 if (!a) | |
| 224 fseek(fp, 0, SEEK_SET); | |
| 225 } | |
| 226 } else { | |
| 227 /* Write - initialise the data structures */ | |
| 228 mf = mfcreate(NULL, 0); | |
| 229 } | |
| 230 mf->fp = fp; | |
| 231 mf->mode = mode; | |
| 232 | |
| 233 if (x) { | |
| 234 if (ftello(fp) != -1) { | |
| 235 mf->mode |= MF_MODEX; | |
| 236 } | |
| 237 } | |
| 238 | |
| 239 if (a) { | |
| 240 mf->flush_pos = mf->size; | |
| 241 fseek(fp, 0, SEEK_END); | |
| 242 } | |
| 243 | |
| 244 return mf; | |
| 245 } | |
| 246 | |
| 247 /* | |
| 248 * Opens a file. If we have read access (r or a+) then it loads the entire | |
| 249 * file into memory. If We have write access then the pathname is stored. | |
| 250 * We do not actually write until an mfclose, which then checks this pathname. | |
| 251 */ | |
| 252 mFILE *mfopen(const char *path, const char *mode) { | |
| 253 FILE *fp; | |
| 254 | |
| 255 if (NULL == (fp = fopen(path, mode))) | |
| 256 return NULL; | |
| 257 return mfreopen(path, mode, fp); | |
| 258 } | |
| 259 | |
| 260 /* | |
| 261 * Closes an mFILE. If the filename is known (implying write access) then this | |
| 262 * also writes the data to disk. | |
| 263 * | |
| 264 * Stdout is handled by calling mfflush which writes to stdout if appropriate. | |
| 265 */ | |
| 266 int mfclose(mFILE *mf) { | |
| 267 if (!mf) | |
| 268 return -1; | |
| 269 | |
| 270 mfflush(mf); | |
| 271 | |
| 272 if (mf->fp) | |
| 273 fclose(mf->fp); | |
| 274 | |
| 275 mfdestroy(mf); | |
| 276 | |
| 277 return 0; | |
| 278 } | |
| 279 | |
| 280 /* | |
| 281 * Closes the file pointer contained within the mFILE without destroying | |
| 282 * the in-memory data. | |
| 283 */ | |
| 284 int mfdetach(mFILE *mf) { | |
| 285 if (!mf) | |
| 286 return -1; | |
| 287 | |
| 288 mfflush(mf); | |
| 289 | |
| 290 if (mf->fp) { | |
| 291 fclose(mf->fp); | |
| 292 mf->fp = NULL; | |
| 293 } | |
| 294 | |
| 295 return 0; | |
| 296 } | |
| 297 | |
| 298 /* | |
| 299 * Destroys an mFILE structure but does not flush or close it | |
| 300 */ | |
| 301 int mfdestroy(mFILE *mf) { | |
| 302 if (!mf) | |
| 303 return -1; | |
| 304 | |
| 305 if (mf->data) | |
| 306 free(mf->data); | |
| 307 free(mf); | |
| 308 | |
| 309 return 0; | |
| 310 } | |
| 311 | |
| 312 /* | |
| 313 * Seek/tell functions. Nothing more than updating and reporting an | |
| 314 * in-memory index. NB we can seek on stdin or stdout even provided we | |
| 315 * haven't been flushing. | |
| 316 */ | |
| 317 int mfseek(mFILE *mf, long offset, int whence) { | |
| 318 switch (whence) { | |
| 319 case SEEK_SET: | |
| 320 mf->offset = offset; | |
| 321 break; | |
| 322 case SEEK_CUR: | |
| 323 mf->offset += offset; | |
| 324 break; | |
| 325 case SEEK_END: | |
| 326 mf->offset = mf->size + offset; | |
| 327 break; | |
| 328 default: | |
| 329 errno = EINVAL; | |
| 330 return -1; | |
| 331 } | |
| 332 | |
| 333 mf->eof = 0; | |
| 334 return 0; | |
| 335 } | |
| 336 | |
| 337 long mftell(mFILE *mf) { | |
| 338 return mf->offset; | |
| 339 } | |
| 340 | |
| 341 void mrewind(mFILE *mf) { | |
| 342 mf->offset = 0; | |
| 343 mf->eof = 0; | |
| 344 } | |
| 345 | |
| 346 /* | |
| 347 * mftruncate is not directly a translation of ftruncate as the latter | |
| 348 * takes a file descriptor instead of a FILE *. It performs the analogous | |
| 349 * role though. | |
| 350 * | |
| 351 * If offset is -1 then the file is truncated to be the current file | |
| 352 * offset. | |
| 353 */ | |
| 354 void mftruncate(mFILE *mf, long offset) { | |
| 355 mf->size = offset != -1 ? offset : mf->offset; | |
| 356 if (mf->offset > mf->size) | |
| 357 mf->offset = mf->size; | |
| 358 } | |
| 359 | |
| 360 int mfeof(mFILE *mf) { | |
| 361 return mf->eof; | |
| 362 } | |
| 363 | |
| 364 /* | |
| 365 * mFILE read/write functions. Basically these turn fread/fwrite syntax | |
| 366 * into memcpy statements, with appropriate memory handling for writing. | |
| 367 */ | |
| 368 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
| 369 size_t len; | |
| 370 char *cptr = (char *)ptr; | |
| 371 | |
| 372 if (mf == m_channel[0]) init_mstdin(); | |
| 373 | |
| 374 if (mf->size <= mf->offset) | |
| 375 return 0; | |
| 376 | |
| 377 len = size * nmemb <= mf->size - mf->offset | |
| 378 ? size * nmemb | |
| 379 : mf->size - mf->offset; | |
| 380 if (!size) | |
| 381 return 0; | |
| 382 | |
| 383 memcpy(cptr, &mf->data[mf->offset], len); | |
| 384 mf->offset += len; | |
| 385 cptr += len; | |
| 386 | |
| 387 if (len != size * nmemb) { | |
| 388 mf->eof = 1; | |
| 389 } | |
| 390 | |
| 391 return len / size; | |
| 392 } | |
| 393 | |
| 394 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
| 395 if (!(mf->mode & MF_WRITE)) | |
| 396 return 0; | |
| 397 | |
| 398 /* Append mode => forced all writes to end of file */ | |
| 399 if (mf->mode & MF_APPEND) | |
| 400 mf->offset = mf->size; | |
| 401 | |
| 402 /* Make sure we have enough room */ | |
| 403 while (size * nmemb + mf->offset > mf->alloced) { | |
| 404 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
| 405 mf->data = (void *)realloc(mf->data, mf->alloced); | |
| 406 } | |
| 407 | |
| 408 /* Record where we need to reflush from */ | |
| 409 if (mf->offset < mf->flush_pos) | |
| 410 mf->flush_pos = mf->offset; | |
| 411 | |
| 412 /* Copy the data over */ | |
| 413 memcpy(&mf->data[mf->offset], ptr, size * nmemb); | |
| 414 mf->offset += size * nmemb; | |
| 415 if (mf->size < mf->offset) | |
| 416 mf->size = mf->offset; | |
| 417 | |
| 418 return nmemb; | |
| 419 } | |
| 420 | |
| 421 int mfgetc(mFILE *mf) { | |
| 422 if (mf == m_channel[0]) init_mstdin(); | |
| 423 if (mf->offset < mf->size) { | |
| 424 return (unsigned char)mf->data[mf->offset++]; | |
| 425 } | |
| 426 | |
| 427 mf->eof = 1; | |
| 428 return -1; | |
| 429 } | |
| 430 | |
| 431 int mungetc(int c, mFILE *mf) { | |
| 432 if (mf->offset > 0) { | |
| 433 mf->data[--mf->offset] = c; | |
| 434 return c; | |
| 435 } | |
| 436 | |
| 437 mf->eof = 1; | |
| 438 return -1; | |
| 439 } | |
| 440 | |
| 441 char *mfgets(char *s, int size, mFILE *mf) { | |
| 442 int i; | |
| 443 | |
| 444 if (mf == m_channel[0]) init_mstdin(); | |
| 445 *s = 0; | |
| 446 for (i = 0; i < size-1;) { | |
| 447 if (mf->offset < mf->size) { | |
| 448 s[i] = mf->data[mf->offset++]; | |
| 449 if (s[i++] == '\n') | |
| 450 break; | |
| 451 } else { | |
| 452 mf->eof = 1; | |
| 453 break; | |
| 454 } | |
| 455 } | |
| 456 | |
| 457 s[i] = 0; | |
| 458 return i ? s : NULL; | |
| 459 } | |
| 460 | |
| 461 /* | |
| 462 * Flushes an mFILE. If this is a real open of a file in write mode then | |
| 463 * mFILE->fp will be set. We then write out any new data in mFILE since the | |
| 464 * last flush. We cannot tell what may have been modified as we don't keep | |
| 465 * track of that, so we typically rewrite out the entire file contents between | |
| 466 * the last flush_pos and the end of file. | |
| 467 * | |
| 468 * For stderr/stdout we also reset the offsets so we cannot modify things | |
| 469 * we've already output. | |
| 470 */ | |
| 471 int mfflush(mFILE *mf) { | |
| 472 if (!mf->fp) | |
| 473 return 0; | |
| 474 | |
| 475 /* FIXME: only do this when opened in write mode */ | |
| 476 if (mf == m_channel[1] || mf == m_channel[2]) { | |
| 477 fwrite(mf->data + mf->flush_pos, 1, mf->size - mf->flush_pos, mf->fp); | |
| 478 fflush(mf->fp); | |
| 479 | |
| 480 /* Stdout & stderr are non-seekable streams so throw away the data */ | |
| 481 mf->offset = mf->size = mf->flush_pos = 0; | |
| 482 } | |
| 483 | |
| 484 /* only flush when opened in write mode */ | |
| 485 if (mf->mode & MF_WRITE) { | |
| 486 if (mf->flush_pos < mf->size) { | |
| 487 if (!mf->mode & MF_MODEX) | |
| 488 fseek(mf->fp, mf->flush_pos, SEEK_SET); | |
| 489 fwrite(mf->data + mf->flush_pos, 1, | |
| 490 mf->size - mf->flush_pos, mf->fp); | |
| 491 fflush(mf->fp); | |
| 492 } | |
| 493 ftruncate(fileno(mf->fp), ftell(mf->fp)); | |
| 494 mf->flush_pos = mf->size; | |
| 495 } | |
| 496 | |
| 497 return 0; | |
| 498 } | |
| 499 | |
| 500 /* | |
| 501 * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to | |
| 502 * estimate how many additional bytes of storage will be required for the | |
| 503 * vsprintf to work. | |
| 504 */ | |
| 505 int mfprintf(mFILE *mf, char *fmt, ...) { | |
| 506 int ret; | |
| 507 size_t est_length; | |
| 508 va_list args; | |
| 509 | |
| 510 va_start(args, fmt); | |
| 511 est_length = vflen(fmt, args); | |
| 512 va_end(args); | |
| 513 while (est_length + mf->offset > mf->alloced) { | |
| 514 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
| 515 mf->data = (void *)realloc(mf->data, mf->alloced); | |
| 516 } | |
| 517 | |
| 518 va_start(args, fmt); | |
| 519 ret = vsprintf(&mf->data[mf->offset], fmt, args); | |
| 520 va_end(args); | |
| 521 | |
| 522 if (ret > 0) { | |
| 523 mf->offset += ret; | |
| 524 if (mf->size < mf->offset) | |
| 525 mf->size = mf->offset; | |
| 526 } | |
| 527 | |
| 528 if (mf->fp == stderr) { | |
| 529 /* Auto-flush for stderr */ | |
| 530 mfflush(mf); | |
| 531 } | |
| 532 | |
| 533 return ret; | |
| 534 } | |
| 535 | |
| 536 /* | |
| 537 * Converts an mFILE from binary to ascii mode by replacing all | |
| 538 * cr-nl with nl. | |
| 539 * | |
| 540 * Primarily used on windows when we've uncompressed a binary file which | |
| 541 * happens to be a text file (eg Experiment File). Previously we would have | |
| 542 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). | |
| 543 * | |
| 544 * Side effect: resets offset and flush_pos back to the start. | |
| 545 */ | |
| 546 void mfascii(mFILE *mf) { | |
| 547 size_t p1, p2; | |
| 548 | |
| 549 for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { | |
| 550 if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { | |
| 551 p2--; /* delete the \r */ | |
| 552 } | |
| 553 mf->data[p2] = mf->data[p1]; | |
| 554 } | |
| 555 mf->size = p2; | |
| 556 | |
| 557 mf->offset = mf->flush_pos = 0; | |
| 558 } |
