comparison srf2fastq/io_lib-1.12.2/io_lib/mFILE.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #ifdef HAVE_CONFIG_H
2 #include "io_lib_config.h"
3 #endif
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <errno.h>
8 #include <string.h>
9 #include <sys/types.h>
10 #include <sys/stat.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <stdarg.h>
14
15 #include "io_lib/os.h"
16 #include "io_lib/mFILE.h"
17 #include "io_lib/vlen.h"
18
19 /*
20 * This file contains memory-based versions of the most commonly used
21 * (by io_lib) stdio functions.
22 *
23 * Actual file IO takes place either on opening or closing an mFILE.
24 *
25 * Coupled to this are a bunch of rather scary macros which can be obtained
26 * by including stdio_hack.h. It is recommended though that you use mFILE.h
27 * instead and replace fopen with mfopen (etc). This is more or less
28 * mandatory if you wish to use both FILE and mFILE structs in a single file.
29 */
30
31 static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */
32
/*
 * Reads the remainder of fp into a freshly allocated buffer.
 *
 * fp     - stream to read from.
 * fn     - optional filename associated with fp (may be NULL). When it can
 *          be stat()ed and reports a non-zero size, that size is used to
 *          allocate the buffer up front and read it in one go. Otherwise
 *          (no name, stat failure, or a reported size of zero as seen for
 *          pipes and similar) we read in 8192 byte chunks until EOF.
 * size   - receives the number of bytes loaded (0 on failure).
 * binary - only consulted under _WIN32, where it selects binary or text
 *          translation mode for fp.
 *
 * Returns a malloced buffer on success holding *size bytes (the caller
 *         frees it);
 *         NULL on allocation failure or read error.
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;
    size_t expected = 0;        /* 0 => total size not known in advance */

    *size = 0;

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#endif

    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        expected = (size_t)sb.st_size;
        bufsize = expected;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            /* Grow via a temporary so the old buffer is not leaked */
            char *tmp = realloc(data, allocated + bufsize);
            if (!tmp) {
                free(data);
                return NULL;
            }
            data = tmp;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        if (len == 0)
            break;              /* EOF or read error; distinguished below */
        used += len;
    } while (!feof(fp) && (!expected || used < expected));

    /* A read error never sets feof(), so without this we would spin */
    if (ferror(fp)) {
        free(data);
        return NULL;
    }

    *size = used;

    return data;
}
77
78 /*
79 * Creates and returns m_channel[0].
80 * We initialise this on the first attempted read, which then slurps in
81 * all of stdin until EOF is met.
82 */
83 mFILE *mstdin(void) {
84 if (m_channel[0])
85 return m_channel[0];
86
87 m_channel[0] = mfcreate(NULL, 0);
88 m_channel[0]->fp = stdin;
89 return m_channel[0];
90 }
91
92 static void init_mstdin(void) {
93 static int done_stdin = 0;
94 if (done_stdin)
95 return;
96
97 m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
98 m_channel[0]->mode = MF_READ;
99 done_stdin = 1;
100 }
101
102 /*
103 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
104 * an empty buffer which is physically written out only when mfflush or
105 * mfclose are called.
106 */
107 mFILE *mstdout(void) {
108 if (m_channel[1])
109 return m_channel[1];
110
111 m_channel[1] = mfcreate(NULL, 0);
112 m_channel[1]->fp = stdout;
113 m_channel[1]->mode = MF_WRITE;
114 return m_channel[1];
115 }
116
117 /*
118 * Stderr as an mFILE.
119 * The code handles stderr by returning m_channel[2], but also checking
120 * for stderr in fprintf (the common usage of it) to auto-flush.
121 */
122 mFILE *mstderr(void) {
123 if (m_channel[2])
124 return m_channel[2];
125
126 m_channel[2] = mfcreate(NULL, 0);
127 m_channel[2]->fp = stderr;
128 m_channel[2]->mode = MF_WRITE;
129 return m_channel[2];
130 }
131
132
133 /*
134 * For creating existing mFILE pointers directly from memory buffers.
135 */
136 mFILE *mfcreate(char *data, int size) {
137 mFILE *mf = (mFILE *)malloc(sizeof(*mf));
138 mf->fp = NULL;
139 mf->data = data;
140 mf->alloced = size;
141 mf->size = size;
142 mf->eof = 0;
143 mf->offset = 0;
144 mf->flush_pos = 0;
145 mf->mode = MF_READ | MF_WRITE;
146 return mf;
147 }
148
149 /*
150 * Recreate an existing mFILE to house new data/size.
151 * It also rewinds the file.
152 */
153 void mfrecreate(mFILE *mf, char *data, int size) {
154 if (mf->data)
155 free(mf->data);
156 mf->data = data;
157 mf->size = size;
158 mf->alloced = size;
159 mf->eof = 0;
160 mf->offset = 0;
161 mf->flush_pos = 0;
162 }
163
164
165 /*
166 * Creates a new mFILE to contain the contents of the FILE pointer.
167 * This mFILE is purely for in-memory operations and has no links to the
168 * original FILE* it came from. It also doesn't close the FILE pointer.
169 * Consider using mfreopen() is you need different behaviour.
170 *
171 * Returns mFILE * on success
172 * NULL on failure.
173 */
174 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
175 mFILE *mf;
176
177 /* Open using mfreopen() */
178 if (NULL == (mf = mfreopen(path, mode_str, fp)))
179 return NULL;
180
181 /* Disassociate from the input stream */
182 mf->fp = NULL;
183
184 return mf;
185 }
186
187 /*
188 * Converts a FILE * to an mFILE *.
189 * Use this for wrapper functions to turn external prototypes requring
190 * FILE * as an argument into internal code using mFILE *.
191 */
192 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
193 mFILE *mf;
194 int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
195
196 /* Parse mode:
197 * r = read file contents (if truncated => don't read)
198 * w = write on close
199 * a = position at end of buffer
200 * x = position at same location as the original fp
201 */
202 if (strchr(mode_str, 'r'))
203 r = 1, mode |= MF_READ;
204 if (strchr(mode_str, 'w'))
205 w = 1, mode |= MF_WRITE | MF_TRUNC;
206 if (strchr(mode_str, 'a'))
207 w = a = 1, mode |= MF_WRITE | MF_APPEND;
208 if (strchr(mode_str, 'b'))
209 b = 1, mode |= MF_BINARY;
210 if (strchr(mode_str, 'x'))
211 x = 1;
212 if (strchr(mode_str, '+')) {
213 w = 1, mode |= MF_READ | MF_WRITE;
214 if (a)
215 r = 1;
216 }
217
218 if (r) {
219 mf = mfcreate(NULL, 0);
220 if (!(mode & MF_TRUNC)) {
221 mf->data = mfload(fp, path, &mf->size, b);
222 mf->alloced = mf->size;
223 if (!a)
224 fseek(fp, 0, SEEK_SET);
225 }
226 } else {
227 /* Write - initialise the data structures */
228 mf = mfcreate(NULL, 0);
229 }
230 mf->fp = fp;
231 mf->mode = mode;
232
233 if (x) {
234 if (ftello(fp) != -1) {
235 mf->mode |= MF_MODEX;
236 }
237 }
238
239 if (a) {
240 mf->flush_pos = mf->size;
241 fseek(fp, 0, SEEK_END);
242 }
243
244 return mf;
245 }
246
247 /*
248 * Opens a file. If we have read access (r or a+) then it loads the entire
249 * file into memory. If We have write access then the pathname is stored.
250 * We do not actually write until an mfclose, which then checks this pathname.
251 */
252 mFILE *mfopen(const char *path, const char *mode) {
253 FILE *fp;
254
255 if (NULL == (fp = fopen(path, mode)))
256 return NULL;
257 return mfreopen(path, mode, fp);
258 }
259
260 /*
261 * Closes an mFILE. If the filename is known (implying write access) then this
262 * also writes the data to disk.
263 *
264 * Stdout is handled by calling mfflush which writes to stdout if appropriate.
265 */
266 int mfclose(mFILE *mf) {
267 if (!mf)
268 return -1;
269
270 mfflush(mf);
271
272 if (mf->fp)
273 fclose(mf->fp);
274
275 mfdestroy(mf);
276
277 return 0;
278 }
279
280 /*
281 * Closes the file pointer contained within the mFILE without destroying
282 * the in-memory data.
283 */
284 int mfdetach(mFILE *mf) {
285 if (!mf)
286 return -1;
287
288 mfflush(mf);
289
290 if (mf->fp) {
291 fclose(mf->fp);
292 mf->fp = NULL;
293 }
294
295 return 0;
296 }
297
298 /*
299 * Destroys an mFILE structure but does not flush or close it
300 */
301 int mfdestroy(mFILE *mf) {
302 if (!mf)
303 return -1;
304
305 if (mf->data)
306 free(mf->data);
307 free(mf);
308
309 return 0;
310 }
311
312 /*
313 * Seek/tell functions. Nothing more than updating and reporting an
314 * in-memory index. NB we can seek on stdin or stdout even provided we
315 * haven't been flushing.
316 */
317 int mfseek(mFILE *mf, long offset, int whence) {
318 switch (whence) {
319 case SEEK_SET:
320 mf->offset = offset;
321 break;
322 case SEEK_CUR:
323 mf->offset += offset;
324 break;
325 case SEEK_END:
326 mf->offset = mf->size + offset;
327 break;
328 default:
329 errno = EINVAL;
330 return -1;
331 }
332
333 mf->eof = 0;
334 return 0;
335 }
336
337 long mftell(mFILE *mf) {
338 return mf->offset;
339 }
340
341 void mrewind(mFILE *mf) {
342 mf->offset = 0;
343 mf->eof = 0;
344 }
345
346 /*
347 * mftruncate is not directly a translation of ftruncate as the latter
348 * takes a file descriptor instead of a FILE *. It performs the analogous
349 * role though.
350 *
351 * If offset is -1 then the file is truncated to be the current file
352 * offset.
353 */
354 void mftruncate(mFILE *mf, long offset) {
355 mf->size = offset != -1 ? offset : mf->offset;
356 if (mf->offset > mf->size)
357 mf->offset = mf->size;
358 }
359
360 int mfeof(mFILE *mf) {
361 return mf->eof;
362 }
363
364 /*
365 * mFILE read/write functions. Basically these turn fread/fwrite syntax
366 * into memcpy statements, with appropriate memory handling for writing.
367 */
368 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
369 size_t len;
370 char *cptr = (char *)ptr;
371
372 if (mf == m_channel[0]) init_mstdin();
373
374 if (mf->size <= mf->offset)
375 return 0;
376
377 len = size * nmemb <= mf->size - mf->offset
378 ? size * nmemb
379 : mf->size - mf->offset;
380 if (!size)
381 return 0;
382
383 memcpy(cptr, &mf->data[mf->offset], len);
384 mf->offset += len;
385 cptr += len;
386
387 if (len != size * nmemb) {
388 mf->eof = 1;
389 }
390
391 return len / size;
392 }
393
394 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
395 if (!(mf->mode & MF_WRITE))
396 return 0;
397
398 /* Append mode => forced all writes to end of file */
399 if (mf->mode & MF_APPEND)
400 mf->offset = mf->size;
401
402 /* Make sure we have enough room */
403 while (size * nmemb + mf->offset > mf->alloced) {
404 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024;
405 mf->data = (void *)realloc(mf->data, mf->alloced);
406 }
407
408 /* Record where we need to reflush from */
409 if (mf->offset < mf->flush_pos)
410 mf->flush_pos = mf->offset;
411
412 /* Copy the data over */
413 memcpy(&mf->data[mf->offset], ptr, size * nmemb);
414 mf->offset += size * nmemb;
415 if (mf->size < mf->offset)
416 mf->size = mf->offset;
417
418 return nmemb;
419 }
420
421 int mfgetc(mFILE *mf) {
422 if (mf == m_channel[0]) init_mstdin();
423 if (mf->offset < mf->size) {
424 return (unsigned char)mf->data[mf->offset++];
425 }
426
427 mf->eof = 1;
428 return -1;
429 }
430
431 int mungetc(int c, mFILE *mf) {
432 if (mf->offset > 0) {
433 mf->data[--mf->offset] = c;
434 return c;
435 }
436
437 mf->eof = 1;
438 return -1;
439 }
440
441 char *mfgets(char *s, int size, mFILE *mf) {
442 int i;
443
444 if (mf == m_channel[0]) init_mstdin();
445 *s = 0;
446 for (i = 0; i < size-1;) {
447 if (mf->offset < mf->size) {
448 s[i] = mf->data[mf->offset++];
449 if (s[i++] == '\n')
450 break;
451 } else {
452 mf->eof = 1;
453 break;
454 }
455 }
456
457 s[i] = 0;
458 return i ? s : NULL;
459 }
460
461 /*
462 * Flushes an mFILE. If this is a real open of a file in write mode then
463 * mFILE->fp will be set. We then write out any new data in mFILE since the
464 * last flush. We cannot tell what may have been modified as we don't keep
465 * track of that, so we typically rewrite out the entire file contents between
466 * the last flush_pos and the end of file.
467 *
468 * For stderr/stdout we also reset the offsets so we cannot modify things
469 * we've already output.
470 */
471 int mfflush(mFILE *mf) {
472 if (!mf->fp)
473 return 0;
474
475 /* FIXME: only do this when opened in write mode */
476 if (mf == m_channel[1] || mf == m_channel[2]) {
477 fwrite(mf->data + mf->flush_pos, 1, mf->size - mf->flush_pos, mf->fp);
478 fflush(mf->fp);
479
480 /* Stdout & stderr are non-seekable streams so throw away the data */
481 mf->offset = mf->size = mf->flush_pos = 0;
482 }
483
484 /* only flush when opened in write mode */
485 if (mf->mode & MF_WRITE) {
486 if (mf->flush_pos < mf->size) {
487 if (!mf->mode & MF_MODEX)
488 fseek(mf->fp, mf->flush_pos, SEEK_SET);
489 fwrite(mf->data + mf->flush_pos, 1,
490 mf->size - mf->flush_pos, mf->fp);
491 fflush(mf->fp);
492 }
493 ftruncate(fileno(mf->fp), ftell(mf->fp));
494 mf->flush_pos = mf->size;
495 }
496
497 return 0;
498 }
499
500 /*
501 * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to
502 * estimate how many additional bytes of storage will be required for the
503 * vsprintf to work.
504 */
505 int mfprintf(mFILE *mf, char *fmt, ...) {
506 int ret;
507 size_t est_length;
508 va_list args;
509
510 va_start(args, fmt);
511 est_length = vflen(fmt, args);
512 va_end(args);
513 while (est_length + mf->offset > mf->alloced) {
514 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024;
515 mf->data = (void *)realloc(mf->data, mf->alloced);
516 }
517
518 va_start(args, fmt);
519 ret = vsprintf(&mf->data[mf->offset], fmt, args);
520 va_end(args);
521
522 if (ret > 0) {
523 mf->offset += ret;
524 if (mf->size < mf->offset)
525 mf->size = mf->offset;
526 }
527
528 if (mf->fp == stderr) {
529 /* Auto-flush for stderr */
530 mfflush(mf);
531 }
532
533 return ret;
534 }
535
536 /*
537 * Converts an mFILE from binary to ascii mode by replacing all
538 * cr-nl with nl.
539 *
540 * Primarily used on windows when we've uncompressed a binary file which
541 * happens to be a text file (eg Experiment File). Previously we would have
542 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
543 *
544 * Side effect: resets offset and flush_pos back to the start.
545 */
546 void mfascii(mFILE *mf) {
547 size_t p1, p2;
548
549 for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
550 if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
551 p2--; /* delete the \r */
552 }
553 mf->data[p2] = mf->data[p1];
554 }
555 mf->size = p2;
556
557 mf->offset = mf->flush_pos = 0;
558 }