Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/mFILE.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #ifdef HAVE_CONFIG_H | |
2 #include "io_lib_config.h" | |
3 #endif | |
4 | |
5 #include <stdio.h> | |
6 #include <stdlib.h> | |
7 #include <errno.h> | |
8 #include <string.h> | |
9 #include <sys/types.h> | |
10 #include <sys/stat.h> | |
11 #include <fcntl.h> | |
12 #include <unistd.h> | |
13 #include <stdarg.h> | |
14 | |
15 #include "io_lib/os.h" | |
16 #include "io_lib/mFILE.h" | |
17 #include "io_lib/vlen.h" | |
18 | |
19 /* | |
20 * This file contains memory-based versions of the most commonly used | |
21 * (by io_lib) stdio functions. | |
22 * | |
23 * Actual file IO takes place either on opening or closing an mFILE. | |
24 * | |
25 * Coupled to this are a bunch of rather scary macros which can be obtained | |
26 * by including stdio_hack.h. It is recommended though that you use mFILE.h | |
27 * instead and replace fopen with mfopen (etc). This is more or less | |
28 * mandatory if you wish to use both FILE and mFILE structs in a single file. | |
29 */ | |
30 | |
31 static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */ | |
32 | |
/*
 * Reads the remainder of fp into a freshly allocated buffer.
 *
 * If 'fn' is non-NULL and can be stat()ed with a non-zero size, that size
 * is used as the chunk size so the whole file is normally read with one
 * allocation and one fread. Otherwise the stream is read in 8192 byte
 * chunks until EOF.
 *
 * 'binary' is only consulted on _WIN32, where it selects the translation
 * mode of the underlying descriptor.
 *
 * Returns a malloced buffer on success, with its length stored in *size.
 *         NULL on failure (allocation failure or read error), with *size
 *         set to 0.
 */
static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
    struct stat sb;
    char *data = NULL;
    size_t allocated = 0, used = 0;
    size_t bufsize = 8192;   /* size_t: st_size may not fit in an int */
    size_t expected = 0;
    int have_size = 0;

#ifdef _WIN32
    if (binary)
        _setmode(_fileno(fp), _O_BINARY);
    else
        _setmode(_fileno(fp), _O_TEXT);
#else
    (void)binary;
#endif

    *size = 0;

    if (fn && -1 != stat(fn, &sb) && sb.st_size > 0) {
        expected = (size_t)sb.st_size;
        bufsize = expected;
        have_size = 1;
    }

    do {
        size_t len;

        if (used + bufsize > allocated) {
            /* Keep the old pointer so it can be released on failure */
            char *tmp = realloc(data, allocated + bufsize);
            if (!tmp) {
                free(data);
                return NULL;
            }
            data = tmp;
            allocated += bufsize;
        }

        len = fread(data + used, 1, allocated - used, fp);
        used += len;

        /*
         * A read error never sets the EOF flag, so it must be tested
         * separately or the loop would spin forever.
         */
        if (ferror(fp)) {
            free(data);
            return NULL;
        }
    } while (!feof(fp) && (!have_size || used < expected));

    *size = used;

    return data;
}
77 | |
78 /* | |
79 * Creates and returns m_channel[0]. | |
80 * We initialise this on the first attempted read, which then slurps in | |
81 * all of stdin until EOF is met. | |
82 */ | |
83 mFILE *mstdin(void) { | |
84 if (m_channel[0]) | |
85 return m_channel[0]; | |
86 | |
87 m_channel[0] = mfcreate(NULL, 0); | |
88 m_channel[0]->fp = stdin; | |
89 return m_channel[0]; | |
90 } | |
91 | |
92 static void init_mstdin(void) { | |
93 static int done_stdin = 0; | |
94 if (done_stdin) | |
95 return; | |
96 | |
97 m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1); | |
98 m_channel[0]->mode = MF_READ; | |
99 done_stdin = 1; | |
100 } | |
101 | |
102 /* | |
103 * Creates and returns m_channel[1]. This is the fake for stdout. It starts as | |
104 * an empty buffer which is physically written out only when mfflush or | |
105 * mfclose are called. | |
106 */ | |
107 mFILE *mstdout(void) { | |
108 if (m_channel[1]) | |
109 return m_channel[1]; | |
110 | |
111 m_channel[1] = mfcreate(NULL, 0); | |
112 m_channel[1]->fp = stdout; | |
113 m_channel[1]->mode = MF_WRITE; | |
114 return m_channel[1]; | |
115 } | |
116 | |
117 /* | |
118 * Stderr as an mFILE. | |
119 * The code handles stderr by returning m_channel[2], but also checking | |
120 * for stderr in fprintf (the common usage of it) to auto-flush. | |
121 */ | |
122 mFILE *mstderr(void) { | |
123 if (m_channel[2]) | |
124 return m_channel[2]; | |
125 | |
126 m_channel[2] = mfcreate(NULL, 0); | |
127 m_channel[2]->fp = stderr; | |
128 m_channel[2]->mode = MF_WRITE; | |
129 return m_channel[2]; | |
130 } | |
131 | |
132 | |
133 /* | |
134 * For creating existing mFILE pointers directly from memory buffers. | |
135 */ | |
136 mFILE *mfcreate(char *data, int size) { | |
137 mFILE *mf = (mFILE *)malloc(sizeof(*mf)); | |
138 mf->fp = NULL; | |
139 mf->data = data; | |
140 mf->alloced = size; | |
141 mf->size = size; | |
142 mf->eof = 0; | |
143 mf->offset = 0; | |
144 mf->flush_pos = 0; | |
145 mf->mode = MF_READ | MF_WRITE; | |
146 return mf; | |
147 } | |
148 | |
149 /* | |
150 * Recreate an existing mFILE to house new data/size. | |
151 * It also rewinds the file. | |
152 */ | |
153 void mfrecreate(mFILE *mf, char *data, int size) { | |
154 if (mf->data) | |
155 free(mf->data); | |
156 mf->data = data; | |
157 mf->size = size; | |
158 mf->alloced = size; | |
159 mf->eof = 0; | |
160 mf->offset = 0; | |
161 mf->flush_pos = 0; | |
162 } | |
163 | |
164 | |
165 /* | |
166 * Creates a new mFILE to contain the contents of the FILE pointer. | |
167 * This mFILE is purely for in-memory operations and has no links to the | |
168 * original FILE* it came from. It also doesn't close the FILE pointer. | |
169 * Consider using mfreopen() is you need different behaviour. | |
170 * | |
171 * Returns mFILE * on success | |
172 * NULL on failure. | |
173 */ | |
174 mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) { | |
175 mFILE *mf; | |
176 | |
177 /* Open using mfreopen() */ | |
178 if (NULL == (mf = mfreopen(path, mode_str, fp))) | |
179 return NULL; | |
180 | |
181 /* Disassociate from the input stream */ | |
182 mf->fp = NULL; | |
183 | |
184 return mf; | |
185 } | |
186 | |
187 /* | |
188 * Converts a FILE * to an mFILE *. | |
189 * Use this for wrapper functions to turn external prototypes requring | |
190 * FILE * as an argument into internal code using mFILE *. | |
191 */ | |
192 mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) { | |
193 mFILE *mf; | |
194 int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0; | |
195 | |
196 /* Parse mode: | |
197 * r = read file contents (if truncated => don't read) | |
198 * w = write on close | |
199 * a = position at end of buffer | |
200 * x = position at same location as the original fp | |
201 */ | |
202 if (strchr(mode_str, 'r')) | |
203 r = 1, mode |= MF_READ; | |
204 if (strchr(mode_str, 'w')) | |
205 w = 1, mode |= MF_WRITE | MF_TRUNC; | |
206 if (strchr(mode_str, 'a')) | |
207 w = a = 1, mode |= MF_WRITE | MF_APPEND; | |
208 if (strchr(mode_str, 'b')) | |
209 b = 1, mode |= MF_BINARY; | |
210 if (strchr(mode_str, 'x')) | |
211 x = 1; | |
212 if (strchr(mode_str, '+')) { | |
213 w = 1, mode |= MF_READ | MF_WRITE; | |
214 if (a) | |
215 r = 1; | |
216 } | |
217 | |
218 if (r) { | |
219 mf = mfcreate(NULL, 0); | |
220 if (!(mode & MF_TRUNC)) { | |
221 mf->data = mfload(fp, path, &mf->size, b); | |
222 mf->alloced = mf->size; | |
223 if (!a) | |
224 fseek(fp, 0, SEEK_SET); | |
225 } | |
226 } else { | |
227 /* Write - initialise the data structures */ | |
228 mf = mfcreate(NULL, 0); | |
229 } | |
230 mf->fp = fp; | |
231 mf->mode = mode; | |
232 | |
233 if (x) { | |
234 if (ftello(fp) != -1) { | |
235 mf->mode |= MF_MODEX; | |
236 } | |
237 } | |
238 | |
239 if (a) { | |
240 mf->flush_pos = mf->size; | |
241 fseek(fp, 0, SEEK_END); | |
242 } | |
243 | |
244 return mf; | |
245 } | |
246 | |
247 /* | |
248 * Opens a file. If we have read access (r or a+) then it loads the entire | |
249 * file into memory. If We have write access then the pathname is stored. | |
250 * We do not actually write until an mfclose, which then checks this pathname. | |
251 */ | |
252 mFILE *mfopen(const char *path, const char *mode) { | |
253 FILE *fp; | |
254 | |
255 if (NULL == (fp = fopen(path, mode))) | |
256 return NULL; | |
257 return mfreopen(path, mode, fp); | |
258 } | |
259 | |
260 /* | |
261 * Closes an mFILE. If the filename is known (implying write access) then this | |
262 * also writes the data to disk. | |
263 * | |
264 * Stdout is handled by calling mfflush which writes to stdout if appropriate. | |
265 */ | |
266 int mfclose(mFILE *mf) { | |
267 if (!mf) | |
268 return -1; | |
269 | |
270 mfflush(mf); | |
271 | |
272 if (mf->fp) | |
273 fclose(mf->fp); | |
274 | |
275 mfdestroy(mf); | |
276 | |
277 return 0; | |
278 } | |
279 | |
280 /* | |
281 * Closes the file pointer contained within the mFILE without destroying | |
282 * the in-memory data. | |
283 */ | |
284 int mfdetach(mFILE *mf) { | |
285 if (!mf) | |
286 return -1; | |
287 | |
288 mfflush(mf); | |
289 | |
290 if (mf->fp) { | |
291 fclose(mf->fp); | |
292 mf->fp = NULL; | |
293 } | |
294 | |
295 return 0; | |
296 } | |
297 | |
298 /* | |
299 * Destroys an mFILE structure but does not flush or close it | |
300 */ | |
301 int mfdestroy(mFILE *mf) { | |
302 if (!mf) | |
303 return -1; | |
304 | |
305 if (mf->data) | |
306 free(mf->data); | |
307 free(mf); | |
308 | |
309 return 0; | |
310 } | |
311 | |
312 /* | |
313 * Seek/tell functions. Nothing more than updating and reporting an | |
314 * in-memory index. NB we can seek on stdin or stdout even provided we | |
315 * haven't been flushing. | |
316 */ | |
317 int mfseek(mFILE *mf, long offset, int whence) { | |
318 switch (whence) { | |
319 case SEEK_SET: | |
320 mf->offset = offset; | |
321 break; | |
322 case SEEK_CUR: | |
323 mf->offset += offset; | |
324 break; | |
325 case SEEK_END: | |
326 mf->offset = mf->size + offset; | |
327 break; | |
328 default: | |
329 errno = EINVAL; | |
330 return -1; | |
331 } | |
332 | |
333 mf->eof = 0; | |
334 return 0; | |
335 } | |
336 | |
337 long mftell(mFILE *mf) { | |
338 return mf->offset; | |
339 } | |
340 | |
341 void mrewind(mFILE *mf) { | |
342 mf->offset = 0; | |
343 mf->eof = 0; | |
344 } | |
345 | |
346 /* | |
347 * mftruncate is not directly a translation of ftruncate as the latter | |
348 * takes a file descriptor instead of a FILE *. It performs the analogous | |
349 * role though. | |
350 * | |
351 * If offset is -1 then the file is truncated to be the current file | |
352 * offset. | |
353 */ | |
354 void mftruncate(mFILE *mf, long offset) { | |
355 mf->size = offset != -1 ? offset : mf->offset; | |
356 if (mf->offset > mf->size) | |
357 mf->offset = mf->size; | |
358 } | |
359 | |
360 int mfeof(mFILE *mf) { | |
361 return mf->eof; | |
362 } | |
363 | |
364 /* | |
365 * mFILE read/write functions. Basically these turn fread/fwrite syntax | |
366 * into memcpy statements, with appropriate memory handling for writing. | |
367 */ | |
368 size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
369 size_t len; | |
370 char *cptr = (char *)ptr; | |
371 | |
372 if (mf == m_channel[0]) init_mstdin(); | |
373 | |
374 if (mf->size <= mf->offset) | |
375 return 0; | |
376 | |
377 len = size * nmemb <= mf->size - mf->offset | |
378 ? size * nmemb | |
379 : mf->size - mf->offset; | |
380 if (!size) | |
381 return 0; | |
382 | |
383 memcpy(cptr, &mf->data[mf->offset], len); | |
384 mf->offset += len; | |
385 cptr += len; | |
386 | |
387 if (len != size * nmemb) { | |
388 mf->eof = 1; | |
389 } | |
390 | |
391 return len / size; | |
392 } | |
393 | |
394 size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) { | |
395 if (!(mf->mode & MF_WRITE)) | |
396 return 0; | |
397 | |
398 /* Append mode => forced all writes to end of file */ | |
399 if (mf->mode & MF_APPEND) | |
400 mf->offset = mf->size; | |
401 | |
402 /* Make sure we have enough room */ | |
403 while (size * nmemb + mf->offset > mf->alloced) { | |
404 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
405 mf->data = (void *)realloc(mf->data, mf->alloced); | |
406 } | |
407 | |
408 /* Record where we need to reflush from */ | |
409 if (mf->offset < mf->flush_pos) | |
410 mf->flush_pos = mf->offset; | |
411 | |
412 /* Copy the data over */ | |
413 memcpy(&mf->data[mf->offset], ptr, size * nmemb); | |
414 mf->offset += size * nmemb; | |
415 if (mf->size < mf->offset) | |
416 mf->size = mf->offset; | |
417 | |
418 return nmemb; | |
419 } | |
420 | |
421 int mfgetc(mFILE *mf) { | |
422 if (mf == m_channel[0]) init_mstdin(); | |
423 if (mf->offset < mf->size) { | |
424 return (unsigned char)mf->data[mf->offset++]; | |
425 } | |
426 | |
427 mf->eof = 1; | |
428 return -1; | |
429 } | |
430 | |
431 int mungetc(int c, mFILE *mf) { | |
432 if (mf->offset > 0) { | |
433 mf->data[--mf->offset] = c; | |
434 return c; | |
435 } | |
436 | |
437 mf->eof = 1; | |
438 return -1; | |
439 } | |
440 | |
441 char *mfgets(char *s, int size, mFILE *mf) { | |
442 int i; | |
443 | |
444 if (mf == m_channel[0]) init_mstdin(); | |
445 *s = 0; | |
446 for (i = 0; i < size-1;) { | |
447 if (mf->offset < mf->size) { | |
448 s[i] = mf->data[mf->offset++]; | |
449 if (s[i++] == '\n') | |
450 break; | |
451 } else { | |
452 mf->eof = 1; | |
453 break; | |
454 } | |
455 } | |
456 | |
457 s[i] = 0; | |
458 return i ? s : NULL; | |
459 } | |
460 | |
461 /* | |
462 * Flushes an mFILE. If this is a real open of a file in write mode then | |
463 * mFILE->fp will be set. We then write out any new data in mFILE since the | |
464 * last flush. We cannot tell what may have been modified as we don't keep | |
465 * track of that, so we typically rewrite out the entire file contents between | |
466 * the last flush_pos and the end of file. | |
467 * | |
468 * For stderr/stdout we also reset the offsets so we cannot modify things | |
469 * we've already output. | |
470 */ | |
471 int mfflush(mFILE *mf) { | |
472 if (!mf->fp) | |
473 return 0; | |
474 | |
475 /* FIXME: only do this when opened in write mode */ | |
476 if (mf == m_channel[1] || mf == m_channel[2]) { | |
477 fwrite(mf->data + mf->flush_pos, 1, mf->size - mf->flush_pos, mf->fp); | |
478 fflush(mf->fp); | |
479 | |
480 /* Stdout & stderr are non-seekable streams so throw away the data */ | |
481 mf->offset = mf->size = mf->flush_pos = 0; | |
482 } | |
483 | |
484 /* only flush when opened in write mode */ | |
485 if (mf->mode & MF_WRITE) { | |
486 if (mf->flush_pos < mf->size) { | |
487 if (!mf->mode & MF_MODEX) | |
488 fseek(mf->fp, mf->flush_pos, SEEK_SET); | |
489 fwrite(mf->data + mf->flush_pos, 1, | |
490 mf->size - mf->flush_pos, mf->fp); | |
491 fflush(mf->fp); | |
492 } | |
493 ftruncate(fileno(mf->fp), ftell(mf->fp)); | |
494 mf->flush_pos = mf->size; | |
495 } | |
496 | |
497 return 0; | |
498 } | |
499 | |
500 /* | |
501 * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to | |
502 * estimate how many additional bytes of storage will be required for the | |
503 * vsprintf to work. | |
504 */ | |
505 int mfprintf(mFILE *mf, char *fmt, ...) { | |
506 int ret; | |
507 size_t est_length; | |
508 va_list args; | |
509 | |
510 va_start(args, fmt); | |
511 est_length = vflen(fmt, args); | |
512 va_end(args); | |
513 while (est_length + mf->offset > mf->alloced) { | |
514 mf->alloced = mf->alloced ? mf->alloced * 2 : 1024; | |
515 mf->data = (void *)realloc(mf->data, mf->alloced); | |
516 } | |
517 | |
518 va_start(args, fmt); | |
519 ret = vsprintf(&mf->data[mf->offset], fmt, args); | |
520 va_end(args); | |
521 | |
522 if (ret > 0) { | |
523 mf->offset += ret; | |
524 if (mf->size < mf->offset) | |
525 mf->size = mf->offset; | |
526 } | |
527 | |
528 if (mf->fp == stderr) { | |
529 /* Auto-flush for stderr */ | |
530 mfflush(mf); | |
531 } | |
532 | |
533 return ret; | |
534 } | |
535 | |
536 /* | |
537 * Converts an mFILE from binary to ascii mode by replacing all | |
538 * cr-nl with nl. | |
539 * | |
540 * Primarily used on windows when we've uncompressed a binary file which | |
541 * happens to be a text file (eg Experiment File). Previously we would have | |
542 * seeked back to the start and used _setmode(fileno(fp), _O_TEXT). | |
543 * | |
544 * Side effect: resets offset and flush_pos back to the start. | |
545 */ | |
546 void mfascii(mFILE *mf) { | |
547 size_t p1, p2; | |
548 | |
549 for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) { | |
550 if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') { | |
551 p2--; /* delete the \r */ | |
552 } | |
553 mf->data[p2] = mf->data[p1]; | |
554 } | |
555 mf->size = p2; | |
556 | |
557 mf->offset = mf->flush_pos = 0; | |
558 } |