Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/hfile.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfa3745e5fd8 |
---|---|
1 /* hfile.c -- buffered low-level input/output streams. | |
2 | |
3 Copyright (C) 2013-2015 Genome Research Ltd. | |
4 | |
5 Author: John Marshall <jm18@sanger.ac.uk> | |
6 | |
7 Permission is hereby granted, free of charge, to any person obtaining a copy | |
8 of this software and associated documentation files (the "Software"), to deal | |
9 in the Software without restriction, including without limitation the rights | |
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
11 copies of the Software, and to permit persons to whom the Software is | |
12 furnished to do so, subject to the following conditions: | |
13 | |
14 The above copyright notice and this permission notice shall be included in | |
15 all copies or substantial portions of the Software. | |
16 | |
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
23 DEALINGS IN THE SOFTWARE. */ | |
24 | |
25 #include <stdio.h> | |
26 #include <stdlib.h> | |
27 #include <string.h> | |
28 #include <errno.h> | |
29 | |
30 #include "htslib/hfile.h" | |
31 #include "hfile_internal.h" | |
32 | |
33 /* hFILE fields are used as follows: | |
34 | |
35 char *buffer; // Pointer to the start of the I/O buffer | |
36 char *begin; // First not-yet-read character / unused position | |
37 char *end; // First unfilled/unfillable position | |
38 char *limit; // Pointer to the first position past the buffer | |
39 | |
40 const hFILE_backend *backend; // Methods to refill/flush I/O buffer | |
41 | |
42 off_t offset; // Offset within the stream of buffer position 0 | |
43 int at_eof:1; // For reading, whether EOF has been seen | |
44 int has_errno; // Error number from the last failure on this stream | |
45 | |
46 For reading, begin is the first unread character in the buffer and end is the | |
47 first unfilled position: | |
48 | |
49 -----------ABCDEFGHIJKLMNO--------------- | |
50 ^buffer ^begin ^end ^limit | |
51 | |
52 For writing, begin is the first unused position and end is unused so remains | |
53 equal to buffer: | |
54 | |
55 ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------- | |
56 ^buffer ^begin ^limit | |
57 ^end | |
58 | |
59 Thus if begin > end then there is a non-empty write buffer, if begin < end | |
60 then there is a non-empty read buffer, and if begin == end then both buffers | |
61 are empty. In all cases, the stream's file position indicator corresponds | |
62 to the position pointed to by begin. */ | |
63 | |
64 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) | |
65 { | |
66 hFILE *fp = (hFILE *) malloc(struct_size); | |
67 if (fp == NULL) goto error; | |
68 | |
69 if (capacity == 0) capacity = 32768; | |
70 // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory | |
71 if (strchr(mode, 'r') && capacity > 32768) capacity = 32768; | |
72 | |
73 fp->buffer = (char *) malloc(capacity); | |
74 if (fp->buffer == NULL) goto error; | |
75 | |
76 fp->begin = fp->end = fp->buffer; | |
77 fp->limit = &fp->buffer[capacity]; | |
78 | |
79 fp->offset = 0; | |
80 fp->at_eof = 0; | |
81 fp->has_errno = 0; | |
82 return fp; | |
83 | |
84 error: | |
85 hfile_destroy(fp); | |
86 return NULL; | |
87 } | |
88 | |
89 void hfile_destroy(hFILE *fp) | |
90 { | |
91 int save = errno; | |
92 if (fp) free(fp->buffer); | |
93 free(fp); | |
94 errno = save; | |
95 } | |
96 | |
97 static inline int writebuffer_is_nonempty(hFILE *fp) | |
98 { | |
99 return fp->begin > fp->end; | |
100 } | |
101 | |
102 /* Refills the read buffer from the backend (once, so may only partially | |
103 fill the buffer), returning the number of additional characters read | |
104 (which might be 0), or negative when an error occurred. */ | |
105 static ssize_t refill_buffer(hFILE *fp) | |
106 { | |
107 ssize_t n; | |
108 | |
109 // Move any unread characters to the start of the buffer | |
110 if (fp->begin > fp->buffer) { | |
111 fp->offset += fp->begin - fp->buffer; | |
112 memmove(fp->buffer, fp->begin, fp->end - fp->begin); | |
113 fp->end = &fp->buffer[fp->end - fp->begin]; | |
114 fp->begin = fp->buffer; | |
115 } | |
116 | |
117 // Read into the available buffer space at fp->[end,limit) | |
118 if (fp->at_eof || fp->end == fp->limit) n = 0; | |
119 else { | |
120 n = fp->backend->read(fp, fp->end, fp->limit - fp->end); | |
121 if (n < 0) { fp->has_errno = errno; return n; } | |
122 else if (n == 0) fp->at_eof = 1; | |
123 } | |
124 | |
125 fp->end += n; | |
126 return n; | |
127 } | |
128 | |
129 /* Called only from hgetc(), when our buffer is empty. */ | |
130 int hgetc2(hFILE *fp) | |
131 { | |
132 return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF; | |
133 } | |
134 | |
135 ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) | |
136 { | |
137 size_t n = fp->end - fp->begin; | |
138 while (n < nbytes) { | |
139 ssize_t ret = refill_buffer(fp); | |
140 if (ret < 0) return ret; | |
141 else if (ret == 0) break; | |
142 else n += ret; | |
143 } | |
144 | |
145 if (n > nbytes) n = nbytes; | |
146 memcpy(buffer, fp->begin, n); | |
147 return n; | |
148 } | |
149 | |
150 /* Called only from hread(); when called, our buffer is empty and nread bytes | |
151 have already been placed in the destination buffer. */ | |
152 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread) | |
153 { | |
154 const size_t capacity = fp->limit - fp->buffer; | |
155 char *dest = (char *) destv; | |
156 dest += nread, nbytes -= nread; | |
157 | |
158 // Read large requests directly into the destination buffer | |
159 while (nbytes * 2 >= capacity && !fp->at_eof) { | |
160 ssize_t n = fp->backend->read(fp, dest, nbytes); | |
161 if (n < 0) { fp->has_errno = errno; return n; } | |
162 else if (n == 0) fp->at_eof = 1; | |
163 fp->offset += n; | |
164 dest += n, nbytes -= n; | |
165 nread += n; | |
166 } | |
167 | |
168 while (nbytes > 0 && !fp->at_eof) { | |
169 size_t n; | |
170 ssize_t ret = refill_buffer(fp); | |
171 if (ret < 0) return ret; | |
172 | |
173 n = fp->end - fp->begin; | |
174 if (n > nbytes) n = nbytes; | |
175 memcpy(dest, fp->begin, n); | |
176 fp->begin += n; | |
177 dest += n, nbytes -= n; | |
178 nread += n; | |
179 } | |
180 | |
181 return nread; | |
182 } | |
183 | |
184 /* Flushes the write buffer, fp->[buffer,begin), out through the backend | |
185 returning 0 on success or negative if an error occurred. */ | |
186 static ssize_t flush_buffer(hFILE *fp) | |
187 { | |
188 const char *buffer = fp->buffer; | |
189 while (buffer < fp->begin) { | |
190 ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer); | |
191 if (n < 0) { fp->has_errno = errno; return n; } | |
192 buffer += n; | |
193 fp->offset += n; | |
194 } | |
195 | |
196 fp->begin = fp->buffer; // Leave the buffer empty | |
197 return 0; | |
198 } | |
199 | |
200 int hflush(hFILE *fp) | |
201 { | |
202 if (flush_buffer(fp) < 0) return EOF; | |
203 if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; } | |
204 return 0; | |
205 } | |
206 | |
207 /* Called only from hputc(), when our buffer is already full. */ | |
208 int hputc2(int c, hFILE *fp) | |
209 { | |
210 if (flush_buffer(fp) < 0) return EOF; | |
211 *(fp->begin++) = c; | |
212 return c; | |
213 } | |
214 | |
215 /* Called only from hwrite() and hputs2(); when called, our buffer is full and | |
216 ncopied bytes from the source have already been copied to our buffer. */ | |
217 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied) | |
218 { | |
219 const char *src = (const char *) srcv; | |
220 ssize_t ret; | |
221 const size_t capacity = fp->limit - fp->buffer; | |
222 size_t remaining = totalbytes - ncopied; | |
223 src += ncopied; | |
224 | |
225 ret = flush_buffer(fp); | |
226 if (ret < 0) return ret; | |
227 | |
228 // Write large blocks out directly from the source buffer | |
229 while (remaining * 2 >= capacity) { | |
230 ssize_t n = fp->backend->write(fp, src, remaining); | |
231 if (n < 0) { fp->has_errno = errno; return n; } | |
232 fp->offset += n; | |
233 src += n, remaining -= n; | |
234 } | |
235 | |
236 // Just buffer any remaining characters | |
237 memcpy(fp->begin, src, remaining); | |
238 fp->begin += remaining; | |
239 | |
240 return totalbytes; | |
241 } | |
242 | |
243 /* Called only from hputs(), when our buffer is already full. */ | |
244 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp) | |
245 { | |
246 return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF; | |
247 } | |
248 | |
249 off_t hseek(hFILE *fp, off_t offset, int whence) | |
250 { | |
251 off_t pos; | |
252 | |
253 if (writebuffer_is_nonempty(fp)) { | |
254 int ret = flush_buffer(fp); | |
255 if (ret < 0) return ret; | |
256 } | |
257 else { | |
258 // Convert relative offsets from being relative to the hFILE's stream | |
259 // position (at begin) to being relative to the backend's physical | |
260 // stream position (at end, due to the buffering read-ahead). | |
261 if (whence == SEEK_CUR) offset -= fp->end - fp->begin; | |
262 } | |
263 | |
264 pos = fp->backend->seek(fp, offset, whence); | |
265 if (pos < 0) { fp->has_errno = errno; return pos; } | |
266 | |
267 // Seeking succeeded, so discard any non-empty read buffer | |
268 fp->begin = fp->end = fp->buffer; | |
269 fp->at_eof = 0; | |
270 | |
271 fp->offset = pos; | |
272 return pos; | |
273 } | |
274 | |
275 int hclose(hFILE *fp) | |
276 { | |
277 int err = fp->has_errno; | |
278 | |
279 if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno; | |
280 if (fp->backend->close(fp) < 0) err = errno; | |
281 hfile_destroy(fp); | |
282 | |
283 if (err) { | |
284 errno = err; | |
285 return EOF; | |
286 } | |
287 else return 0; | |
288 } | |
289 | |
290 void hclose_abruptly(hFILE *fp) | |
291 { | |
292 int save = errno; | |
293 if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ } | |
294 hfile_destroy(fp); | |
295 errno = save; | |
296 } | |
297 | |
298 | |
299 /*************************** | |
300 * File descriptor backend * | |
301 ***************************/ | |
302 | |
303 #include <sys/socket.h> | |
304 #include <sys/stat.h> | |
305 #include <fcntl.h> | |
306 #include <unistd.h> | |
307 | |
308 #ifdef _WIN32 | |
309 #define HAVE_CLOSESOCKET | |
310 #endif | |
311 | |
312 /* For Unix, it doesn't matter whether a file descriptor is a socket. | |
313 However Windows insists on send()/recv() and its own closesocket() | |
314 being used when fd happens to be a socket. */ | |
315 | |
316 typedef struct { | |
317 hFILE base; | |
318 int fd; | |
319 int is_socket:1; | |
320 } hFILE_fd; | |
321 | |
322 static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes) | |
323 { | |
324 hFILE_fd *fp = (hFILE_fd *) fpv; | |
325 ssize_t n; | |
326 do { | |
327 n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0) | |
328 : read(fp->fd, buffer, nbytes); | |
329 } while (n < 0 && errno == EINTR); | |
330 return n; | |
331 } | |
332 | |
333 static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes) | |
334 { | |
335 hFILE_fd *fp = (hFILE_fd *) fpv; | |
336 ssize_t n; | |
337 do { | |
338 n = fp->is_socket? send(fp->fd, buffer, nbytes, 0) | |
339 : write(fp->fd, buffer, nbytes); | |
340 } while (n < 0 && errno == EINTR); | |
341 return n; | |
342 } | |
343 | |
344 static off_t fd_seek(hFILE *fpv, off_t offset, int whence) | |
345 { | |
346 hFILE_fd *fp = (hFILE_fd *) fpv; | |
347 return lseek(fp->fd, offset, whence); | |
348 } | |
349 | |
350 static int fd_flush(hFILE *fpv) | |
351 { | |
352 hFILE_fd *fp = (hFILE_fd *) fpv; | |
353 int ret; | |
354 do { | |
355 #ifdef HAVE_FDATASYNC | |
356 ret = fdatasync(fp->fd); | |
357 #else | |
358 ret = fsync(fp->fd); | |
359 #endif | |
360 // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe, | |
361 // and operation-not-supported errors (Mac OS X) | |
362 if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0; | |
363 } while (ret < 0 && errno == EINTR); | |
364 return ret; | |
365 } | |
366 | |
367 static int fd_close(hFILE *fpv) | |
368 { | |
369 hFILE_fd *fp = (hFILE_fd *) fpv; | |
370 int ret; | |
371 do { | |
372 #ifdef HAVE_CLOSESOCKET | |
373 ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd); | |
374 #else | |
375 ret = close(fp->fd); | |
376 #endif | |
377 } while (ret < 0 && errno == EINTR); | |
378 return ret; | |
379 } | |
380 | |
381 static const struct hFILE_backend fd_backend = | |
382 { | |
383 fd_read, fd_write, fd_seek, fd_flush, fd_close | |
384 }; | |
385 | |
/* Returns the st_blksize value that fstat() reports for fd, or 0 if fstat()
   fails.  */
static size_t blksize(int fd)
{
    struct stat info;

    if (fstat(fd, &info) == 0) return info.st_blksize;
    return 0;
}
392 | |
393 static hFILE *hopen_fd(const char *filename, const char *mode) | |
394 { | |
395 hFILE_fd *fp = NULL; | |
396 int fd = open(filename, hfile_oflags(mode), 0666); | |
397 if (fd < 0) goto error; | |
398 | |
399 fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); | |
400 if (fp == NULL) goto error; | |
401 | |
402 fp->fd = fd; | |
403 fp->is_socket = 0; | |
404 fp->base.backend = &fd_backend; | |
405 return &fp->base; | |
406 | |
407 error: | |
408 if (fd >= 0) { int save = errno; (void) close(fd); errno = save; } | |
409 hfile_destroy((hFILE *) fp); | |
410 return NULL; | |
411 } | |
412 | |
413 hFILE *hdopen(int fd, const char *mode) | |
414 { | |
415 hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); | |
416 if (fp == NULL) return NULL; | |
417 | |
418 fp->fd = fd; | |
419 fp->is_socket = (strchr(mode, 's') != NULL); | |
420 fp->base.backend = &fd_backend; | |
421 return &fp->base; | |
422 } | |
423 | |
424 static hFILE *hopen_fd_stdinout(const char *mode) | |
425 { | |
426 int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO; | |
427 // TODO Set binary mode (for Windows) | |
428 return hdopen(fd, mode); | |
429 } | |
430 | |
/* Translates an fopen-style mode string into open(2) flags.  'r', 'w' and 'a'
   select the access mode (with 'w' adding O_CREAT|O_TRUNC and 'a' adding
   O_CREAT|O_APPEND), '+' switches the access mode to O_RDWR, and all other
   characters are ignored.  O_BINARY is added where it is defined.  */
int hfile_oflags(const char *mode)
{
    int rw_mode = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char ch = *p++;

        if (ch == 'r') {
            rw_mode = O_RDONLY;
        }
        else if (ch == 'w') {
            rw_mode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (ch == 'a') {
            rw_mode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (ch == '+') {
            rw_mode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return rw_mode | extra;
}
450 | |
451 | |
452 /********************* | |
453 * In-memory backend * | |
454 *********************/ | |
455 | |
456 typedef struct { | |
457 hFILE base; | |
458 const char *buffer; | |
459 size_t length, pos; | |
460 } hFILE_mem; | |
461 | |
462 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes) | |
463 { | |
464 hFILE_mem *fp = (hFILE_mem *) fpv; | |
465 size_t avail = fp->length - fp->pos; | |
466 if (nbytes > avail) nbytes = avail; | |
467 memcpy(buffer, fp->buffer + fp->pos, nbytes); | |
468 fp->pos += nbytes; | |
469 return nbytes; | |
470 } | |
471 | |
472 static off_t mem_seek(hFILE *fpv, off_t offset, int whence) | |
473 { | |
474 hFILE_mem *fp = (hFILE_mem *) fpv; | |
475 size_t absoffset = (offset >= 0)? offset : -offset; | |
476 size_t origin; | |
477 | |
478 switch (whence) { | |
479 case SEEK_SET: origin = 0; break; | |
480 case SEEK_CUR: origin = fp->pos; break; | |
481 case SEEK_END: origin = fp->length; break; | |
482 default: errno = EINVAL; return -1; | |
483 } | |
484 | |
485 if ((offset < 0 && absoffset > origin) || | |
486 (offset >= 0 && absoffset > fp->length - origin)) { | |
487 errno = EINVAL; | |
488 return -1; | |
489 } | |
490 | |
491 fp->pos = origin + offset; | |
492 return fp->pos; | |
493 } | |
494 | |
495 static int mem_close(hFILE *fpv) | |
496 { | |
497 return 0; | |
498 } | |
499 | |
500 static const struct hFILE_backend mem_backend = | |
501 { | |
502 mem_read, NULL, mem_seek, NULL, mem_close | |
503 }; | |
504 | |
505 static hFILE *hopen_mem(const char *data, const char *mode) | |
506 { | |
507 // TODO Implement write modes, which will require memory allocation | |
508 if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; } | |
509 | |
510 hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0); | |
511 if (fp == NULL) return NULL; | |
512 | |
513 fp->buffer = data; | |
514 fp->length = strlen(data); | |
515 fp->pos = 0; | |
516 fp->base.backend = &mem_backend; | |
517 return &fp->base; | |
518 } | |
519 | |
520 | |
521 /****************************** | |
522 * hopen() backend dispatcher * | |
523 ******************************/ | |
524 | |
525 hFILE *hopen(const char *fname, const char *mode) | |
526 { | |
527 if (strncmp(fname, "http://", 7) == 0 || | |
528 strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode); | |
529 #ifdef HAVE_IRODS | |
530 else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode); | |
531 #endif | |
532 else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode); | |
533 else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); | |
534 else return hopen_fd(fname, mode); | |
535 } | |
536 | |
/* Returns 1 if fname begins with one of the remote-access prefixes
   ("http://", "https://", "ftp://", plus "irods:" when HAVE_IRODS is
   defined), and 0 otherwise.  */
int hisremote(const char *fname)
{
    // FIXME Make a new backend entry to return this
    static const char *const remote_prefixes[] = {
        "http://",
        "https://",
        "ftp://",
#ifdef HAVE_IRODS
        "irods:",
#endif
    };
    const size_t nprefixes = sizeof remote_prefixes / sizeof remote_prefixes[0];
    size_t i;

    for (i = 0; i < nprefixes; i++) {
        const char *prefix = remote_prefixes[i];
        if (strncmp(fname, prefix, strlen(prefix)) == 0) return 1;
    }

    return 0;
}