0
|
1 /* hfile.c -- buffered low-level input/output streams.
|
|
2
|
|
3 Copyright (C) 2013-2015 Genome Research Ltd.
|
|
4
|
|
5 Author: John Marshall <jm18@sanger.ac.uk>
|
|
6
|
|
7 Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8 of this software and associated documentation files (the "Software"), to deal
|
|
9 in the Software without restriction, including without limitation the rights
|
|
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11 copies of the Software, and to permit persons to whom the Software is
|
|
12 furnished to do so, subject to the following conditions:
|
|
13
|
|
14 The above copyright notice and this permission notice shall be included in
|
|
15 all copies or substantial portions of the Software.
|
|
16
|
|
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
20 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
22 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
23 DEALINGS IN THE SOFTWARE. */
|
|
24
|
|
25 #include <stdio.h>
|
|
26 #include <stdlib.h>
|
|
27 #include <string.h>
|
|
28 #include <errno.h>
|
|
29
|
|
30 #include "htslib/hfile.h"
|
|
31 #include "hfile_internal.h"
|
|
32
|
|
33 /* hFILE fields are used as follows:
|
|
34
|
|
35 char *buffer; // Pointer to the start of the I/O buffer
|
|
36 char *begin; // First not-yet-read character / unused position
|
|
37 char *end; // First unfilled/unfillable position
|
|
38 char *limit; // Pointer to the first position past the buffer
|
|
39
|
|
40 const hFILE_backend *backend; // Methods to refill/flush I/O buffer
|
|
41
|
|
42 off_t offset; // Offset within the stream of buffer position 0
|
|
43 int at_eof:1; // For reading, whether EOF has been seen
|
|
44 int has_errno; // Error number from the last failure on this stream
|
|
45
|
|
46 For reading, begin is the first unread character in the buffer and end is the
|
|
47 first unfilled position:
|
|
48
|
|
49 -----------ABCDEFGHIJKLMNO---------------
|
|
50 ^buffer ^begin ^end ^limit
|
|
51
|
|
52 For writing, begin is the first unused position and end is unused so remains
|
|
53 equal to buffer:
|
|
54
|
|
55 ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
|
|
56 ^buffer ^begin ^limit
|
|
57 ^end
|
|
58
|
|
59 Thus if begin > end then there is a non-empty write buffer, if begin < end
|
|
60 then there is a non-empty read buffer, and if begin == end then both buffers
|
|
61 are empty. In all cases, the stream's file position indicator corresponds
|
|
62 to the position pointed to by begin. */
|
|
63
|
|
64 hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
|
|
65 {
|
|
66 hFILE *fp = (hFILE *) malloc(struct_size);
|
|
67 if (fp == NULL) goto error;
|
|
68
|
|
69 if (capacity == 0) capacity = 32768;
|
|
70 // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
|
|
71 if (strchr(mode, 'r') && capacity > 32768) capacity = 32768;
|
|
72
|
|
73 fp->buffer = (char *) malloc(capacity);
|
|
74 if (fp->buffer == NULL) goto error;
|
|
75
|
|
76 fp->begin = fp->end = fp->buffer;
|
|
77 fp->limit = &fp->buffer[capacity];
|
|
78
|
|
79 fp->offset = 0;
|
|
80 fp->at_eof = 0;
|
|
81 fp->has_errno = 0;
|
|
82 return fp;
|
|
83
|
|
84 error:
|
|
85 hfile_destroy(fp);
|
|
86 return NULL;
|
|
87 }
|
|
88
|
|
89 void hfile_destroy(hFILE *fp)
|
|
90 {
|
|
91 int save = errno;
|
|
92 if (fp) free(fp->buffer);
|
|
93 free(fp);
|
|
94 errno = save;
|
|
95 }
|
|
96
|
|
97 static inline int writebuffer_is_nonempty(hFILE *fp)
|
|
98 {
|
|
99 return fp->begin > fp->end;
|
|
100 }
|
|
101
|
|
102 /* Refills the read buffer from the backend (once, so may only partially
|
|
103 fill the buffer), returning the number of additional characters read
|
|
104 (which might be 0), or negative when an error occurred. */
|
|
105 static ssize_t refill_buffer(hFILE *fp)
|
|
106 {
|
|
107 ssize_t n;
|
|
108
|
|
109 // Move any unread characters to the start of the buffer
|
|
110 if (fp->begin > fp->buffer) {
|
|
111 fp->offset += fp->begin - fp->buffer;
|
|
112 memmove(fp->buffer, fp->begin, fp->end - fp->begin);
|
|
113 fp->end = &fp->buffer[fp->end - fp->begin];
|
|
114 fp->begin = fp->buffer;
|
|
115 }
|
|
116
|
|
117 // Read into the available buffer space at fp->[end,limit)
|
|
118 if (fp->at_eof || fp->end == fp->limit) n = 0;
|
|
119 else {
|
|
120 n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
|
|
121 if (n < 0) { fp->has_errno = errno; return n; }
|
|
122 else if (n == 0) fp->at_eof = 1;
|
|
123 }
|
|
124
|
|
125 fp->end += n;
|
|
126 return n;
|
|
127 }
|
|
128
|
|
129 /* Called only from hgetc(), when our buffer is empty. */
|
|
130 int hgetc2(hFILE *fp)
|
|
131 {
|
|
132 return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
|
|
133 }
|
|
134
|
|
135 ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
|
|
136 {
|
|
137 size_t n = fp->end - fp->begin;
|
|
138 while (n < nbytes) {
|
|
139 ssize_t ret = refill_buffer(fp);
|
|
140 if (ret < 0) return ret;
|
|
141 else if (ret == 0) break;
|
|
142 else n += ret;
|
|
143 }
|
|
144
|
|
145 if (n > nbytes) n = nbytes;
|
|
146 memcpy(buffer, fp->begin, n);
|
|
147 return n;
|
|
148 }
|
|
149
|
|
150 /* Called only from hread(); when called, our buffer is empty and nread bytes
|
|
151 have already been placed in the destination buffer. */
|
|
152 ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
|
|
153 {
|
|
154 const size_t capacity = fp->limit - fp->buffer;
|
|
155 char *dest = (char *) destv;
|
|
156 dest += nread, nbytes -= nread;
|
|
157
|
|
158 // Read large requests directly into the destination buffer
|
|
159 while (nbytes * 2 >= capacity && !fp->at_eof) {
|
|
160 ssize_t n = fp->backend->read(fp, dest, nbytes);
|
|
161 if (n < 0) { fp->has_errno = errno; return n; }
|
|
162 else if (n == 0) fp->at_eof = 1;
|
|
163 fp->offset += n;
|
|
164 dest += n, nbytes -= n;
|
|
165 nread += n;
|
|
166 }
|
|
167
|
|
168 while (nbytes > 0 && !fp->at_eof) {
|
|
169 size_t n;
|
|
170 ssize_t ret = refill_buffer(fp);
|
|
171 if (ret < 0) return ret;
|
|
172
|
|
173 n = fp->end - fp->begin;
|
|
174 if (n > nbytes) n = nbytes;
|
|
175 memcpy(dest, fp->begin, n);
|
|
176 fp->begin += n;
|
|
177 dest += n, nbytes -= n;
|
|
178 nread += n;
|
|
179 }
|
|
180
|
|
181 return nread;
|
|
182 }
|
|
183
|
|
184 /* Flushes the write buffer, fp->[buffer,begin), out through the backend
|
|
185 returning 0 on success or negative if an error occurred. */
|
|
186 static ssize_t flush_buffer(hFILE *fp)
|
|
187 {
|
|
188 const char *buffer = fp->buffer;
|
|
189 while (buffer < fp->begin) {
|
|
190 ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
|
|
191 if (n < 0) { fp->has_errno = errno; return n; }
|
|
192 buffer += n;
|
|
193 fp->offset += n;
|
|
194 }
|
|
195
|
|
196 fp->begin = fp->buffer; // Leave the buffer empty
|
|
197 return 0;
|
|
198 }
|
|
199
|
|
200 int hflush(hFILE *fp)
|
|
201 {
|
|
202 if (flush_buffer(fp) < 0) return EOF;
|
|
203 if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; }
|
|
204 return 0;
|
|
205 }
|
|
206
|
|
207 /* Called only from hputc(), when our buffer is already full. */
|
|
208 int hputc2(int c, hFILE *fp)
|
|
209 {
|
|
210 if (flush_buffer(fp) < 0) return EOF;
|
|
211 *(fp->begin++) = c;
|
|
212 return c;
|
|
213 }
|
|
214
|
|
215 /* Called only from hwrite() and hputs2(); when called, our buffer is full and
|
|
216 ncopied bytes from the source have already been copied to our buffer. */
|
|
217 ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
|
|
218 {
|
|
219 const char *src = (const char *) srcv;
|
|
220 ssize_t ret;
|
|
221 const size_t capacity = fp->limit - fp->buffer;
|
|
222 size_t remaining = totalbytes - ncopied;
|
|
223 src += ncopied;
|
|
224
|
|
225 ret = flush_buffer(fp);
|
|
226 if (ret < 0) return ret;
|
|
227
|
|
228 // Write large blocks out directly from the source buffer
|
|
229 while (remaining * 2 >= capacity) {
|
|
230 ssize_t n = fp->backend->write(fp, src, remaining);
|
|
231 if (n < 0) { fp->has_errno = errno; return n; }
|
|
232 fp->offset += n;
|
|
233 src += n, remaining -= n;
|
|
234 }
|
|
235
|
|
236 // Just buffer any remaining characters
|
|
237 memcpy(fp->begin, src, remaining);
|
|
238 fp->begin += remaining;
|
|
239
|
|
240 return totalbytes;
|
|
241 }
|
|
242
|
|
243 /* Called only from hputs(), when our buffer is already full. */
|
|
244 int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
|
|
245 {
|
|
246 return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
|
|
247 }
|
|
248
|
|
249 off_t hseek(hFILE *fp, off_t offset, int whence)
|
|
250 {
|
|
251 off_t pos;
|
|
252
|
|
253 if (writebuffer_is_nonempty(fp)) {
|
|
254 int ret = flush_buffer(fp);
|
|
255 if (ret < 0) return ret;
|
|
256 }
|
|
257 else {
|
|
258 // Convert relative offsets from being relative to the hFILE's stream
|
|
259 // position (at begin) to being relative to the backend's physical
|
|
260 // stream position (at end, due to the buffering read-ahead).
|
|
261 if (whence == SEEK_CUR) offset -= fp->end - fp->begin;
|
|
262 }
|
|
263
|
|
264 pos = fp->backend->seek(fp, offset, whence);
|
|
265 if (pos < 0) { fp->has_errno = errno; return pos; }
|
|
266
|
|
267 // Seeking succeeded, so discard any non-empty read buffer
|
|
268 fp->begin = fp->end = fp->buffer;
|
|
269 fp->at_eof = 0;
|
|
270
|
|
271 fp->offset = pos;
|
|
272 return pos;
|
|
273 }
|
|
274
|
|
275 int hclose(hFILE *fp)
|
|
276 {
|
|
277 int err = fp->has_errno;
|
|
278
|
|
279 if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno;
|
|
280 if (fp->backend->close(fp) < 0) err = errno;
|
|
281 hfile_destroy(fp);
|
|
282
|
|
283 if (err) {
|
|
284 errno = err;
|
|
285 return EOF;
|
|
286 }
|
|
287 else return 0;
|
|
288 }
|
|
289
|
|
290 void hclose_abruptly(hFILE *fp)
|
|
291 {
|
|
292 int save = errno;
|
|
293 if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ }
|
|
294 hfile_destroy(fp);
|
|
295 errno = save;
|
|
296 }
|
|
297
|
|
298
|
|
299 /***************************
|
|
300 * File descriptor backend *
|
|
301 ***************************/
|
|
302
|
|
303 #include <sys/socket.h>
|
|
304 #include <sys/stat.h>
|
|
305 #include <fcntl.h>
|
|
306 #include <unistd.h>
|
|
307
|
|
308 #ifdef _WIN32
|
|
309 #define HAVE_CLOSESOCKET
|
|
310 #endif
|
|
311
|
|
312 /* For Unix, it doesn't matter whether a file descriptor is a socket.
|
|
313 However Windows insists on send()/recv() and its own closesocket()
|
|
314 being used when fd happens to be a socket. */
|
|
315
|
|
316 typedef struct {
|
|
317 hFILE base;
|
|
318 int fd;
|
|
319 int is_socket:1;
|
|
320 } hFILE_fd;
|
|
321
|
|
322 static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
|
|
323 {
|
|
324 hFILE_fd *fp = (hFILE_fd *) fpv;
|
|
325 ssize_t n;
|
|
326 do {
|
|
327 n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0)
|
|
328 : read(fp->fd, buffer, nbytes);
|
|
329 } while (n < 0 && errno == EINTR);
|
|
330 return n;
|
|
331 }
|
|
332
|
|
333 static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
|
|
334 {
|
|
335 hFILE_fd *fp = (hFILE_fd *) fpv;
|
|
336 ssize_t n;
|
|
337 do {
|
|
338 n = fp->is_socket? send(fp->fd, buffer, nbytes, 0)
|
|
339 : write(fp->fd, buffer, nbytes);
|
|
340 } while (n < 0 && errno == EINTR);
|
|
341 return n;
|
|
342 }
|
|
343
|
|
344 static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
|
|
345 {
|
|
346 hFILE_fd *fp = (hFILE_fd *) fpv;
|
|
347 return lseek(fp->fd, offset, whence);
|
|
348 }
|
|
349
|
|
350 static int fd_flush(hFILE *fpv)
|
|
351 {
|
|
352 hFILE_fd *fp = (hFILE_fd *) fpv;
|
|
353 int ret;
|
|
354 do {
|
|
355 #ifdef HAVE_FDATASYNC
|
|
356 ret = fdatasync(fp->fd);
|
|
357 #else
|
|
358 ret = fsync(fp->fd);
|
|
359 #endif
|
|
360 // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe,
|
|
361 // and operation-not-supported errors (Mac OS X)
|
|
362 if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0;
|
|
363 } while (ret < 0 && errno == EINTR);
|
|
364 return ret;
|
|
365 }
|
|
366
|
|
367 static int fd_close(hFILE *fpv)
|
|
368 {
|
|
369 hFILE_fd *fp = (hFILE_fd *) fpv;
|
|
370 int ret;
|
|
371 do {
|
|
372 #ifdef HAVE_CLOSESOCKET
|
|
373 ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
|
|
374 #else
|
|
375 ret = close(fp->fd);
|
|
376 #endif
|
|
377 } while (ret < 0 && errno == EINTR);
|
|
378 return ret;
|
|
379 }
|
|
380
|
|
381 static const struct hFILE_backend fd_backend =
|
|
382 {
|
|
383 fd_read, fd_write, fd_seek, fd_flush, fd_close
|
|
384 };
|
|
385
|
|
/* Returns the st_blksize that fstat() reports for fd, or 0 when fstat()
   fails.  Callers in this file pass the result to hfile_init() as the buffer
   capacity, where 0 selects the default size. */
static size_t blksize(int fd)
{
    struct stat info;
    return (fstat(fd, &info) == 0)? info.st_blksize : 0;
}
|
|
392
|
|
393 static hFILE *hopen_fd(const char *filename, const char *mode)
|
|
394 {
|
|
395 hFILE_fd *fp = NULL;
|
|
396 int fd = open(filename, hfile_oflags(mode), 0666);
|
|
397 if (fd < 0) goto error;
|
|
398
|
|
399 fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
|
|
400 if (fp == NULL) goto error;
|
|
401
|
|
402 fp->fd = fd;
|
|
403 fp->is_socket = 0;
|
|
404 fp->base.backend = &fd_backend;
|
|
405 return &fp->base;
|
|
406
|
|
407 error:
|
|
408 if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
|
|
409 hfile_destroy((hFILE *) fp);
|
|
410 return NULL;
|
|
411 }
|
|
412
|
|
413 hFILE *hdopen(int fd, const char *mode)
|
|
414 {
|
|
415 hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
|
|
416 if (fp == NULL) return NULL;
|
|
417
|
|
418 fp->fd = fd;
|
|
419 fp->is_socket = (strchr(mode, 's') != NULL);
|
|
420 fp->base.backend = &fd_backend;
|
|
421 return &fp->base;
|
|
422 }
|
|
423
|
|
424 static hFILE *hopen_fd_stdinout(const char *mode)
|
|
425 {
|
|
426 int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO;
|
|
427 // TODO Set binary mode (for Windows)
|
|
428 return hdopen(fd, mode);
|
|
429 }
|
|
430
|
|
/* Translates an fopen-style mode string into open(2) flags.

   Characters are processed left to right: 'r', 'w', 'a' and '+' each replace
   the access mode chosen so far (read-only, write-only, write-only and
   read-write respectively), while 'w' and 'a' additionally accumulate
   O_CREAT|O_TRUNC and O_CREAT|O_APPEND.  Any other character is ignored.
   O_BINARY is added on platforms that define it. */
int hfile_oflags(const char *mode)
{
    int accmode = 0;
    int extra = 0;

    while (*mode != '\0') {
        const char c = *mode++;

        if (c == 'r') {
            accmode = O_RDONLY;
        }
        else if (c == 'w') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            accmode = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            accmode = O_RDWR;
        }
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return accmode | extra;
}
|
|
450
|
|
451
|
|
452 /*********************
|
|
453 * In-memory backend *
|
|
454 *********************/
|
|
455
|
|
456 typedef struct {
|
|
457 hFILE base;
|
|
458 const char *buffer;
|
|
459 size_t length, pos;
|
|
460 } hFILE_mem;
|
|
461
|
|
462 static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)
|
|
463 {
|
|
464 hFILE_mem *fp = (hFILE_mem *) fpv;
|
|
465 size_t avail = fp->length - fp->pos;
|
|
466 if (nbytes > avail) nbytes = avail;
|
|
467 memcpy(buffer, fp->buffer + fp->pos, nbytes);
|
|
468 fp->pos += nbytes;
|
|
469 return nbytes;
|
|
470 }
|
|
471
|
|
472 static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
|
|
473 {
|
|
474 hFILE_mem *fp = (hFILE_mem *) fpv;
|
|
475 size_t absoffset = (offset >= 0)? offset : -offset;
|
|
476 size_t origin;
|
|
477
|
|
478 switch (whence) {
|
|
479 case SEEK_SET: origin = 0; break;
|
|
480 case SEEK_CUR: origin = fp->pos; break;
|
|
481 case SEEK_END: origin = fp->length; break;
|
|
482 default: errno = EINVAL; return -1;
|
|
483 }
|
|
484
|
|
485 if ((offset < 0 && absoffset > origin) ||
|
|
486 (offset >= 0 && absoffset > fp->length - origin)) {
|
|
487 errno = EINVAL;
|
|
488 return -1;
|
|
489 }
|
|
490
|
|
491 fp->pos = origin + offset;
|
|
492 return fp->pos;
|
|
493 }
|
|
494
|
|
495 static int mem_close(hFILE *fpv)
|
|
496 {
|
|
497 return 0;
|
|
498 }
|
|
499
|
|
500 static const struct hFILE_backend mem_backend =
|
|
501 {
|
|
502 mem_read, NULL, mem_seek, NULL, mem_close
|
|
503 };
|
|
504
|
|
505 static hFILE *hopen_mem(const char *data, const char *mode)
|
|
506 {
|
|
507 // TODO Implement write modes, which will require memory allocation
|
|
508 if (strchr(mode, 'r') == NULL) { errno = EINVAL; return NULL; }
|
|
509
|
|
510 hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);
|
|
511 if (fp == NULL) return NULL;
|
|
512
|
|
513 fp->buffer = data;
|
|
514 fp->length = strlen(data);
|
|
515 fp->pos = 0;
|
|
516 fp->base.backend = &mem_backend;
|
|
517 return &fp->base;
|
|
518 }
|
|
519
|
|
520
|
|
521 /******************************
|
|
522 * hopen() backend dispatcher *
|
|
523 ******************************/
|
|
524
|
|
525 hFILE *hopen(const char *fname, const char *mode)
|
|
526 {
|
|
527 if (strncmp(fname, "http://", 7) == 0 ||
|
|
528 strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);
|
|
529 #ifdef HAVE_IRODS
|
|
530 else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);
|
|
531 #endif
|
|
532 else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);
|
|
533 else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
|
|
534 else return hopen_fd(fname, mode);
|
|
535 }
|
|
536
|
|
/* Returns 1 when fname starts with one of the prefixes regarded as remote
   ("http://", "https://", "ftp://", plus "irods:" when HAVE_IRODS is
   defined), and 0 otherwise. */
int hisremote(const char *fname)
{
    // FIXME Make a new backend entry to return this
    static const char *const remote_prefixes[] = {
        "http://",
        "https://",
        "ftp://",
#ifdef HAVE_IRODS
        "irods:",
#endif
    };
    size_t i;

    for (i = 0; i < sizeof remote_prefixes / sizeof remote_prefixes[0]; i++) {
        const char *prefix = remote_prefixes[i];
        if (strncmp(fname, prefix, strlen(prefix)) == 0) return 1;
    }

    return 0;
}
|