annotate PsiCLASS-1.0.2/samtools-0.1.19/bgzf.h @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 /* The MIT License
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 2011, 2012 Attractive Chaos <attractor@live.co.uk>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 Permission is hereby granted, free of charge, to any person obtaining a copy
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 of this software and associated documentation files (the "Software"), to deal
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 in the Software without restriction, including without limitation the rights
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 copies of the Software, and to permit persons to whom the Software is
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 furnished to do so, subject to the following conditions:
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 The above copyright notice and this permission notice shall be included in
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 all copies or substantial portions of the Software.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 THE SOFTWARE.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 /* The BGZF library was originally written by Bob Handsaker from the Broad
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 * Institute. It was later improved by the SAMtools developers. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 #ifndef __BGZF_H
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 #define __BGZF_H
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 #include <stdint.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 #include <zlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 #include <sys/types.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 #define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 #define BGZF_MAX_BLOCK_SIZE 0x10000
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 #define BGZF_ERR_ZLIB 1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 #define BGZF_ERR_HEADER 2
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 #define BGZF_ERR_IO 4
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 #define BGZF_ERR_MISUSE 8
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
/**
 * State of one open BGZF stream, as handed out by bgzf_open()/bgzf_dopen()
 * and passed to every other bgzf_* routine.
 */
typedef struct {
    /* Three bit-fields declared back to back (16 + 2 + 14 bits). */
    int errcode:16;            // error state; presumably an OR of the BGZF_ERR_* flags -- confirm in bgzf.c
    int is_write:2;            // presumably non-zero when opened for writing -- confirm in bgzf.c
    int compress_level:14;     // presumably the zlib level taken from the mode string -- confirm in bgzf.c
    int cache_size;            // presumably the value given to bgzf_set_cache_size() -- confirm
    int block_length;
    int block_offset;          // supplies the low 16 bits of the value built by bgzf_tell()
    int64_t block_address;     // shifted left by 16 by bgzf_tell() to form the upper bits
    void *uncompressed_block;
    void *compressed_block;
    void *cache;               // a pointer to a hash table
    void *fp;                  // actual file handler; FILE* on writing; FILE* or knetFile* on reading
    void *mt;                  // only used for multi-threading
} BGZF;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/*
 * String buffer type filled in by bgzf_getline(). The KSTRING_T guard skips
 * this definition when the macro has already been defined elsewhere.
 */
typedef struct __kstring_t {
    size_t l;   // presumably the current string length -- confirm against kstring.h
    size_t m;   // presumably the allocated size of s -- confirm against kstring.h
    char *s;    // character data
} kstring_t;
#endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 #ifdef __cplusplus
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 extern "C" {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 /******************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 * Basic routines *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 ******************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 * Open an existing file descriptor for reading or writing.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 * @param fd file descriptor
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 * @return BGZF file handler; 0 on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 BGZF* bgzf_dopen(int fd, const char *mode);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 * Open the specified file for reading or writing.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 BGZF* bgzf_open(const char* path, const char *mode);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 * Close the BGZF and free all associated resources.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 * @return 0 on success and -1 on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 int bgzf_close(BGZF *fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 * Read up to _length_ bytes from the file storing into _data_.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 * @param data data array to read into
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 * @param length size of data to read
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 * @return number of bytes actually read; 0 on end-of-file and -1 on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 * Write _length_ bytes from _data_ to the file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 * @param data data array to write
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 * @param length size of data to write
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 * @return number of bytes actually written; -1 on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 * Write the data in the buffer to the file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 int bgzf_flush(BGZF *fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 * Return a virtual file pointer to the current location in the file.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 * No interpretation of the value should be made, other than a subsequent
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 * call to bgzf_seek can be used to position the file at the same point.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 * Return value is non-negative on success.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
// Combines block_address (shifted left by 16) with the low 16 bits of block_offset.
// The argument is parenthesized so any pointer expression (e.g. `p + 1`) expands
// correctly; note that it is evaluated twice, so avoid arguments with side effects.
#define bgzf_tell(fp) ((((fp)->block_address) << 16) | (((fp)->block_offset) & 0xFFFF))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 * Set the file to read from the location specified by _pos_.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 * @param pos virtual file offset returned by bgzf_tell()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 * @param whence must be SEEK_SET
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 * @return 0 on success and -1 on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 * Check if the BGZF end-of-file (EOF) marker is present
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 * @param fp BGZF file handler opened for reading
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 * @return 1 if EOF is present; 0 if not or on I/O error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 int bgzf_check_EOF(BGZF *fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 * Check if a file is in the BGZF format
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 * @param fn file name
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 * @return 1 if _fn_ is BGZF; 0 if not or on I/O error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 int bgzf_is_bgzf(const char *fn);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 /*********************
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 * Advanced routines *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 *********************/
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160 * Set the cache size. Only effective when compiled with -DBGZF_CACHE.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 * @param size size of cache in bytes; 0 to disable caching (default)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 void bgzf_set_cache_size(BGZF *fp, int size);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 * Flush the file if the remaining buffer size is smaller than _size_
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 int bgzf_flush_try(BGZF *fp, ssize_t size);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 * Read one byte from a BGZF file. It is faster than bgzf_read()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
174 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
175 * @return byte read; -1 on end-of-file or error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
176 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
177 int bgzf_getc(BGZF *fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
178
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
179 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
180 * Read one line from a BGZF file. It is faster than bgzf_getc()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
181 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
182 * @param fp BGZF file handler
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
183 * @param delim delimiter
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
184 * @param str string to write to; must be initialized
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
185 * @return length of the string; 0 on end-of-file; negative on error
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
186 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
187 int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
188
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
189 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
190 * Read the next BGZF block.
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
191 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
192 int bgzf_read_block(BGZF *fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
193
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
194 /**
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
195 * Enable multi-threading (only effective on writing)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
196 *
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
197 * @param fp BGZF file handler; must be opened for writing
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
198 * @param n_threads #threads used for writing
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
199 * @param n_sub_blks #blocks processed by each thread; a value 64-256 is recommended
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
200 */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
201 int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
202
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
203 #ifdef __cplusplus
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
204 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
205 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
206
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
207 #endif