0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2008, by Heng Li <lh3@sanger.ac.uk>
|
|
4
|
|
5 Permission is hereby granted, free of charge, to any person obtaining
|
|
6 a copy of this software and associated documentation files (the
|
|
7 "Software"), to deal in the Software without restriction, including
|
|
8 without limitation the rights to use, copy, modify, merge, publish,
|
|
9 distribute, sublicense, and/or sell copies of the Software, and to
|
|
10 permit persons to whom the Software is furnished to do so, subject to
|
|
11 the following conditions:
|
|
12
|
|
13 The above copyright notice and this permission notice shall be
|
|
14 included in all copies or substantial portions of the Software.
|
|
15
|
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23 SOFTWARE.
|
|
24 */
|
|
25
|
|
26 #ifndef AC_KSEQ_H
|
|
27 #define AC_KSEQ_H
|
|
28
|
|
29 #include <ctype.h>
|
|
30 #include <string.h>
|
|
31 #include <stdlib.h>
|
|
32
|
|
/*
 * __KS_TYPE(type_t): declare kstream_t, a buffered reader over a handle of
 * type `type_t`.
 *
 *   buf     I/O buffer
 *   begin   index in buf of the next byte to hand out
 *   end     number of valid bytes currently held in buf
 *   is_eof  non-zero once the reader has decided the input is exhausted
 *   f       the underlying handle, passed as first argument to the read callback
 */
#define __KS_TYPE(type_t)        \
	typedef struct __kstream_t { \
		char *buf;               \
		int begin;               \
		int end;                 \
		int is_eof;              \
		type_t f;                \
	} kstream_t;
|
|
39
|
|
/* True when the stream is flagged as finished and no buffered byte is left unread. */
#define ks_eof(ks) ((ks)->is_eof && !((ks)->begin < (ks)->end))
|
|
/* Forget all buffered data and the EOF flag; the expression evaluates to 0.
 * Only the bookkeeping fields are reset here, the underlying handle is not touched. */
#define ks_rewind(ks) ((ks)->end = 0, (ks)->begin = 0, (ks)->is_eof = 0)
|
|
42
|
|
/*
 * __KS_BASIC(type_t, __bufsize): emit the constructor/destructor pair for
 * kstream_t.
 *
 *   ks_init(f)     Allocates a zeroed kstream_t wrapping handle `f` together
 *                  with a read buffer of __bufsize bytes. Returns NULL when
 *                  either allocation fails (nothing is leaked in that case).
 *   ks_destroy(ks) Releases the buffer and the stream object. NULL is accepted
 *                  and ignored. The wrapped handle `f` is NOT closed.
 */
#define __KS_BASIC(type_t, __bufsize) \
	static inline kstream_t *ks_init(type_t f) \
	{ \
		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
		if (ks == NULL) return NULL; \
		ks->buf = (char*)malloc(__bufsize); \
		if (ks->buf == NULL) { /* do not hand out a stream without a buffer */ \
			free(ks); \
			return NULL; \
		} \
		ks->f = f; \
		return ks; \
	} \
	static inline void ks_destroy(kstream_t *ks) \
	{ \
		if (ks) { \
			free(ks->buf); \
			free(ks); \
		} \
	}
|
|
58
|
|
/*
 * __KS_GETC(__read, __bufsize): emit ks_getc(), which returns the next byte of
 * the stream or -1 when no more data is available.
 *
 * __read(f, buf, n) must return the number of bytes stored in buf, 0 at end of
 * input, or a negative value on error.
 *
 * Notes on behaviour:
 *   - The byte is returned as an unsigned char converted to int (0..255), so a
 *     0xFF data byte can never be mistaken for the -1 end marker, regardless of
 *     whether plain `char` is signed.
 *   - A read that delivers fewer than __bufsize bytes is NOT treated as end of
 *     input (pipes and sockets routinely do that); only a return value <= 0
 *     ends the stream. A negative (error) return is handled like end of input.
 */
#define __KS_GETC(__read, __bufsize) \
	static inline int ks_getc(kstream_t *ks) \
	{ \
		if (ks->begin >= ks->end) { /* buffer exhausted: refill or stop */ \
			if (ks->is_eof) return -1; \
			ks->begin = 0; \
			ks->end = __read(ks->f, ks->buf, __bufsize); \
			if (ks->end <= 0) { \
				ks->end = 0; /* normalise an error return so begin/end stay consistent */ \
				ks->is_eof = 1; \
				return -1; \
			} \
		} \
		return (int)(unsigned char)ks->buf[ks->begin++]; \
	}
|
|
71
|
|
#ifndef KSTRING_T
#define KSTRING_T kstring_t
/*
 * Growable character buffer.
 *   l  number of characters currently stored (terminator not counted)
 *   m  number of bytes allocated for s
 *   s  heap buffer, NULL until the first allocation
 */
typedef struct __kstring_t {
	size_t l;
	size_t m;
	char *s;
} kstring_t;
#endif
|
|
79
|
|
#ifndef kroundup32
/*
 * Round the integer lvalue x up, in place, to the next power of two; a value
 * that already is a power of two is left unchanged. The bit smearing covers
 * 32 bits. x is evaluated several times, so it must be free of side effects.
 * The expression evaluates to the new value of x.
 */
#define kroundup32(x)    \
	(--(x),              \
	 (x) |= (x) >> 1,    \
	 (x) |= (x) >> 2,    \
	 (x) |= (x) >> 4,    \
	 (x) |= (x) >> 8,    \
	 (x) |= (x) >> 16,   \
	 ++(x))
#endif
|
|
83
|
|
/*
 * __KS_GETUNTIL(__read, __bufsize): emit ks_getuntil(), which reads one token
 * from the stream into `str`.
 *
 *   ks         stream to read from
 *   delimiter  byte that ends the token; 0 means "any isspace() character"
 *   str        receives the token (overwritten, always NUL-terminated on
 *              success); its buffer is grown with realloc as needed
 *   dret       if non-NULL, receives the delimiter that ended the token, or 0
 *              when the token was ended by end of input
 *
 * Returns the token length (>= 0; an empty token is valid when the delimiter
 * is the very next byte), -1 when the stream is exhausted and nothing could be
 * read, or -2 when memory could not be allocated (str keeps its old buffer).
 * The delimiter itself is consumed but not stored.
 *
 * __read(f, buf, n) must return the number of bytes stored, 0 at end of input
 * or a negative value on error. Only a return value <= 0 ends the stream: a
 * short read is not treated as end of input, and a read error is handled like
 * end of input.
 */
#define __KS_GETUNTIL(__read, __bufsize) \
	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
	{ \
		int gotany = 0; /* set once at least one buffer chunk was scanned */ \
		if (dret) *dret = 0; \
		str->l = 0; \
		if (ks->begin >= ks->end && ks->is_eof) return -1; \
		for (;;) { \
			int i; \
			size_t n; \
			if (ks->begin >= ks->end) { /* buffer exhausted: refill or stop */ \
				if (ks->is_eof) break; \
				ks->begin = 0; \
				ks->end = __read(ks->f, ks->buf, __bufsize); \
				if (ks->end <= 0) { /* end of input or read error */ \
					ks->end = 0; \
					ks->is_eof = 1; \
					break; \
				} \
			} \
			if (delimiter) { \
				for (i = ks->begin; i < ks->end; ++i) \
					if (ks->buf[i] == delimiter) break; \
			} else { \
				/* <ctype.h> functions require an unsigned char value */ \
				for (i = ks->begin; i < ks->end; ++i) \
					if (isspace((unsigned char)ks->buf[i])) break; \
			} \
			n = (size_t)(i - ks->begin); \
			if (str->m - str->l < n + 1) { /* need room for the chunk plus the NUL */ \
				size_t cap = str->l + n + 1; \
				char *tmp; \
				kroundup32(cap); \
				tmp = (char*)realloc(str->s, cap); \
				if (tmp == NULL) return -2; /* str->s is still the old, valid buffer */ \
				str->s = tmp; \
				str->m = cap; \
			} \
			gotany = 1; \
			memcpy(str->s + str->l, ks->buf + ks->begin, n); \
			str->l += n; \
			ks->begin = i + 1; /* skip the delimiter (or step past the buffer end) */ \
			if (i < ks->end) { /* stopped on a delimiter, token complete */ \
				if (dret) *dret = ks->buf[i]; \
				break; \
			} \
		} \
		if (!gotany && ks->is_eof && ks->begin >= ks->end) return -1; \
		if (str->s == NULL) { /* never write the terminator through a NULL pointer */ \
			str->s = (char*)calloc(1, 1); \
			if (str->s == NULL) return -2; \
			str->m = 1; \
		} \
		str->s[str->l] = '\0'; \
		return (int)str->l; \
	}
|
|
123
|
|
/*
 * KSTREAM_INIT(type_t, __read, __bufsize): instantiate the whole buffered
 * stream layer for one handle type: the kstream_t type first, then ks_init /
 * ks_destroy, ks_getc and ks_getuntil, all reading through __read with a
 * buffer of __bufsize bytes.
 */
#define KSTREAM_INIT(type_t, __read, __bufsize)  \
	__KS_TYPE(type_t)                            \
	__KS_BASIC(type_t, __bufsize)                \
	__KS_GETC(__read, __bufsize)                 \
	__KS_GETUNTIL(__read, __bufsize)
|
|
129
|
|
/*
 * __KSEQ_BASIC(type_t): emit the lifecycle helpers of the sequence reader.
 *
 *   kseq_init(fd)     Allocates a zeroed kseq_t and a stream over `fd`.
 *                     Returns NULL if the record or its stream cannot be
 *                     allocated.
 *   kseq_rewind(ks)   Clears the parser state (pending header character and
 *                     the stream's buffer/EOF bookkeeping). The handle itself
 *                     is not repositioned here.
 *   kseq_destroy(ks)  Frees the four string buffers, the stream and the record.
 *                     NULL is accepted and ignored. The handle is not closed.
 */
#define __KSEQ_BASIC(type_t) \
	static inline kseq_t *kseq_init(type_t fd) \
	{ \
		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
		if (s == NULL) return NULL; \
		s->f = ks_init(fd); \
		if (s->f == NULL) { /* no stream, no reader */ \
			free(s); \
			return NULL; \
		} \
		return s; \
	} \
	static inline void kseq_rewind(kseq_t *ks) \
	{ \
		ks->last_char = 0; \
		ks_rewind(ks->f); \
	} \
	static inline void kseq_destroy(kseq_t *ks) \
	{ \
		if (!ks) return; \
		free(ks->name.s); \
		free(ks->comment.s); \
		free(ks->seq.s); \
		free(ks->qual.s); \
		ks_destroy(ks->f); \
		free(ks); \
	}
|
|
149
|
|
/*
 * __KSEQ_READ: emit kseq_read(), which parses the next FASTA or FASTQ record
 * from seq->f into seq->name, seq->comment, seq->seq and (FASTQ only)
 * seq->qual.
 *
 * Return value:
 *   >=0  length of the sequence (normal)
 *   -1   end-of-file
 *   -2   truncated quality string
 *   -3   out of memory (the record is incomplete; the reader should be
 *        destroyed rather than read from again)
 *
 * A header starts with '>' or '@'; the name runs up to the first whitespace
 * and the remainder of that line is the comment. Sequence characters are
 * collected until the next '>', '+' or '@'; non-printable characters and
 * whitespace are dropped. After a '+' line, quality characters in the range
 * 33..127 are collected until as many as the sequence length have been read.
 * seq->seq.s is always a valid NUL-terminated string on a >= 0 return, even
 * for a record whose sequence is empty.
 */
#define __KSEQ_READ \
	/* Make sure str can hold `need` bytes; returns 0 on success, -1 on allocation failure. */ \
	static inline int kseq_reserve_(kstring_t *str, size_t need) \
	{ \
		size_t cap = need; \
		char *tmp; \
		if (need <= str->m) return 0; \
		kroundup32(cap); /* rounded to next closest 2^k */ \
		tmp = (char*)realloc(str->s, cap); \
		if (tmp == NULL) return -1; /* str->s is left untouched */ \
		str->s = tmp; \
		str->m = cap; \
		return 0; \
	} \
	static int kseq_read(kseq_t *seq) \
	{ \
		int c; \
		kstream_t *ks = seq->f; \
		if (seq->last_char == 0) { /* then jump to the next header line */ \
			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
			if (c == -1) return -1; /* end of file */ \
			seq->last_char = c; \
		} /* else the first header char was already consumed by the previous call */ \
		seq->comment.l = seq->seq.l = seq->qual.l = 0; \
		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; \
		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); \
		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
			/* <ctype.h> functions require an unsigned char value */ \
			if (!isgraph((unsigned char)c)) continue; /* keep printable non-space characters only */ \
			/* room for this character plus the terminator */ \
			if (kseq_reserve_(&seq->seq, seq->seq.l + 2) < 0) return -3; \
			seq->seq.s[seq->seq.l++] = (char)c; \
		} \
		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
		/* an empty sequence may reach this point with no buffer allocated yet */ \
		if (kseq_reserve_(&seq->seq, seq->seq.l + 1) < 0) return -3; \
		seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
		if (c != '+') return (int)seq->seq.l; /* FASTA */ \
		/* FASTQ: the quality buffer must be at least as large as the sequence buffer */ \
		if (kseq_reserve_(&seq->qual, seq->seq.m) < 0) return -3; \
		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
		if (c == -1) return -2; /* we should not stop here */ \
		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l) \
			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c; \
		seq->qual.s[seq->qual.l] = 0; /* null terminated string */ \
		seq->last_char = 0; /* we have not come to the next header line */ \
		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
		return (int)seq->seq.l; \
	}
|
|
194
|
|
/*
 * __KSEQ_TYPE(type_t): declare kseq_t, one parsed sequence record plus the
 * reader state. The type_t argument is accepted but not used by the
 * declaration.
 *
 *   name, comment, seq, qual  string buffers of the current record
 *   last_char                 header character already consumed for the next
 *                             record, or 0 if none is pending
 *   f                         the stream the records are read from
 */
#define __KSEQ_TYPE(type_t)  \
	typedef struct {         \
		kstring_t name;      \
		kstring_t comment;   \
		kstring_t seq;       \
		kstring_t qual;      \
		int last_char;       \
		kstream_t *f;        \
	} kseq_t;
|
|
201
|
|
/*
 * KSEQ_INIT(type_t, __read): instantiate the complete reader for one handle
 * type: the buffered stream layer with a 4096-byte buffer, then the kseq_t
 * type, its init/rewind/destroy helpers and kseq_read().
 */
#define KSEQ_INIT(type_t, __read)          \
	KSTREAM_INIT(type_t, __read, 4096)     \
	__KSEQ_TYPE(type_t)                    \
	__KSEQ_BASIC(type_t)                   \
	__KSEQ_READ
|
|
207
|
|
208 #endif
|