0
|
1 #include <string.h>
|
|
2 #include <stdlib.h>
|
|
3 #include "glf.h"
|
|
4
|
|
5 #ifdef _NO_BGZF
|
|
6 // then alias bgzf_*() functions
|
|
7 #endif
|
|
8
|
|
9 static int glf3_is_BE = 0;
|
|
10
|
|
11 static inline uint32_t bam_swap_endian_4(uint32_t v)
|
|
12 {
|
|
13 v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
|
|
14 return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
|
|
15 }
|
|
16
|
|
17 static inline uint16_t bam_swap_endian_2(uint16_t v)
|
|
18 {
|
|
19 return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
|
|
20 }
|
|
21
|
|
22 static inline int bam_is_big_endian()
|
|
23 {
|
|
24 long one= 1;
|
|
25 return !(*((char *)(&one)));
|
|
26 }
|
|
27
|
|
28 glf3_header_t *glf3_header_init()
|
|
29 {
|
|
30 glf3_is_BE = bam_is_big_endian();
|
|
31 return (glf3_header_t*)calloc(1, sizeof(glf3_header_t));
|
|
32 }
|
|
33
|
|
34 glf3_header_t *glf3_header_read(glfFile fp)
|
|
35 {
|
|
36 glf3_header_t *h;
|
|
37 char magic[4];
|
|
38 h = glf3_header_init();
|
|
39 bgzf_read(fp, magic, 4);
|
|
40 if (strncmp(magic, "GLF\3", 4)) {
|
|
41 fprintf(stderr, "[glf3_header_read] invalid magic.\n");
|
|
42 glf3_header_destroy(h);
|
|
43 return 0;
|
|
44 }
|
|
45 bgzf_read(fp, &h->l_text, 4);
|
|
46 if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text);
|
|
47 if (h->l_text) {
|
|
48 h->text = (uint8_t*)calloc(h->l_text + 1, 1);
|
|
49 bgzf_read(fp, h->text, h->l_text);
|
|
50 }
|
|
51 return h;
|
|
52 }
|
|
53
|
|
54 void glf3_header_write(glfFile fp, const glf3_header_t *h)
|
|
55 {
|
|
56 int32_t x;
|
|
57 bgzf_write(fp, "GLF\3", 4);
|
|
58 x = glf3_is_BE? bam_swap_endian_4(h->l_text) : h->l_text;
|
|
59 bgzf_write(fp, &x, 4);
|
|
60 if (h->l_text) bgzf_write(fp, h->text, h->l_text);
|
|
61 }
|
|
62
|
|
63 void glf3_header_destroy(glf3_header_t *h)
|
|
64 {
|
|
65 free(h->text);
|
|
66 free(h);
|
|
67 }
|
|
68
|
|
69 char *glf3_ref_read(glfFile fp, int *len)
|
|
70 {
|
|
71 int32_t n, x;
|
|
72 char *str;
|
|
73 *len = 0;
|
|
74 if (bgzf_read(fp, &n, 4) != 4) return 0;
|
|
75 if (glf3_is_BE) n = bam_swap_endian_4(n);
|
|
76 if (n < 0) {
|
|
77 fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n);
|
|
78 return 0;
|
|
79 }
|
|
80 str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact
|
|
81 x = bgzf_read(fp, str, n);
|
|
82 x += bgzf_read(fp, len, 4);
|
|
83 if (x != n + 4) {
|
|
84 free(str); *len = -1; return 0; // truncated
|
|
85 }
|
|
86 if (glf3_is_BE) *len = bam_swap_endian_4(*len);
|
|
87 return str;
|
|
88 }
|
|
89
|
|
90 void glf3_ref_write(glfFile fp, const char *str, int len)
|
|
91 {
|
|
92 int32_t m, n = strlen(str) + 1;
|
|
93 m = glf3_is_BE? bam_swap_endian_4(n) : n;
|
|
94 bgzf_write(fp, &m, 4);
|
|
95 bgzf_write(fp, str, n);
|
|
96 if (glf3_is_BE) len = bam_swap_endian_4(len);
|
|
97 bgzf_write(fp, &len, 4);
|
|
98 }
|
|
99
|
|
100 void glf3_view1(const char *ref_name, const glf3_t *g3, int pos)
|
|
101 {
|
|
102 int j;
|
|
103 if (g3->rtype == GLF3_RTYPE_END) return;
|
|
104 printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1,
|
|
105 g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base],
|
|
106 g3->depth, g3->rms_mapQ, g3->min_lk);
|
|
107 if (g3->rtype == GLF3_RTYPE_SUB)
|
|
108 for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]);
|
|
109 else {
|
|
110 printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1],
|
|
111 g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? g3->indel_seq[1] : "*");
|
|
112 }
|
|
113 printf("\n");
|
|
114 }
|
|
115
|
|
116 int glf3_write1(glfFile fp, const glf3_t *g3)
|
|
117 {
|
|
118 int r;
|
|
119 uint8_t c;
|
|
120 uint32_t y[2];
|
|
121 c = g3->rtype<<4 | g3->ref_base;
|
|
122 r = bgzf_write(fp, &c, 1);
|
|
123 if (g3->rtype == GLF3_RTYPE_END) return r;
|
|
124 y[0] = g3->offset;
|
|
125 y[1] = g3->min_lk<<24 | g3->depth;
|
|
126 if (glf3_is_BE) {
|
|
127 y[0] = bam_swap_endian_4(y[0]);
|
|
128 y[1] = bam_swap_endian_4(y[1]);
|
|
129 }
|
|
130 r += bgzf_write(fp, y, 8);
|
|
131 r += bgzf_write(fp, &g3->rms_mapQ, 1);
|
|
132 if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10);
|
|
133 else {
|
|
134 int16_t x[2];
|
|
135 r += bgzf_write(fp, g3->lk, 3);
|
|
136 x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0];
|
|
137 x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1];
|
|
138 r += bgzf_write(fp, x, 4);
|
|
139 if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0]));
|
|
140 if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1]));
|
|
141 }
|
|
142 return r;
|
|
143 }
|
|
144
|
|
145 #ifndef kv_roundup32
|
|
146 #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
|
147 #endif
|
|
148
|
|
149 int glf3_read1(glfFile fp, glf3_t *g3)
|
|
150 {
|
|
151 int r;
|
|
152 uint8_t c;
|
|
153 uint32_t y[2];
|
|
154 r = bgzf_read(fp, &c, 1);
|
|
155 if (r == 0) return 0;
|
|
156 g3->ref_base = c & 0xf;
|
|
157 g3->rtype = c>>4;
|
|
158 if (g3->rtype == GLF3_RTYPE_END) return r;
|
|
159 r += bgzf_read(fp, y, 8);
|
|
160 if (glf3_is_BE) {
|
|
161 y[0] = bam_swap_endian_4(y[0]);
|
|
162 y[1] = bam_swap_endian_4(y[1]);
|
|
163 }
|
|
164 g3->offset = y[0];
|
|
165 g3->min_lk = y[1]>>24;
|
|
166 g3->depth = y[1]<<8>>8;
|
|
167 r += bgzf_read(fp, &g3->rms_mapQ, 1);
|
|
168 if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10);
|
|
169 else {
|
|
170 int16_t x[2], max;
|
|
171 r += bgzf_read(fp, g3->lk, 3);
|
|
172 r += bgzf_read(fp, x, 4);
|
|
173 if (glf3_is_BE) {
|
|
174 x[0] = bam_swap_endian_2(x[0]);
|
|
175 x[1] = bam_swap_endian_2(x[1]);
|
|
176 }
|
|
177 g3->indel_len[0] = x[0];
|
|
178 g3->indel_len[1] = x[1];
|
|
179 x[0] = abs(x[0]); x[1] = abs(x[1]);
|
|
180 max = (x[0] > x[1]? x[0] : x[1]) + 1;
|
|
181 if (g3->max_len < max) {
|
|
182 g3->max_len = max;
|
|
183 kv_roundup32(g3->max_len);
|
|
184 g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len);
|
|
185 g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len);
|
|
186 }
|
|
187 r += bgzf_read(fp, g3->indel_seq[0], x[0]);
|
|
188 r += bgzf_read(fp, g3->indel_seq[1], x[1]);
|
|
189 g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0;
|
|
190 }
|
|
191 return r;
|
|
192 }
|
|
193
|
|
194 void glf3_view(glfFile fp)
|
|
195 {
|
|
196 glf3_header_t *h;
|
|
197 char *name;
|
|
198 glf3_t *g3;
|
|
199 int len;
|
|
200 h = glf3_header_read(fp);
|
|
201 g3 = glf3_init1();
|
|
202 while ((name = glf3_ref_read(fp, &len)) != 0) {
|
|
203 int pos = 0;
|
|
204 while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) {
|
|
205 pos += g3->offset;
|
|
206 glf3_view1(name, g3, pos);
|
|
207 }
|
|
208 free(name);
|
|
209 }
|
|
210 glf3_header_destroy(h);
|
|
211 glf3_destroy1(g3);
|
|
212 }
|
|
213
|
|
214 int glf3_view_main(int argc, char *argv[])
|
|
215 {
|
|
216 glfFile fp;
|
|
217 if (argc == 1) {
|
|
218 fprintf(stderr, "Usage: glfview <in.glf>\n");
|
|
219 return 1;
|
|
220 }
|
|
221 fp = (strcmp(argv[1], "-") == 0)? bgzf_fdopen(fileno(stdin), "r") : bgzf_open(argv[1], "r");
|
|
222 if (fp == 0) {
|
|
223 fprintf(stderr, "Fail to open file '%s'\n", argv[1]);
|
|
224 return 1;
|
|
225 }
|
|
226 glf3_view(fp);
|
|
227 bgzf_close(fp);
|
|
228 return 0;
|
|
229 }
|
|
230
|
|
231 #ifdef GLFVIEW_MAIN
|
|
232 int main(int argc, char *argv[])
|
|
233 {
|
|
234 return glf3_view_main(argc, argv);
|
|
235 }
|
|
236 #endif
|