Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/glf.c @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 #include <string.h> | |
2 #include <stdlib.h> | |
3 #include "glf.h" | |
4 | |
5 #ifdef _NO_BGZF | |
6 // then alias bgzf_*() functions | |
7 #endif | |
8 | |
9 static int glf3_is_BE = 0; | |
10 | |
11 static inline uint32_t bam_swap_endian_4(uint32_t v) | |
12 { | |
13 v = ((v & 0x0000FFFFU) << 16) | (v >> 16); | |
14 return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); | |
15 } | |
16 | |
17 static inline uint16_t bam_swap_endian_2(uint16_t v) | |
18 { | |
19 return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); | |
20 } | |
21 | |
22 static inline int bam_is_big_endian() | |
23 { | |
24 long one= 1; | |
25 return !(*((char *)(&one))); | |
26 } | |
27 | |
28 glf3_header_t *glf3_header_init() | |
29 { | |
30 glf3_is_BE = bam_is_big_endian(); | |
31 return (glf3_header_t*)calloc(1, sizeof(glf3_header_t)); | |
32 } | |
33 | |
34 glf3_header_t *glf3_header_read(glfFile fp) | |
35 { | |
36 glf3_header_t *h; | |
37 char magic[4]; | |
38 h = glf3_header_init(); | |
39 bgzf_read(fp, magic, 4); | |
40 if (strncmp(magic, "GLF\3", 4)) { | |
41 fprintf(stderr, "[glf3_header_read] invalid magic.\n"); | |
42 glf3_header_destroy(h); | |
43 return 0; | |
44 } | |
45 bgzf_read(fp, &h->l_text, 4); | |
46 if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text); | |
47 if (h->l_text) { | |
48 h->text = (uint8_t*)calloc(h->l_text + 1, 1); | |
49 bgzf_read(fp, h->text, h->l_text); | |
50 } | |
51 return h; | |
52 } | |
53 | |
54 void glf3_header_write(glfFile fp, const glf3_header_t *h) | |
55 { | |
56 int32_t x; | |
57 bgzf_write(fp, "GLF\3", 4); | |
58 x = glf3_is_BE? bam_swap_endian_4(h->l_text) : h->l_text; | |
59 bgzf_write(fp, &x, 4); | |
60 if (h->l_text) bgzf_write(fp, h->text, h->l_text); | |
61 } | |
62 | |
63 void glf3_header_destroy(glf3_header_t *h) | |
64 { | |
65 free(h->text); | |
66 free(h); | |
67 } | |
68 | |
69 char *glf3_ref_read(glfFile fp, int *len) | |
70 { | |
71 int32_t n, x; | |
72 char *str; | |
73 *len = 0; | |
74 if (bgzf_read(fp, &n, 4) != 4) return 0; | |
75 if (glf3_is_BE) n = bam_swap_endian_4(n); | |
76 if (n < 0) { | |
77 fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n); | |
78 return 0; | |
79 } | |
80 str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact | |
81 x = bgzf_read(fp, str, n); | |
82 x += bgzf_read(fp, len, 4); | |
83 if (x != n + 4) { | |
84 free(str); *len = -1; return 0; // truncated | |
85 } | |
86 if (glf3_is_BE) *len = bam_swap_endian_4(*len); | |
87 return str; | |
88 } | |
89 | |
90 void glf3_ref_write(glfFile fp, const char *str, int len) | |
91 { | |
92 int32_t m, n = strlen(str) + 1; | |
93 m = glf3_is_BE? bam_swap_endian_4(n) : n; | |
94 bgzf_write(fp, &m, 4); | |
95 bgzf_write(fp, str, n); | |
96 if (glf3_is_BE) len = bam_swap_endian_4(len); | |
97 bgzf_write(fp, &len, 4); | |
98 } | |
99 | |
100 void glf3_view1(const char *ref_name, const glf3_t *g3, int pos) | |
101 { | |
102 int j; | |
103 if (g3->rtype == GLF3_RTYPE_END) return; | |
104 printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, | |
105 g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base], | |
106 g3->depth, g3->rms_mapQ, g3->min_lk); | |
107 if (g3->rtype == GLF3_RTYPE_SUB) | |
108 for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]); | |
109 else { | |
110 printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1], | |
111 g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? g3->indel_seq[1] : "*"); | |
112 } | |
113 printf("\n"); | |
114 } | |
115 | |
116 int glf3_write1(glfFile fp, const glf3_t *g3) | |
117 { | |
118 int r; | |
119 uint8_t c; | |
120 uint32_t y[2]; | |
121 c = g3->rtype<<4 | g3->ref_base; | |
122 r = bgzf_write(fp, &c, 1); | |
123 if (g3->rtype == GLF3_RTYPE_END) return r; | |
124 y[0] = g3->offset; | |
125 y[1] = g3->min_lk<<24 | g3->depth; | |
126 if (glf3_is_BE) { | |
127 y[0] = bam_swap_endian_4(y[0]); | |
128 y[1] = bam_swap_endian_4(y[1]); | |
129 } | |
130 r += bgzf_write(fp, y, 8); | |
131 r += bgzf_write(fp, &g3->rms_mapQ, 1); | |
132 if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10); | |
133 else { | |
134 int16_t x[2]; | |
135 r += bgzf_write(fp, g3->lk, 3); | |
136 x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0]; | |
137 x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1]; | |
138 r += bgzf_write(fp, x, 4); | |
139 if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0])); | |
140 if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1])); | |
141 } | |
142 return r; | |
143 } | |
144 | |
145 #ifndef kv_roundup32 | |
146 #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) | |
147 #endif | |
148 | |
149 int glf3_read1(glfFile fp, glf3_t *g3) | |
150 { | |
151 int r; | |
152 uint8_t c; | |
153 uint32_t y[2]; | |
154 r = bgzf_read(fp, &c, 1); | |
155 if (r == 0) return 0; | |
156 g3->ref_base = c & 0xf; | |
157 g3->rtype = c>>4; | |
158 if (g3->rtype == GLF3_RTYPE_END) return r; | |
159 r += bgzf_read(fp, y, 8); | |
160 if (glf3_is_BE) { | |
161 y[0] = bam_swap_endian_4(y[0]); | |
162 y[1] = bam_swap_endian_4(y[1]); | |
163 } | |
164 g3->offset = y[0]; | |
165 g3->min_lk = y[1]>>24; | |
166 g3->depth = y[1]<<8>>8; | |
167 r += bgzf_read(fp, &g3->rms_mapQ, 1); | |
168 if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10); | |
169 else { | |
170 int16_t x[2], max; | |
171 r += bgzf_read(fp, g3->lk, 3); | |
172 r += bgzf_read(fp, x, 4); | |
173 if (glf3_is_BE) { | |
174 x[0] = bam_swap_endian_2(x[0]); | |
175 x[1] = bam_swap_endian_2(x[1]); | |
176 } | |
177 g3->indel_len[0] = x[0]; | |
178 g3->indel_len[1] = x[1]; | |
179 x[0] = abs(x[0]); x[1] = abs(x[1]); | |
180 max = (x[0] > x[1]? x[0] : x[1]) + 1; | |
181 if (g3->max_len < max) { | |
182 g3->max_len = max; | |
183 kv_roundup32(g3->max_len); | |
184 g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len); | |
185 g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len); | |
186 } | |
187 r += bgzf_read(fp, g3->indel_seq[0], x[0]); | |
188 r += bgzf_read(fp, g3->indel_seq[1], x[1]); | |
189 g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0; | |
190 } | |
191 return r; | |
192 } | |
193 | |
194 void glf3_view(glfFile fp) | |
195 { | |
196 glf3_header_t *h; | |
197 char *name; | |
198 glf3_t *g3; | |
199 int len; | |
200 h = glf3_header_read(fp); | |
201 g3 = glf3_init1(); | |
202 while ((name = glf3_ref_read(fp, &len)) != 0) { | |
203 int pos = 0; | |
204 while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) { | |
205 pos += g3->offset; | |
206 glf3_view1(name, g3, pos); | |
207 } | |
208 free(name); | |
209 } | |
210 glf3_header_destroy(h); | |
211 glf3_destroy1(g3); | |
212 } | |
213 | |
214 int glf3_view_main(int argc, char *argv[]) | |
215 { | |
216 glfFile fp; | |
217 if (argc == 1) { | |
218 fprintf(stderr, "Usage: glfview <in.glf>\n"); | |
219 return 1; | |
220 } | |
221 fp = (strcmp(argv[1], "-") == 0)? bgzf_fdopen(fileno(stdin), "r") : bgzf_open(argv[1], "r"); | |
222 if (fp == 0) { | |
223 fprintf(stderr, "Fail to open file '%s'\n", argv[1]); | |
224 return 1; | |
225 } | |
226 glf3_view(fp); | |
227 bgzf_close(fp); | |
228 return 0; | |
229 } | |
230 | |
231 #ifdef GLFVIEW_MAIN | |
232 int main(int argc, char *argv[]) | |
233 { | |
234 return glf3_view_main(argc, argv); | |
235 } | |
236 #endif |