Mercurial > repos > ryanmorin > nextgen_variant_identification
comparison SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/samtools-0.1.6/bam_aux.c @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:74f5ea818cea |
---|---|
1 #include <ctype.h> | |
2 #include "bam.h" | |
3 #include "khash.h" | |
4 typedef char *str_p; | |
5 KHASH_MAP_INIT_STR(s, int) | |
6 KHASH_MAP_INIT_STR(r2l, str_p) | |
7 | |
8 void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) | |
9 { | |
10 int ori_len = b->data_len; | |
11 b->data_len += 3 + len; | |
12 b->l_aux += 3 + len; | |
13 if (b->m_data < b->data_len) { | |
14 b->m_data = b->data_len; | |
15 kroundup32(b->m_data); | |
16 b->data = (uint8_t*)realloc(b->data, b->m_data); | |
17 } | |
18 b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1]; | |
19 b->data[ori_len + 2] = type; | |
20 memcpy(b->data + ori_len + 3, data, len); | |
21 } | |
22 | |
23 uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) | |
24 { | |
25 return bam_aux_get(b, tag); | |
26 } | |
27 | |
28 #define __skip_tag(s) do { \ | |
29 int type = toupper(*(s)); \ | |
30 ++(s); \ | |
31 if (type == 'C' || type == 'A') ++(s); \ | |
32 else if (type == 'S') (s) += 2; \ | |
33 else if (type == 'I' || type == 'F') (s) += 4; \ | |
34 else if (type == 'D') (s) += 8; \ | |
35 else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ | |
36 } while (0) | |
37 | |
38 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) | |
39 { | |
40 uint8_t *s; | |
41 int y = tag[0]<<8 | tag[1]; | |
42 s = bam1_aux(b); | |
43 while (s < b->data + b->data_len) { | |
44 int x = (int)s[0]<<8 | s[1]; | |
45 s += 2; | |
46 if (x == y) return s; | |
47 __skip_tag(s); | |
48 } | |
49 return 0; | |
50 } | |
51 // s MUST BE returned by bam_aux_get() | |
52 int bam_aux_del(bam1_t *b, uint8_t *s) | |
53 { | |
54 uint8_t *p, *aux; | |
55 aux = bam1_aux(b); | |
56 p = s - 2; | |
57 __skip_tag(s); | |
58 memmove(p, s, b->l_aux - (s - aux)); | |
59 b->data_len -= s - p; | |
60 b->l_aux -= s - p; | |
61 return 0; | |
62 } | |
63 | |
64 void bam_init_header_hash(bam_header_t *header) | |
65 { | |
66 if (header->hash == 0) { | |
67 int ret, i; | |
68 khiter_t iter; | |
69 khash_t(s) *h; | |
70 header->hash = h = kh_init(s); | |
71 for (i = 0; i < header->n_targets; ++i) { | |
72 iter = kh_put(s, h, header->target_name[i], &ret); | |
73 kh_value(h, iter) = i; | |
74 } | |
75 } | |
76 } | |
77 | |
78 void bam_destroy_header_hash(bam_header_t *header) | |
79 { | |
80 if (header->hash) | |
81 kh_destroy(s, (khash_t(s)*)header->hash); | |
82 } | |
83 | |
84 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name) | |
85 { | |
86 khint_t k; | |
87 khash_t(s) *h = (khash_t(s)*)header->hash; | |
88 k = kh_get(s, h, seq_name); | |
89 return k == kh_end(h)? -1 : kh_value(h, k); | |
90 } | |
91 | |
92 int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) | |
93 { | |
94 char *s, *p; | |
95 int i, l, k; | |
96 khiter_t iter; | |
97 khash_t(s) *h; | |
98 | |
99 bam_init_header_hash(header); | |
100 h = (khash_t(s)*)header->hash; | |
101 | |
102 l = strlen(str); | |
103 p = s = (char*)malloc(l+1); | |
104 /* squeeze out "," */ | |
105 for (i = k = 0; i != l; ++i) | |
106 if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; | |
107 s[k] = 0; | |
108 for (i = 0; i != k; ++i) if (s[i] == ':') break; | |
109 s[i] = 0; | |
110 iter = kh_get(s, h, s); /* get the ref_id */ | |
111 if (iter == kh_end(h)) { // name not found | |
112 *ref_id = -1; free(s); | |
113 return -1; | |
114 } | |
115 *ref_id = kh_value(h, iter); | |
116 if (i == k) { /* dump the whole sequence */ | |
117 *begin = 0; *end = 1<<29; free(s); | |
118 return -1; | |
119 } | |
120 for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; | |
121 *begin = atoi(p); | |
122 if (i < k) { | |
123 p = s + i + 1; | |
124 *end = atoi(p); | |
125 } else *end = 1<<29; | |
126 if (*begin > 0) --*begin; | |
127 free(s); | |
128 if (*begin > *end) { | |
129 fprintf(stderr, "[bam_parse_region] invalid region.\n"); | |
130 return -1; | |
131 } | |
132 return 0; | |
133 } | |
134 | |
135 int32_t bam_aux2i(const uint8_t *s) | |
136 { | |
137 int type; | |
138 if (s == 0) return 0; | |
139 type = *s++; | |
140 if (type == 'c') return (int32_t)*(int8_t*)s; | |
141 else if (type == 'C') return (int32_t)*(uint8_t*)s; | |
142 else if (type == 's') return (int32_t)*(int16_t*)s; | |
143 else if (type == 'S') return (int32_t)*(uint16_t*)s; | |
144 else if (type == 'i' || type == 'I') return *(int32_t*)s; | |
145 else return 0; | |
146 } | |
147 | |
148 float bam_aux2f(const uint8_t *s) | |
149 { | |
150 int type; | |
151 type = *s++; | |
152 if (s == 0) return 0.0; | |
153 if (type == 'f') return *(float*)s; | |
154 else return 0.0; | |
155 } | |
156 | |
157 double bam_aux2d(const uint8_t *s) | |
158 { | |
159 int type; | |
160 type = *s++; | |
161 if (s == 0) return 0.0; | |
162 if (type == 'd') return *(double*)s; | |
163 else return 0.0; | |
164 } | |
165 | |
166 char bam_aux2A(const uint8_t *s) | |
167 { | |
168 int type; | |
169 type = *s++; | |
170 if (s == 0) return 0; | |
171 if (type == 'A') return *(char*)s; | |
172 else return 0; | |
173 } | |
174 | |
175 char *bam_aux2Z(const uint8_t *s) | |
176 { | |
177 int type; | |
178 type = *s++; | |
179 if (s == 0) return 0; | |
180 if (type == 'Z' || type == 'H') return (char*)s; | |
181 else return 0; | |
182 } | |
183 | |
184 /****************** | |
185 * rg2lib related * | |
186 ******************/ | |
187 | |
188 int bam_strmap_put(void *rg2lib, const char *rg, const char *lib) | |
189 { | |
190 int ret; | |
191 khint_t k; | |
192 khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; | |
193 char *key; | |
194 if (h == 0) return 1; | |
195 key = strdup(rg); | |
196 k = kh_put(r2l, h, key, &ret); | |
197 if (ret) kh_val(h, k) = strdup(lib); | |
198 else { | |
199 fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg); | |
200 free(key); | |
201 } | |
202 return 0; | |
203 } | |
204 | |
205 const char *bam_strmap_get(const void *rg2lib, const char *rg) | |
206 { | |
207 const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; | |
208 khint_t k; | |
209 if (h == 0) return 0; | |
210 k = kh_get(r2l, h, rg); | |
211 if (k != kh_end(h)) return (const char*)kh_val(h, k); | |
212 else return 0; | |
213 } | |
214 | |
215 void *bam_strmap_dup(const void *rg2lib) | |
216 { | |
217 const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib; | |
218 khash_t(r2l) *g; | |
219 khint_t k, l; | |
220 int ret; | |
221 if (h == 0) return 0; | |
222 g = kh_init(r2l); | |
223 for (k = kh_begin(h); k < kh_end(h); ++k) { | |
224 if (kh_exist(h, k)) { | |
225 char *key = strdup(kh_key(h, k)); | |
226 l = kh_put(r2l, g, key, &ret); | |
227 kh_val(g, l) = strdup(kh_val(h, k)); | |
228 } | |
229 } | |
230 return g; | |
231 } | |
232 | |
233 void *bam_strmap_init() | |
234 { | |
235 return (void*)kh_init(r2l); | |
236 } | |
237 | |
238 void bam_strmap_destroy(void *rg2lib) | |
239 { | |
240 khash_t(r2l) *h = (khash_t(r2l)*)rg2lib; | |
241 khint_t k; | |
242 if (h == 0) return; | |
243 for (k = kh_begin(h); k < kh_end(h); ++k) { | |
244 if (kh_exist(h, k)) { | |
245 free((char*)kh_key(h, k)); free(kh_val(h, k)); | |
246 } | |
247 } | |
248 kh_destroy(r2l, h); | |
249 } |