comparison SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/samtools-0.1.6/bam_aux.c @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:74f5ea818cea
1 #include <ctype.h>
2 #include "bam.h"
3 #include "khash.h"
4 typedef char *str_p;
5 KHASH_MAP_INIT_STR(s, int)
6 KHASH_MAP_INIT_STR(r2l, str_p)
7
8 void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
9 {
10 int ori_len = b->data_len;
11 b->data_len += 3 + len;
12 b->l_aux += 3 + len;
13 if (b->m_data < b->data_len) {
14 b->m_data = b->data_len;
15 kroundup32(b->m_data);
16 b->data = (uint8_t*)realloc(b->data, b->m_data);
17 }
18 b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1];
19 b->data[ori_len + 2] = type;
20 memcpy(b->data + ori_len + 3, data, len);
21 }
22
23 uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
24 {
25 return bam_aux_get(b, tag);
26 }
27
28 #define __skip_tag(s) do { \
29 int type = toupper(*(s)); \
30 ++(s); \
31 if (type == 'C' || type == 'A') ++(s); \
32 else if (type == 'S') (s) += 2; \
33 else if (type == 'I' || type == 'F') (s) += 4; \
34 else if (type == 'D') (s) += 8; \
35 else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
36 } while (0)
37
38 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
39 {
40 uint8_t *s;
41 int y = tag[0]<<8 | tag[1];
42 s = bam1_aux(b);
43 while (s < b->data + b->data_len) {
44 int x = (int)s[0]<<8 | s[1];
45 s += 2;
46 if (x == y) return s;
47 __skip_tag(s);
48 }
49 return 0;
50 }
51 // s MUST BE returned by bam_aux_get()
52 int bam_aux_del(bam1_t *b, uint8_t *s)
53 {
54 uint8_t *p, *aux;
55 aux = bam1_aux(b);
56 p = s - 2;
57 __skip_tag(s);
58 memmove(p, s, b->l_aux - (s - aux));
59 b->data_len -= s - p;
60 b->l_aux -= s - p;
61 return 0;
62 }
63
64 void bam_init_header_hash(bam_header_t *header)
65 {
66 if (header->hash == 0) {
67 int ret, i;
68 khiter_t iter;
69 khash_t(s) *h;
70 header->hash = h = kh_init(s);
71 for (i = 0; i < header->n_targets; ++i) {
72 iter = kh_put(s, h, header->target_name[i], &ret);
73 kh_value(h, iter) = i;
74 }
75 }
76 }
77
78 void bam_destroy_header_hash(bam_header_t *header)
79 {
80 if (header->hash)
81 kh_destroy(s, (khash_t(s)*)header->hash);
82 }
83
84 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name)
85 {
86 khint_t k;
87 khash_t(s) *h = (khash_t(s)*)header->hash;
88 k = kh_get(s, h, seq_name);
89 return k == kh_end(h)? -1 : kh_value(h, k);
90 }
91
92 int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end)
93 {
94 char *s, *p;
95 int i, l, k;
96 khiter_t iter;
97 khash_t(s) *h;
98
99 bam_init_header_hash(header);
100 h = (khash_t(s)*)header->hash;
101
102 l = strlen(str);
103 p = s = (char*)malloc(l+1);
104 /* squeeze out "," */
105 for (i = k = 0; i != l; ++i)
106 if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i];
107 s[k] = 0;
108 for (i = 0; i != k; ++i) if (s[i] == ':') break;
109 s[i] = 0;
110 iter = kh_get(s, h, s); /* get the ref_id */
111 if (iter == kh_end(h)) { // name not found
112 *ref_id = -1; free(s);
113 return -1;
114 }
115 *ref_id = kh_value(h, iter);
116 if (i == k) { /* dump the whole sequence */
117 *begin = 0; *end = 1<<29; free(s);
118 return -1;
119 }
120 for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break;
121 *begin = atoi(p);
122 if (i < k) {
123 p = s + i + 1;
124 *end = atoi(p);
125 } else *end = 1<<29;
126 if (*begin > 0) --*begin;
127 free(s);
128 if (*begin > *end) {
129 fprintf(stderr, "[bam_parse_region] invalid region.\n");
130 return -1;
131 }
132 return 0;
133 }
134
135 int32_t bam_aux2i(const uint8_t *s)
136 {
137 int type;
138 if (s == 0) return 0;
139 type = *s++;
140 if (type == 'c') return (int32_t)*(int8_t*)s;
141 else if (type == 'C') return (int32_t)*(uint8_t*)s;
142 else if (type == 's') return (int32_t)*(int16_t*)s;
143 else if (type == 'S') return (int32_t)*(uint16_t*)s;
144 else if (type == 'i' || type == 'I') return *(int32_t*)s;
145 else return 0;
146 }
147
148 float bam_aux2f(const uint8_t *s)
149 {
150 int type;
151 type = *s++;
152 if (s == 0) return 0.0;
153 if (type == 'f') return *(float*)s;
154 else return 0.0;
155 }
156
157 double bam_aux2d(const uint8_t *s)
158 {
159 int type;
160 type = *s++;
161 if (s == 0) return 0.0;
162 if (type == 'd') return *(double*)s;
163 else return 0.0;
164 }
165
166 char bam_aux2A(const uint8_t *s)
167 {
168 int type;
169 type = *s++;
170 if (s == 0) return 0;
171 if (type == 'A') return *(char*)s;
172 else return 0;
173 }
174
175 char *bam_aux2Z(const uint8_t *s)
176 {
177 int type;
178 type = *s++;
179 if (s == 0) return 0;
180 if (type == 'Z' || type == 'H') return (char*)s;
181 else return 0;
182 }
183
184 /******************
185 * rg2lib related *
186 ******************/
187
188 int bam_strmap_put(void *rg2lib, const char *rg, const char *lib)
189 {
190 int ret;
191 khint_t k;
192 khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
193 char *key;
194 if (h == 0) return 1;
195 key = strdup(rg);
196 k = kh_put(r2l, h, key, &ret);
197 if (ret) kh_val(h, k) = strdup(lib);
198 else {
199 fprintf(stderr, "[bam_rg2lib_put] duplicated @RG ID: %s\n", rg);
200 free(key);
201 }
202 return 0;
203 }
204
205 const char *bam_strmap_get(const void *rg2lib, const char *rg)
206 {
207 const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
208 khint_t k;
209 if (h == 0) return 0;
210 k = kh_get(r2l, h, rg);
211 if (k != kh_end(h)) return (const char*)kh_val(h, k);
212 else return 0;
213 }
214
215 void *bam_strmap_dup(const void *rg2lib)
216 {
217 const khash_t(r2l) *h = (const khash_t(r2l)*)rg2lib;
218 khash_t(r2l) *g;
219 khint_t k, l;
220 int ret;
221 if (h == 0) return 0;
222 g = kh_init(r2l);
223 for (k = kh_begin(h); k < kh_end(h); ++k) {
224 if (kh_exist(h, k)) {
225 char *key = strdup(kh_key(h, k));
226 l = kh_put(r2l, g, key, &ret);
227 kh_val(g, l) = strdup(kh_val(h, k));
228 }
229 }
230 return g;
231 }
232
233 void *bam_strmap_init()
234 {
235 return (void*)kh_init(r2l);
236 }
237
238 void bam_strmap_destroy(void *rg2lib)
239 {
240 khash_t(r2l) *h = (khash_t(r2l)*)rg2lib;
241 khint_t k;
242 if (h == 0) return;
243 for (k = kh_begin(h); k < kh_end(h); ++k) {
244 if (kh_exist(h, k)) {
245 free((char*)kh_key(h, k)); free(kh_val(h, k));
246 }
247 }
248 kh_destroy(r2l, h);
249 }