comparison bwa-0.6.2/khash.h @ 2:a294fbfcb1db draft default tip

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:59 -0400
parents dd1186b11b3b
children
comparison
equal deleted inserted replaced
1:a9636dc1e99a 2:a294fbfcb1db
1 /* The MIT License
2
3 Copyright (c) 2008, 2009 by attractor <attractor@live.co.uk>
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25
26 /*
27 An example:
28
29 #include "khash.h"
30 KHASH_MAP_INIT_INT(32, char)
31 int main() {
32 int ret, is_missing;
33 khiter_t k;
34 khash_t(32) *h = kh_init(32);
35 k = kh_put(32, h, 5, &ret);
36 if (!ret) kh_del(32, h, k);
37 kh_value(h, k) = 10;
38 k = kh_get(32, h, 10);
39 is_missing = (k == kh_end(h));
40 k = kh_get(32, h, 5);
41 kh_del(32, h, k);
42 for (k = kh_begin(h); k != kh_end(h); ++k)
43 if (kh_exist(h, k)) kh_value(h, k) = 1;
44 kh_destroy(32, h);
45 return 0;
46 }
47 */
48
49 /*
50 2009-09-26 (0.2.4):
51
52 * Improve portability
53
54 2008-09-19 (0.2.3):
55
56 * Corrected the example
57 * Improved interfaces
58
59 2008-09-11 (0.2.2):
60
61 * Improved speed a little in kh_put()
62
63 2008-09-10 (0.2.1):
64
65 * Added kh_clear()
66 * Fixed a compiling error
67
68 2008-09-02 (0.2.0):
69
70 * Changed to token concatenation which increases flexibility.
71
72 2008-08-31 (0.1.2):
73
74 * Fixed a bug in kh_get(), which has not been tested previously.
75
76 2008-08-31 (0.1.1):
77
78 * Added destructor
79 */
80
81
82 #ifndef __AC_KHASH_H
83 #define __AC_KHASH_H
84
85 /*!
86 @header
87
88 Generic hash table library.
89
90 @copyright Heng Li
91 */
92
93 #define AC_VERSION_KHASH_H "0.2.4"
94
95 #include <stdlib.h>
96 #include <string.h>
97 #include <limits.h>
98
99 /* compiler-specific configuration */
100
/*
 * Integer types used throughout the library, selected from the limits
 * reported by <limits.h>.
 */

/* khint32_t: an unsigned type whose maximum value is 0xffffffff. */
#if UINT_MAX == 0xffffffffu
typedef unsigned int khint32_t;
#elif ULONG_MAX == 0xffffffffu
typedef unsigned long khint32_t;
#else
/* Stop here with a clear message rather than at the first use of khint32_t. */
#error "khash.h: neither unsigned int nor unsigned long is a 32-bit type"
#endif

/* khint64_t: unsigned long when it is as wide as unsigned long long,
   otherwise unsigned long long. */
#if ULONG_MAX == ULLONG_MAX
typedef unsigned long khint64_t;
#else
typedef unsigned long long khint64_t;
#endif

/* With MSVC, map the inline keyword to __inline. */
#ifdef _MSC_VER
#define inline __inline
#endif

/* Type of hash values, bucket counts and bucket indices. */
typedef khint32_t khint_t;
/* Iterator type: an index into the bucket array. */
typedef khint_t khiter_t;
119
120 #define __ac_HASH_PRIME_SIZE 32
121 static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
122 {
123 0ul, 3ul, 11ul, 23ul, 53ul,
124 97ul, 193ul, 389ul, 769ul, 1543ul,
125 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
126 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
127 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
128 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
129 3221225473ul, 4294967291ul
130 };
131
/*
 * Per-bucket state flags.  Every bucket owns two adjacent bits in an array
 * of 32-bit words (16 buckets per word): the bit of value 2 means "empty",
 * the bit of value 1 means "deleted".  A bucket holds live data when both
 * bits are clear (see kh_exist).
 *
 * The test macros yield a non-zero value (not necessarily 1) when the
 * condition holds.  Both parameters are parenthesized so that arbitrary
 * expressions can be passed; note that `i` is evaluated twice.
 */
#define __ac_isempty(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&2)
#define __ac_isdel(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&1)
#define __ac_iseither(flag, i) (((flag)[(i)>>4]>>(((i)&0xfU)<<1))&3)
#define __ac_set_isdel_false(flag, i) ((flag)[(i)>>4]&=~(1ul<<(((i)&0xfU)<<1)))
#define __ac_set_isempty_false(flag, i) ((flag)[(i)>>4]&=~(2ul<<(((i)&0xfU)<<1)))
#define __ac_set_isboth_false(flag, i) ((flag)[(i)>>4]&=~(3ul<<(((i)&0xfU)<<1)))
#define __ac_set_isdel_true(flag, i) ((flag)[(i)>>4]|=1ul<<(((i)&0xfU)<<1))

/*
 * Load-factor limit: upper_bound is computed as n_buckets * __ac_HASH_UPPER
 * (rounded), and kh_put_<name> resizes once n_occupied reaches upper_bound.
 */
static const double __ac_HASH_UPPER = 0.77;
141
/*!
  @abstract     Instantiate a hash table type together with its operations.
  @discussion   Expands to the struct kh_<name>_t and the static inline
                functions kh_init_<name>, kh_destroy_<name>, kh_clear_<name>,
                kh_get_<name>, kh_resize_<name>, kh_put_<name> and
                kh_del_<name>.

                Bucket state is kept in flags[] (two bits per bucket); a flag
                array filled with 0xaa marks every bucket as empty and not
                deleted.  Collisions are resolved by double hashing with the
                probe step 1 + hash % (n_buckets - 1).

                kh_resize_<name> returns 0 on success (also when no resize is
                performed) and -1 when memory allocation fails; in the latter
                case the stored elements are left untouched.  kh_put_<name>
                stores -1 in *ret and returns kh_end(h) when the table had to
                be resized and that resize failed; otherwise *ret is 0 (key
                already present), 1 (inserted into a never-used bucket) or
                2 (inserted into a deleted bucket).
  @param  name          Suffix used to build the generated identifiers [symbol]
  @param  khkey_t       Type of keys [type]
  @param  khval_t       Type of values [type]
  @param  kh_is_map     Non-zero to store values (map), 0 for a set
  @param  __hash_func   Function or macro mapping a key to a khint_t hash
  @param  __hash_equal  Function or macro testing two keys for equality
 */
#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
	typedef struct { \
		khint_t n_buckets, size, n_occupied, upper_bound; \
		khint32_t *flags; \
		khkey_t *keys; \
		khval_t *vals; \
	} kh_##name##_t; \
	static inline kh_##name##_t *kh_init_##name(void) { \
		return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
	} \
	static inline void kh_destroy_##name(kh_##name##_t *h) \
	{ \
		if (h) { \
			free(h->keys); free(h->flags); \
			free(h->vals); \
			free(h); \
		} \
	} \
	static inline void kh_clear_##name(kh_##name##_t *h) \
	{ \
		if (h && h->flags) { \
			memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
			h->size = h->n_occupied = 0; \
		} \
	} \
	static inline khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
	{ \
		if (h->n_buckets) { \
			khint_t inc, k, i, last; \
			k = __hash_func(key); i = k % h->n_buckets; \
			inc = 1 + k % (h->n_buckets - 1); last = i; \
			while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
				if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
				else i += inc; \
				if (i == last) return h->n_buckets; \
			} \
			return __ac_iseither(h->flags, i)? h->n_buckets : i; \
		} else return 0; \
	} \
	static inline int kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
	{ \
		khint32_t *new_flags = 0; \
		khint_t j = 1; \
		{ \
			khint_t t = __ac_HASH_PRIME_SIZE - 1; \
			while (__ac_prime_list[t] > new_n_buckets) --t; \
			/* take the next larger entry, but never index past the end of the list */ \
			if (t < __ac_HASH_PRIME_SIZE - 1) ++t; \
			new_n_buckets = __ac_prime_list[t]; \
			if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
			else { \
				size_t flag_bytes = ((size_t)(new_n_buckets>>4) + 1) * sizeof(khint32_t); \
				new_flags = (khint32_t*)malloc(flag_bytes); \
				if (!new_flags) return -1; \
				memset(new_flags, 0xaa, flag_bytes); \
				if (h->n_buckets < new_n_buckets) { \
					/* realloc into temporaries: on failure the old arrays stay valid and owned by h */ \
					khkey_t *new_keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
					if (!new_keys) { free(new_flags); return -1; } \
					h->keys = new_keys; \
					if (kh_is_map) { \
						khval_t *new_vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
						if (!new_vals) { free(new_flags); return -1; } \
						h->vals = new_vals; \
					} \
				} \
			} \
		} \
		if (j) { \
			for (j = 0; j != h->n_buckets; ++j) { \
				if (__ac_iseither(h->flags, j) == 0) { \
					khkey_t key = h->keys[j]; \
					khval_t val; \
					if (kh_is_map) val = h->vals[j]; \
					__ac_set_isdel_true(h->flags, j); \
					while (1) { \
						khint_t inc, k, i; \
						k = __hash_func(key); \
						i = k % new_n_buckets; \
						inc = 1 + k % (new_n_buckets - 1); \
						while (!__ac_isempty(new_flags, i)) { \
							if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
							else i += inc; \
						} \
						__ac_set_isempty_false(new_flags, i); \
						if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
							/* target bucket still holds an unmoved element: swap it out and re-place it next */ \
							{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
							if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
							__ac_set_isdel_true(h->flags, i); \
						} else { \
							h->keys[i] = key; \
							if (kh_is_map) h->vals[i] = val; \
							break; \
						} \
					} \
				} \
			} \
			if (h->n_buckets > new_n_buckets) { \
				/* shrinking: if realloc fails the old, larger array is still valid, so keep it */ \
				khkey_t *new_keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
				if (new_keys) h->keys = new_keys; \
				if (kh_is_map) { \
					khval_t *new_vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
					if (new_vals) h->vals = new_vals; \
				} \
			} \
			free(h->flags); \
			h->flags = new_flags; \
			h->n_buckets = new_n_buckets; \
			h->n_occupied = h->size; \
			h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
		} \
		return 0; \
	} \
	static inline khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
	{ \
		khint_t x; \
		if (h->n_occupied >= h->upper_bound) { \
			/* many deleted buckets: rehash at the same size; otherwise grow */ \
			khint_t target = (h->n_buckets > (h->size<<1))? h->n_buckets - 1 : h->n_buckets + 1; \
			if (kh_resize_##name(h, target) < 0) { \
				*ret = -1; \
				return h->n_buckets; \
			} \
		} \
		{ \
			khint_t inc, k, i, site, last; \
			x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
			if (__ac_isempty(h->flags, i)) x = i; \
			else { \
				inc = 1 + k % (h->n_buckets - 1); last = i; \
				while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
					if (__ac_isdel(h->flags, i)) site = i; \
					if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
					else i += inc; \
					if (i == last) { x = site; break; } \
				} \
				if (x == h->n_buckets) { \
					if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
					else x = i; \
				} \
			} \
		} \
		if (__ac_isempty(h->flags, x)) { \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; ++h->n_occupied; \
			*ret = 1; \
		} else if (__ac_isdel(h->flags, x)) { \
			h->keys[x] = key; \
			__ac_set_isboth_false(h->flags, x); \
			++h->size; \
			*ret = 2; \
		} else *ret = 0; \
		return x; \
	} \
	static inline void kh_del_##name(kh_##name##_t *h, khint_t x) \
	{ \
		if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
			__ac_set_isdel_true(h->flags, x); \
			--h->size; \
		} \
	}
286
287 /* --- BEGIN OF HASH FUNCTIONS --- */
288
/*! @function
  @abstract     Hash function for 32-bit integer keys
  @param  key   The integer [khint32_t]
  @return       The key converted to khint32_t, used directly as the hash [khint_t]
 */
#define kh_int_hash_func(key) ((khint32_t)(key))

/*! @function
  @abstract     Equality test for 32-bit integer keys
  @param  a     First key
  @param  b     Second key
  @return       Non-zero if the keys compare equal
 */
#define kh_int_hash_equal(a, b) ((a) == (b))

/*! @function
  @abstract     Hash function for 64-bit integer keys
  @param  key   The integer [khint64_t]
  @return       key>>33, key and key<<11 XORed together and truncated to
                32 bits [khint_t]
 */
#define kh_int64_hash_func(key) ((khint32_t)(((key) >> 33) ^ (key) ^ ((key) << 11)))

/*! @function
  @abstract     Equality test for 64-bit integer keys
  @param  a     First key
  @param  b     Second key
  @return       Non-zero if the keys compare equal
 */
#define kh_int64_hash_equal(a, b) ((a) == (b))
309 /*! @function
310 @abstract const char* hash function
311 @param s Pointer to a null terminated string
312 @return The hash value
313 */
314 static inline khint_t __ac_X31_hash_string(const char *s)
315 {
316 khint_t h = *s;
317 if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
318 return h;
319 }
/*! @function
  @abstract     Hash function for const char* keys (wrapper around
                __ac_X31_hash_string)
  @param  key   Pointer to a null terminated string [const char*]
  @return       The hash value [khint_t]
 */
#define kh_str_hash_func(key) __ac_X31_hash_string(key)

/*! @function
  @abstract     Equality test for const char* keys
  @param  a     First null terminated string
  @param  b     Second null terminated string
  @return       1 if strcmp() reports the strings as equal, 0 otherwise
 */
#define kh_str_hash_equal(a, b) (!strcmp(a, b))
330
331 /* --- END OF HASH FUNCTIONS --- */
332
333 /* Other necessary macros... */
334
/*!
  @abstract     Name of the hash table type generated by KHASH_INIT.
  @param  name  Name of the hash table [symbol]
 */
#define khash_t(name) kh_##name##_t

/*! @function
  @abstract     Allocate a new, empty hash table.
  @param  name  Name of the hash table [symbol]
  @return       Pointer to the hash table [khash_t(name)*]
 */
#define kh_init(name) kh_init_##name()

/*! @function
  @abstract     Free a hash table and the arrays it owns.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_destroy(name, h) kh_destroy_##name(h)

/*! @function
  @abstract     Remove all elements while keeping the allocated memory.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
 */
#define kh_clear(name, h) kh_clear_##name(h)

/*! @function
  @abstract     Resize (rehash) a hash table.
  @discussion   The bucket count actually used is taken from the internal
                size list; nothing happens if the current elements would
                not fit under the load limit of that size.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  s     Requested number of buckets [khint_t]
 */
#define kh_resize(name, h, s) kh_resize_##name(h, s)

/*! @function
  @abstract     Insert a key into the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @param  r     Extra return code: 0 if the key is already present in the
                hash table; 1 if the bucket was empty (never used); 2 if the
                element in the bucket had been deleted [int*]
  @return       Iterator to the inserted element [khint_t]
 */
#define kh_put(name, h, k, r) kh_put_##name(h, k, r)

/*! @function
  @abstract     Look up a key in the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Key [type of keys]
  @return       Iterator to the found element, or kh_end(h) if the element
                is absent [khint_t]
 */
#define kh_get(name, h, k) kh_get_##name(h, k)

/*! @function
  @abstract     Remove an element from the hash table.
  @param  name  Name of the hash table [symbol]
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  k     Iterator to the element to be deleted [khint_t]
 */
#define kh_del(name, h, k) kh_del_##name(h, k)
398
399
/*! @function
  @abstract     Test whether a bucket contains data.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       1 if the bucket is neither empty nor deleted; 0 otherwise [int]
 */
#define kh_exist(h, x) (__ac_iseither((h)->flags, (x)) == 0)

/*! @function
  @abstract     Access the key stored at an iterator.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Key [type of keys]
 */
#define kh_key(h, x) ((h)->keys[(x)])

/*! @function
  @abstract     Access the value stored at an iterator.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @param  x     Iterator to the bucket [khint_t]
  @return       Value [type of values]
  @discussion   Hash sets never allocate the value array, so calling this
                on a set dereferences a null pointer (typically a segfault).
 */
#define kh_val(h, x) ((h)->vals[(x)])

/*! @function
  @abstract     Alias of kh_val()
 */
#define kh_value(h, x) ((h)->vals[(x)])

/*! @function
  @abstract     Get the start iterator.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The start iterator, always 0 [khint_t]
 */
#define kh_begin(h) ((khint_t)0)

/*! @function
  @abstract     Get the end iterator.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       The end iterator, equal to the number of buckets [khint_t]
 */
#define kh_end(h) ((h)->n_buckets)

/*! @function
  @abstract     Get the number of elements in the hash table.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of elements in the hash table [khint_t]
 */
#define kh_size(h) ((h)->size)

/*! @function
  @abstract     Get the number of buckets in the hash table.
  @param  h     Pointer to the hash table [khash_t(name)*]
  @return       Number of buckets in the hash table [khint_t]
 */
#define kh_n_buckets(h) ((h)->n_buckets)
457
458 /* More convenient interfaces */
459
/*! @function
  @abstract     Instantiate a hash set containing 32-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT(name) \
	KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract        Instantiate a hash map containing 32-bit integer keys
  @param  name     Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT(name, khval_t) \
	KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)

/*! @function
  @abstract     Instantiate a hash set containing 64-bit integer keys
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_INT64(name) \
	KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)

/*! @function
  @abstract        Instantiate a hash map containing 64-bit integer keys
  @param  name     Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_INT64(name, khval_t) \
	KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)

/* Key type of the string-keyed tables below. */
typedef const char *kh_cstr_t;

/*! @function
  @abstract     Instantiate a hash set containing const char* keys
  @discussion   Only the pointers are stored in the table; the strings
                themselves are not copied.
  @param  name  Name of the hash table [symbol]
 */
#define KHASH_SET_INIT_STR(name) \
	KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)

/*! @function
  @abstract        Instantiate a hash map containing const char* keys
  @discussion      Only the pointers are stored in the table; the strings
                   themselves are not copied.
  @param  name     Name of the hash table [symbol]
  @param  khval_t  Type of values [type]
 */
#define KHASH_MAP_INIT_STR(name, khval_t) \
	KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
505
506 #endif /* __AC_KHASH_H */