comparison srf2fastq/io_lib-1.12.2/io_lib/hash_table.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #ifdef HAVE_CONFIG_H
2 #include "io_lib_config.h"
3 #endif
4
5 #include <stdio.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include "io_lib/os.h"
9 #include "io_lib/hash_table.h"
10 #include "io_lib/jenkins_lookup3.h"
11
12 /* =========================================================================
13 * TCL's hash function. Basically hash*9 + char.
14 * =========================================================================
15 */
16
17 uint32_t HashTcl(uint8_t *data, int len) {
18 uint32_t hash = 0;
19 int i;
20
21 for (i = 0; i < len; i++) {
22 hash += (hash<<3) + data[i];
23 }
24
25 return hash;
26 }
27
28 /* =========================================================================
29 * Paul Hsieh's hash function
30 * http://www.azillionmonkeys.com/qed/hash.html
31 * =========================================================================
32 */
33
34 #undef get16bits
35 #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
36 || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
37 #define get16bits(d) (*((const uint16_t *) (d)))
38 #endif
39
40 #if !defined (get16bits)
41 #define get16bits(d) ((((const uint8_t *)(d))[1] << 8UL)\
42 +((const uint8_t *)(d))[0])
43 #endif
44
45 uint32_t HashHsieh(uint8_t *data, int len) {
46 uint32_t hash = 0, tmp;
47 int rem;
48
49 if (len <= 0 || data == NULL) return 0;
50
51 rem = len & 3;
52 len >>= 2;
53
54 /* Main loop */
55 for (;len > 0; len--) {
56 hash += get16bits (data);
57 tmp = (get16bits (data+2) << 11) ^ hash;
58 hash = (hash << 16) ^ tmp;
59 data += 2*sizeof (uint16_t);
60 hash += hash >> 11;
61 }
62
63 /* Handle end cases */
64 switch (rem) {
65 case 3: hash += get16bits (data);
66 hash ^= hash << 16;
67 hash ^= data[sizeof (uint16_t)] << 18;
68 hash += hash >> 11;
69 break;
70 case 2: hash += get16bits (data);
71 hash ^= hash << 11;
72 hash += hash >> 17;
73 break;
74 case 1: hash += *data;
75 hash ^= hash << 10;
76 hash += hash >> 1;
77 }
78
79 /* Force "avalanching" of final 127 bits */
80 hash ^= hash << 3;
81 hash += hash >> 5;
82 hash ^= hash << 2;
83 hash += hash >> 15;
84 hash ^= hash << 10;
85
86 return hash;
87 }
88
89 /* =========================================================================
90 * Bob Jenkins' hash function
91 * http://burtleburtle.net/bob/hash/doobs.html
92 *
93 * See jenkins_lookup3.c for a new version of this that has good hash
94 * characteristics for a full 64-bit hash value.
95 * =========================================================================
96 */
97
98 #define hashsize(n) ((uint32_t)1<<(n))
99 #define hashmask(n) (hashsize(n)-1)
100
101 /*
102 --------------------------------------------------------------------
103 mix -- mix 3 32-bit values reversibly.
104 For every delta with one or two bits set, and the deltas of all three
105 high bits or all three low bits, whether the original value of a,b,c
106 is almost all zero or is uniformly distributed,
107 * If mix() is run forward or backward, at least 32 bits in a,b,c
108 have at least 1/4 probability of changing.
109 * If mix() is run forward, every bit of c will change between 1/3 and
110 2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
111 mix() was built out of 36 single-cycle latency instructions in a
112 structure that could supported 2x parallelism, like so:
113 a -= b;
114 a -= c; x = (c>>13);
115 b -= c; a ^= x;
116 b -= a; x = (a<<8);
117 c -= a; b ^= x;
118 c -= b; x = (b>>13);
119 ...
120 Unfortunately, superscalar Pentiums and Sparcs can't take advantage
121 of that parallelism. They've also turned some of those single-cycle
122 latency instructions into multi-cycle latency instructions. Still,
123 this is the fastest good hash I could find. There were about 2^^68
124 to choose from. I only looked at a billion or so.
125 --------------------------------------------------------------------
126 */
127 #define mix(a,b,c) \
128 { \
129 a -= b; a -= c; a ^= (c>>13); \
130 b -= c; b -= a; b ^= (a<<8); \
131 c -= a; c -= b; c ^= (b>>13); \
132 a -= b; a -= c; a ^= (c>>12); \
133 b -= c; b -= a; b ^= (a<<16); \
134 c -= a; c -= b; c ^= (b>>5); \
135 a -= b; a -= c; a ^= (c>>3); \
136 b -= c; b -= a; b ^= (a<<10); \
137 c -= a; c -= b; c ^= (b>>15); \
138 }
139
140 /*
141 --------------------------------------------------------------------
142 hash() -- hash a variable-length key into a 32-bit value
143 k : the key (the unaligned variable-length array of bytes)
144 len : the length of the key, counting by bytes
145 initval : can be any 4-byte value
146 Returns a 32-bit value. Every bit of the key affects every bit of
147 the return value. Every 1-bit and 2-bit delta achieves avalanche.
148 About 6*len+35 instructions.
149
150 The best hash table sizes are powers of 2. There is no need to do
151 mod a prime (mod is sooo slow!). If you need less than 32 bits,
152 use a bitmask. For example, if you need only 10 bits, do
153 h = (h & hashmask(10));
154 In which case, the hash table should have hashsize(10) elements.
155
156 If you are hashing n strings (uint8_t **)k, do it like this:
157 for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
158
159 By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
160 code any way you wish, private, educational, or commercial. It's free.
161
162 See http://burtleburtle.net/bob/hash/evahash.html
163 Use for hash table lookup, or anything where one collision in 2^^32 is
164 acceptable. Do NOT use for cryptographic purposes.
165 --------------------------------------------------------------------
166 */
167
168 uint32_t HashJenkins(uint8_t *k, int length /*, uint32_t initval */)
169 {
170 register uint32_t a,b,c,len;
171
172 /* Set up the internal state */
173 len = length;
174 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
175 c = 0; /* initval; */ /* the previous hash value */
176
177 /*---------------------------------------- handle most of the key */
178 while (len >= 12)
179 {
180 a += (k[0] +((uint32_t)k[1]<<8) +((uint32_t)k[2]<<16) +((uint32_t)k[3]<<24));
181 b += (k[4] +((uint32_t)k[5]<<8) +((uint32_t)k[6]<<16) +((uint32_t)k[7]<<24));
182 c += (k[8] +((uint32_t)k[9]<<8) +((uint32_t)k[10]<<16)+((uint32_t)k[11]<<24));
183 mix(a,b,c);
184 k += 12; len -= 12;
185 }
186
187 /*------------------------------------- handle the last 11 bytes */
188 c += length;
189 switch(len) /* all the case statements fall through */
190 {
191 case 11: c+=((uint32_t)k[10]<<24);
192 case 10: c+=((uint32_t)k[9]<<16);
193 case 9 : c+=((uint32_t)k[8]<<8);
194 /* the first byte of c is reserved for the length */
195 case 8 : b+=((uint32_t)k[7]<<24);
196 case 7 : b+=((uint32_t)k[6]<<16);
197 case 6 : b+=((uint32_t)k[5]<<8);
198 case 5 : b+=k[4];
199 case 4 : a+=((uint32_t)k[3]<<24);
200 case 3 : a+=((uint32_t)k[2]<<16);
201 case 2 : a+=((uint32_t)k[1]<<8);
202 case 1 : a+=k[0];
203 /* case 0: nothing left to add */
204 }
205 mix(a,b,c);
206 /*-------------------------------------------- report the result */
207 return c;
208 }
209
210 /*
211 * An interface to the above hash functions.
212 * Returns:
213 * A 32-bit hash key, suitable for masking down to smaller bit sizes
214 */
215 uint32_t hash(int func, uint8_t *key, int key_len) {
216 switch (func) {
217 case HASH_FUNC_HSIEH:
218 return HashHsieh(key, key_len);
219
220 case HASH_FUNC_TCL:
221 return HashTcl(key, key_len);
222
223 case HASH_FUNC_JENKINS:
224 return HashJenkins(key, key_len);
225
226 case HASH_FUNC_JENKINS3:
227 {
228 uint32_t pc = 0, pb = 0;
229 HashJenkins3(key, key_len, &pc, &pb);
230 return pc;
231 }
232 }
233
234 return 0;
235 }
236
237 /*
238 * As per hash() above but returns a 64-bit key. For 32-bit hash functions
239 * this is simply a duplication of the 32-bit value.
240 */
241 uint64_t hash64(int func, uint8_t *key, int key_len) {
242 uint32_t pc = 0, pb = 0;
243
244 switch (func) {
245 case HASH_FUNC_HSIEH:
246 pb = pc = HashHsieh(key, key_len);
247 break;
248
249 case HASH_FUNC_TCL:
250 pb = pc = HashTcl(key, key_len);
251 break;
252
253 case HASH_FUNC_JENKINS:
254 pb = pc = HashJenkins(key, key_len);
255 break;
256
257 case HASH_FUNC_JENKINS3:
258 HashJenkins3(key, key_len, &pc, &pb);
259 break;
260 }
261
262 return pc + (((uint64_t)pb)<<32);
263 }
264
265 /* =========================================================================
266 * Hash Table handling code
267 * =========================================================================
268 */
269
270 /* Multiplicative factors indicating when to grow or shrink the hash table */
271 #define HASH_TABLE_RESIZE 3
272
273 /*
274 * Creates a HashItem for use with HashTable h.
275 *
276 * Returns:
277 * A pointer to new HashItem on success
278 * NULL on failure.
279 */
280 static HashItem *HashItemCreate(HashTable *h) {
281 HashItem *hi;
282
283 hi = (h->options & HASH_POOL_ITEMS
284 ? pool_alloc(h->hi_pool) : malloc(sizeof(*hi)));
285 if (NULL == hi) return NULL;
286
287 hi->data.p = NULL;
288 hi->data.i = 0;
289 hi->next = NULL;
290 hi->key = NULL;
291 hi->key_len = 0;
292
293 h->nused++;
294
295 return hi;
296 }
297
298 /*
299 * Deallocates a HashItem created via HashItemCreate.
300 *
301 * This function will not remove the item from the HashTable so be sure to
302 * call HashTableDel() first if appropriate.
303 */
304 static void HashItemDestroy(HashTable *h, HashItem *hi, int deallocate_data) {
305 if (!hi) return;
306
307 if (!(h->options & HASH_NONVOLATILE_KEYS) || (h->options & HASH_OWN_KEYS))
308 if (hi->key)
309 free(hi->key);
310
311 if (deallocate_data && hi->data.p)
312 free(hi->data.p);
313
314 if (h->options & HASH_POOL_ITEMS) {
315 pool_free(h->hi_pool, hi);
316 } else {
317 free(hi);
318 }
319
320 h->nused--;
321 }
322
323 /*
324 * Creates a new HashTable object. Size will be rounded up to the next
325 * power of 2. It is a starting point and hash tables may be grown or shrunk
326 * as needed (if HASH_DYNAMIC_SIZE is used).
327 *
328 * If HASH_POOL_ITEMS is used, HashItems will be allocated in blocks to reduce
329 * malloc overhead in the case where a large number of items is required.
330 * HashItems allocated this way will be put on a free list when destroyed; the
331 * memory will only be reclaimed when the entire hash table is destroyed.
332 *
333 * Options are as defined in the header file (see HASH_* macros).
334 *
335 * Returns:
336 * A pointer to a HashTable on success
337 * NULL on failure
338 */
339 HashTable *HashTableCreate(int size, int options) {
340 HashTable *h;
341 int i, bits;
342 uint32_t mask;
343
344 if (!(h = (HashTable *)malloc(sizeof(*h))))
345 return NULL;
346
347 if (options & HASH_POOL_ITEMS) {
348 h->hi_pool = pool_create(sizeof(HashItem));
349 if (NULL == h->hi_pool) {
350 free(h);
351 return NULL;
352 }
353 } else {
354 h->hi_pool = NULL;
355 }
356
357 if (size < 4)
358 size = 4; /* an inconsequential minimum size */
359
360 /* Round the requested size to the next power of 2 */
361 bits = 0;
362 size--;
363 while (size) {
364 size /= 2;
365 bits++;
366 }
367 size = 1<<bits;
368 mask = size-1;
369
370 h->nbuckets = size;
371 h->mask = mask;
372 h->options = options;
373 h->nused = 0;
374 h->bucket = (HashItem **)malloc(sizeof(*h->bucket) * size);
375 if (NULL == h->bucket) {
376 HashTableDestroy(h, 0);
377 return NULL;
378 }
379
380 for (i = 0; i < size; i++) {
381 h->bucket[i] = NULL;
382 }
383
384 return h;
385 }
386
387 /*
388 * Deallocates a HashTable object (created by HashTableCreate).
389 *
390 * The deallocate_data parameter is a boolean to indicate whether the
391 * data attached to the hash table should also be free()d. DO NOT USE
392 * this if the HashData attached was not a pointer allocated using
393 * malloc().
394 */
395 void HashTableDestroy(HashTable *h, int deallocate_data) {
396 int i;
397
398 if (!h)
399 return;
400
401 if (h->bucket) {
402 for (i = 0; i < h->nbuckets; i++) {
403 HashItem *hi = h->bucket[i], *next = NULL;
404 for (hi = h->bucket[i]; hi; hi = next) {
405 next = hi->next;
406 HashItemDestroy(h, hi, deallocate_data);
407 }
408 }
409
410 free(h->bucket);
411 }
412
413 if (h->hi_pool) pool_destroy(h->hi_pool);
414
415 free(h);
416 }
417
418 /*
419 * Resizes a HashTable to have 'newsize' buckets.
420 * This is called automatically when adding or removing items so that the
421 * hash table keeps at a sensible scale.
422 *
423 * FIXME: Halving the size of the hash table is simply a matter of coaelescing
424 * every other bucket. Instead we currently rehash (which is slower).
425 * Doubling the size of the hash table currently requires rehashing, but this
426 * too could be optimised by storing the full 32-bit hash of the key along
427 * with the key itself. This then means that it's just a matter of seeing what
428 * the next significant bit is. It's a memory vs speed tradeoff though and
429 * re-hashing is pretty quick.
430 *
431 * Returns 0 for success
432 * -1 for failure
433 */
434 int HashTableResize(HashTable *h, int newsize) {
435 HashTable *h2;
436 int i;
437
438 /* fprintf(stderr, "Resizing to %d\n", newsize); */
439
440 /* Create a new hash table and rehash everything into it */
441 h2 = HashTableCreate(newsize, h->options);
442
443 for (i = 0; i < h->nbuckets; i++) {
444 HashItem *hi, *next;
445 for (hi = h->bucket[i]; hi; hi = next) {
446 uint64_t hv = hash64(h2->options & HASH_FUNC_MASK,
447 (uint8_t *)hi->key, hi->key_len) & h2->mask;
448 next = hi->next;
449 hi->next = h2->bucket[hv];
450 h2->bucket[hv] = hi;
451 }
452 }
453
454 /* Swap the links over & free */
455 free(h->bucket);
456 h->bucket = h2->bucket;
457 h->nbuckets = h2->nbuckets;
458 h->mask = h2->mask;
459 free(h2);
460
461 return 0;
462 }
463
464 /*
465 * Adds a HashData item to HashTable h with a specific key. Key can be binary
466 * data, but if key_len is passed as zero then strlen() will be used to
467 * determine the key length.
468 *
469 * The "new" pointer may be passed as NULL. When not NULL it is filled out
470 * as a boolean to indicate whether the key is already in this hash table.
471 *
472 * The HASH_ALLOW_DUP_KEYS option (specified when using HashTableCreate)
473 * will allow duplicate keys to be stored, and hence *new is also zero.
474 * By default duplicate keys are disallowed.
475 *
476 * Keys are considered to be volatile memory (ie temporary storage) and so the
477 * hash table takes separate copies of them. To avoid this use the
478 * HASH_NONVOLATILE_KEYS option.
479 *
480 * If the HASH_OWN_KEYS option was specified when creating the table then
481 * keys will be considered to be owned by the hash table. In this case
482 * the key will be freed when the table is destroyed regardless of
483 * whether the HASH_NONVOLATILE_KEYS option was used to allocate its
484 * own private copy.
485 *
486 * Returns:
487 * The HashItem created (or matching if a duplicate) on success
488 * NULL on failure
489 */
490 HashItem *HashTableAdd(HashTable *h, char *key, int key_len, HashData data,
491 int *new) {
492 uint64_t hv;
493 HashItem *hi;
494
495 if (!key_len)
496 key_len = strlen(key);
497
498 hv = hash64(h->options & HASH_FUNC_MASK, (uint8_t *)key, key_len)
499 & h->mask;
500
501 /* Already exists? */
502 if (!(h->options & HASH_ALLOW_DUP_KEYS)) {
503 for (hi = h->bucket[hv]; hi; hi = hi->next) {
504 if (key_len == hi->key_len &&
505 memcmp(key, hi->key, key_len) == 0) {
506 if (new) *new = 0;
507 return hi;
508 }
509 }
510 }
511
512 /* No, so create a new one and link it in */
513 if (NULL == (hi = HashItemCreate(h)))
514 return NULL;
515
516 if (h->options & HASH_NONVOLATILE_KEYS)
517 hi->key = key;
518 else {
519 hi->key = (char *)malloc(key_len+1);
520 memcpy(hi->key, key, key_len);
521 hi->key[key_len] = 0; /* null terminate incase others print keys */
522 }
523 hi->key_len = key_len;
524 hi->data = data;
525 hi->next = h->bucket[hv];
526 h->bucket[hv] = hi;
527
528 if ((h->options & HASH_DYNAMIC_SIZE) &&
529 h->nused > HASH_TABLE_RESIZE * h->nbuckets)
530 HashTableResize(h, h->nbuckets*4);
531
532 if (new) *new = 1;
533
534 return hi;
535 }
536
537
538 /*
539 * Removes a specified HashItem from the HashTable. (To perform this it needs
540 * to rehash based on the hash key as hash_item only has a next pointer and
541 * not a previous pointer.)
542 *
543 * The HashItem itself is also destroyed (by an internal call to
544 * HashItemDestroy). The deallocate_data parameter controls whether the data
545 * associated with the HashItem should also be free()d.
546 *
547 * See also the HashTableRemove() function to remove by key instead of
548 * HashItem.
549 *
550 * Returns 0 on success
551 * -1 on failure (eg HashItem not in the HashTable);
552 */
553 int HashTableDel(HashTable *h, HashItem *hi, int deallocate_data) {
554 uint64_t hv;
555 HashItem *next, *last;
556
557 hv = hash64(h->options & HASH_FUNC_MASK,
558 (uint8_t *)hi->key, hi->key_len) & h->mask;
559
560 for (last = NULL, next = h->bucket[hv]; next;
561 last = next, next = next->next) {
562 if (next == hi) {
563 /* Link last to next->next */
564 if (last)
565 last->next = next->next;
566 else
567 h->bucket[hv] = next->next;
568
569 HashItemDestroy(h, hi, deallocate_data);
570
571 return 0;
572 }
573 }
574
575 return -1;
576 }
577
578
579 /*
580 * Searches the HashTable for the data registered with 'key' and removes
581 * these items from the HashTable. In essence this is a combination of
582 * HashTableSearch and HashTableDel functions.
583 *
584 * If HASH_ALLOW_DUP_KEYS is used this will remove all items matching 'key',
585 * otherwise just a single item will be removed.
586 *
587 * If 'deallocate_data' is true the data associated with the HashItem will
588 * be free()d.
589 *
590 * Returns
591 * 0 on success (at least one item found)
592 * -1 on failure (no items found).
593 */
594 int HashTableRemove(HashTable *h, char *key, int key_len,
595 int deallocate_data) {
596 uint64_t hv;
597 HashItem *last, *next, *hi;
598 int retval = -1;
599
600 if (!key_len)
601 key_len = strlen(key);
602
603 hv = hash64(h->options & HASH_FUNC_MASK, (uint8_t *)key, key_len)
604 & h->mask;
605
606 last = NULL;
607 next = h->bucket[hv];
608
609 while (next) {
610 hi = next;
611 if (key_len == hi->key_len &&
612 memcmp(key, hi->key, key_len) == 0) {
613 /* An item to remove, adjust links and destroy */
614 if (last)
615 last->next = hi->next;
616 else
617 h->bucket[hv] = hi->next;
618
619 next = hi->next;
620 HashItemDestroy(h, hi, deallocate_data);
621
622 retval = 0;
623 if (!(h->options & HASH_ALLOW_DUP_KEYS))
624 break;
625
626 } else {
627 /* We only update last when it's something we haven't destroyed */
628 last = hi;
629 next = hi->next;
630 }
631 }
632
633 return retval;
634 }
635
636 /*
637 * Searches the HashTable for the data registered with 'key'.
638 * If HASH_ALLOW_DUP_KEYS is used this will just be the first one found.
639 * You will then need to use HashTableNext to iterate through the matches.
640 *
641 * Returns
642 * HashItem if found
643 * NULL if not found
644 */
645 HashItem *HashTableSearch(HashTable *h, char *key, int key_len) {
646 uint64_t hv;
647 HashItem *hi;
648
649 if (!key_len)
650 key_len = strlen(key);
651
652 hv = hash64(h->options & HASH_FUNC_MASK, (uint8_t *)key, key_len)
653 & h->mask;
654 for (hi = h->bucket[hv]; hi; hi = hi->next) {
655 if (key_len == hi->key_len &&
656 memcmp(key, hi->key, key_len) == 0)
657 return hi;
658 }
659
660 return NULL;
661 }
662
663 /*
664 * Find the next HashItem (starting from 'hi') to also match this key.
665 * This is only valid when the HASH_ALLOW_DUP_KEYS is in use.
666 *
667 * Returns
668 * HashItem if found
669 * NULL if not found
670 */
671 HashItem *HashTableNext(HashItem *hi, char *key, int key_len) {
672 if (!hi)
673 return NULL;
674
675 for (hi = hi->next; hi; hi = hi->next) {
676 if (key_len == hi->key_len &&
677 memcmp(key, hi->key, key_len) == 0)
678 return hi;
679 }
680
681 return NULL;
682 }
683
684 /*
685 * Dumps a textual represenation of the hash table to stdout.
686 */
687 void HashTableDump(HashTable *h, FILE *fp) {
688 int i;
689 for (i = 0; i < h->nbuckets; i++) {
690 HashItem *hi;
691 for (hi = h->bucket[i]; hi; hi = hi->next) {
692 fprintf(fp, "%.*s\n", hi->key_len, hi->key);
693 }
694 }
695 }
696
697 /*
698 * Produces some simple statistics on the hash table population.
699 */
700 void HashTableStats(HashTable *h, FILE *fp) {
701 int i;
702 double avg = (double)h->nused / h->nbuckets;
703 double var = 0;
704 int maxlen = 0;
705 int filled = 0;
706 int clen[51];
707
708 for (i = 0; i <= 50; i++)
709 clen[i] = 0;
710
711 for (i = 0; i < h->nbuckets; i++) {
712 int len = 0;
713 HashItem *hi;
714 for (hi = h->bucket[i]; hi; hi = hi->next) {
715 len++;
716 }
717 if (len > 0) {
718 filled++;
719 if (len > maxlen)
720 maxlen = len;
721 }
722 clen[len <= 50 ? len : 50]++;
723 var += (len-avg) * (len-avg);
724 }
725 var /= h->nbuckets;
726 /* sd = sqrt(var); */
727
728 fprintf(fp, "Nbuckets = %d\n", h->nbuckets);
729 fprintf(fp, "Nused = %d\n", h->nused);
730 fprintf(fp, "Avg chain = %f\n", avg);
731 fprintf(fp, "Chain var.= %f\n", var);
732 fprintf(fp, "%%age full = %f\n", (100.0*filled)/h->nbuckets);
733 fprintf(fp, "max len = %d\n", maxlen);
734 for (i = 0; i <= maxlen; i++) {
735 fprintf(fp, "Chain %2d = %d\n", i, clen[i]);
736 }
737 }
738
739 /*
740 * --------------------------------------------------------------------
741 * Below we have a specialisation of the HashTable code where the data
742 * attached to the hash table is a position,size pair. This allows for the
743 * hash table to encode positions and sizes of items within a file archive.
744 * --------------------------------------------------------------------
745 */
746
747 /*
748 * Writes the HashTable structures to 'fp'.
749 * This is a specialisation of the HashTable where the HashData is a
750 * position,size tuple.
751 *
752 * This consists of the following format:
753 * Header:
754 * ".hsh" (magic numebr)
755 * x4 (1-bytes of version code, eg "1.00")
756 * x1 (HASH_FUNC_? function used)
757 * x1 (number of file headers: FH. These count from 1 to FH inclusive)
758 * x1 (number of file footers: FF. These count from 1 to FF inclusive)
759 * x1 (reserved - zero for now)
760 * x4 (4-bytes big-endian; number of hash buckets)
761 * x8 (offset to add to item positions. eg size of this index)
762 * x4 (4-bytes big-endian; number of bytes in hash file, inc. header)
763 * Archive name:
764 * x1 (length, zero => no name)
765 * ? (archive filename)
766 * File headers (FH copies of):
767 * x8 (position)
768 * x4 (size)
769 * File footers (FH copies of):
770 * x8 (position)
771 * x4 (size)
772 * Buckets (multiples of)
773 * x4 (4-byte offset of linked list pos, rel. to the start of the hdr)
774 * Items (per bucket chain, not written if Bucket[?]==0)
775 * x1 (key length, zero => end of chain)
776 * ? (key)
777 * x0.5 (File header to use. zero => none) top 4 bits
778 * x0.5 (File footer to use. zero => none) bottom 4 bits
779 * x8 (position)
780 * x4 (size)
781 * ... arbitrary gap (but likely none)
782 * Index footer:
783 * ".hsh" (magic number)
784 * x8 (offset to Hash Header. +ve = absolute, -ve = relative to end)
785 *
786 * It is designed such that on-disk querying of the hash table can be done
787 * purely by forward seeks. (This is generally faster due to pre-fetching of
788 * the subsequent blocks by many disk controllers.)
789 *
790 * Returns: the number of bytes written on success
791 * -1 for error
792 */
793 uint64_t HashFileSave(HashFile *hf, FILE *fp, int64_t offset) {
794 int i;
795 HashItem *hi;
796 uint32_t *bucket_pos;
797 uint64_t hfsize = 0, be_hfsize;
798 HashTable *h = hf->h;
799 HashFileFooter foot;
800
801 /* Compute the coordinates of the hash items */
802 hfsize = HHSIZE; /* header */
803 hfsize += 1 + (hf->archive
804 ? strlen(hf->archive)
805 : 0); /* filename */
806 hfsize += h->nbuckets * 4; /* buckets */
807 for (i = 0; i < hf->nheaders; i++) /* headers */
808 hfsize += 12;
809 for (i = 0; i < hf->nfooters; i++) /* footers */
810 hfsize += 12;
811 bucket_pos = (uint32_t *)calloc(h->nbuckets, sizeof(uint32_t));
812 for (i = 0; i < h->nbuckets; i++) {
813 bucket_pos[i] = hfsize;
814
815 if (!(hi = h->bucket[i]))
816 continue;
817 for (; hi; hi = hi->next) {
818 hfsize += 1 + 1 + hi->key_len + 8 + 4; /* keys, pos, size */
819 }
820 hfsize++; /* list-end marker */
821 }
822 hfsize += sizeof(foot);
823
824 /* Write the header: */
825 memcpy(hf->hh.magic, HASHFILE_MAGIC, 4);
826 memcpy(hf->hh.vers, HASHFILE_VERSION, 4);
827 hf->hh.hfunc = h->options & HASH_FUNC_MASK;
828 hf->hh.nheaders = hf->nheaders;
829 hf->hh.nfooters = hf->nfooters;
830 hf->hh.nbuckets = be_int4(h->nbuckets);
831 hf->hh.offset = offset == HASHFILE_PREPEND
832 ? be_int8(hfsize) /* archive will be append to this file */
833 : be_int8(offset);
834 hf->hh.size = be_int4(hfsize);
835 fwrite(&hf->hh, HHSIZE, 1, fp);
836
837 /* Write the archive filename, if known */
838 if (hf->archive && *hf->archive) {
839 fputc(strlen(hf->archive), fp);
840 fputs(hf->archive, fp);
841 } else {
842 fputc(0, fp);
843 }
844
845 /* Write out the headers and footers */
846 for (i = 0; i < hf->nheaders; i++) {
847 HashFileSection hs;
848 hs.pos = be_int8(hf->headers[i].pos);
849 fwrite(&hs.pos, 8, 1, fp);
850 hs.size = be_int4(hf->headers[i].size);
851 fwrite(&hs.size, 4, 1, fp);
852 }
853
854 for (i = 0; i < hf->nfooters; i++) {
855 HashFileSection hs;
856 hs.pos = be_int8(hf->footers[i].pos);
857 fwrite(&hs.pos, 8, 1, fp);
858 hs.size = be_int4(hf->footers[i].size);
859 fwrite(&hs.size, 4, 1, fp);
860 }
861
862 /* Write out hash buckets */
863 for (i = 0; i < h->nbuckets; i++) {
864 uint32_t zero = 0;
865 uint32_t be32;
866
867 if (!(hi = h->bucket[i])) {
868 fwrite(&zero, 4, 1, fp);
869 continue;
870 }
871
872 be32 = be_int4(bucket_pos[i]);
873 fwrite(&be32, 4, 1, fp);
874 }
875 free(bucket_pos);
876
877 /*
878 * Write the hash_item linked lists. The first item is the
879 * hash key length. We append a zero to the end of the list so we
880 * can check this key length to determine the end of this hash
881 * item list.
882 */
883 for (i = 0; i < h->nbuckets; i++) {
884 if (!(hi = h->bucket[i]))
885 continue;
886 for (; hi; hi = hi->next) {
887 uint64_t be64;
888 uint32_t be32;
889 HashFileItem *hfi = (HashFileItem *)hi->data.p;
890 unsigned char headfoot = 0;
891
892 fprintf(fp, "%c%.*s", hi->key_len,
893 hi->key_len, hi->key);
894 headfoot = (((hfi->header) & 0xf) << 4) | ((hfi->footer) & 0xf);
895 fwrite(&headfoot, 1, 1, fp);
896 be64 = be_int8(hfi->pos);
897 fwrite(&be64, 8, 1, fp);
898 be32 = be_int4(hfi->size);
899 fwrite(&be32, 4, 1, fp);
900 }
901 fputc(0, fp);
902 }
903
904 /* Finally write the footer referencing back to the header start */
905 memcpy(foot.magic, HASHFILE_MAGIC, 4);
906 be_hfsize = be_int8(-hfsize);
907 memcpy(foot.offset, &be_hfsize, 8);
908 fwrite(&foot, sizeof(foot), 1, fp);
909
910 return hfsize;
911 }
912
913 /*
914 * Reads an entire HashTable from fp.
915 *
916 * Returns:
917 * A filled out HashTable pointer on success
918 * NULL on failure
919 */
920 HashFile *HashFileLoad_old(FILE *fp) {
921 int i;
922 HashTable *h;
923 HashItem *hi;
924 HashFile *hf;
925 uint32_t *bucket_pos;
926 unsigned char *htable;
927 int htable_pos;
928 int fnamelen;
929
930 if (NULL == (hf = (HashFile *)calloc(1, sizeof(HashFile))))
931 return NULL;
932 if (NULL == (htable = (unsigned char *)malloc(HHSIZE)))
933 return NULL;
934
935 /* Read and create the hash table header */
936 if (HHSIZE != fread(htable, 1, HHSIZE, fp))
937 return NULL;
938 memcpy(&hf->hh, htable, HHSIZE);
939 hf->hh.nbuckets = be_int4(hf->hh.nbuckets);
940 hf->hh.offset = be_int8(hf->hh.offset);
941 hf->hh.size = be_int4(hf->hh.size);
942 hf->h = h = HashTableCreate(hf->hh.nbuckets, hf->hh.hfunc);
943 bucket_pos = (uint32_t *)calloc(h->nbuckets, sizeof(uint32_t));
944
945 /* Load the archive filename */
946 fnamelen = fgetc(fp);
947 if (fnamelen) {
948 hf->archive = (char *)malloc(fnamelen+1);
949 fread(hf->archive, 1, fnamelen, fp);
950 hf->archive[fnamelen] = 0;
951 if (hf->archive[0] == 0) {
952 free(hf->archive);
953 hf->archive = NULL;
954 }
955 }
956
957 /* Load the rest of the hash table to memory */
958 htable_pos = HHSIZE + fnamelen + 1;
959 if (NULL == (htable = (unsigned char *)realloc(htable, hf->hh.size)))
960 return NULL;
961 if (hf->hh.size-htable_pos !=
962 fread(&htable[htable_pos], 1, hf->hh.size-htable_pos, fp))
963 return NULL;
964
965 /* Read the header / footer items */
966 for (i = 0; i < hf->hh.nheaders; i++)
967 htable_pos += 8; /* skip them for now */
968 for (i = 0; i < hf->hh.nfooters; i++)
969 htable_pos += 8; /* skip them for now */
970
971 /* Identify the "bucket pos". Detemines which buckets have data */
972 for (i = 0; i < h->nbuckets; i++) {
973 memcpy(&bucket_pos[i], &htable[htable_pos], 4);
974 bucket_pos[i] = be_int4(bucket_pos[i]);
975 htable_pos += 4;
976 }
977
978 /* Read the hash table items */
979 for (i = 0; i < h->nbuckets; i++) {
980 if (!bucket_pos[i])
981 continue;
982 for (;;) {
983 int c;
984 unsigned char uc;
985 char key[256];
986 uint64_t pos;
987 uint32_t size;
988 HashFileItem *hfi;
989
990 c = htable[htable_pos++];
991 if (c == EOF || !c)
992 break;
993
994 /* key */
995 memcpy(key, &htable[htable_pos], c);
996 htable_pos += c;
997
998 /* header/footer */
999 uc = htable[htable_pos++];
1000 hfi = (HashFileItem *)malloc(sizeof(*hfi));
1001 hfi->header = (uc >> 4) & 0xf;
1002 hfi->footer = uc & 0xf;
1003
1004 /* pos */
1005 memcpy(&pos, &htable[htable_pos], 8);
1006 htable_pos += 8;
1007 hfi->pos = be_int8(pos) + hf->hh.offset;
1008
1009 /* size */
1010 memcpy(&size, &htable[htable_pos], 4);
1011 htable_pos += 4;
1012 hfi->size = be_int4(size);
1013
1014 /* Add to the hash table */
1015 hi = HashItemCreate(h);
1016 hi->next = h->bucket[i];
1017 h->bucket[i] = hi;
1018 hi->key_len = c;
1019 hi->key = (char *)malloc(c+1);
1020 memcpy(hi->key, key, c);
1021 hi->key[c] = 0; /* For debugging convenience only */
1022 hi->data.p = hfi;
1023 }
1024 }
1025
1026 fprintf(stderr, "done\n");
1027 fflush(stderr);
1028 free(bucket_pos);
1029
1030 return hf;
1031 }
1032
1033 /*
1034 * Opens a stored hash table file. It also internally keeps an open file to
1035 * hash and the archive files.
1036 *
1037 * Returns the HashFile pointer on success
1038 * NULL on failure
1039 */
1040 HashFile *HashFileFopen(FILE *fp) {
1041 HashFile *hf = HashFileCreate(0, 0);
1042 int archive_len;
1043 int i;
1044
1045 /* Set the stdio buffer to be small to avoid massive I/O wastage */
1046
1047 /* Read the header */
1048 hf->hfp = fp;
1049 hf->afp = fp;
1050 hf->hf_start = ftello(hf->hfp);
1051
1052 if (HHSIZE != fread(&hf->hh, 1, HHSIZE, hf->hfp)) {
1053 HashFileDestroy(hf);
1054 return NULL;
1055 }
1056 if (memcmp(HASHFILE_MAGIC, &hf->hh, 4) != 0) {
1057 HashFileFooter foot;
1058 int64_t offset;
1059
1060 /* Invalid magic number, try other end of file! */
1061 fseeko(hf->hfp, -(off_t)sizeof(HashFileFooter), SEEK_END);
1062 if (sizeof(foot) != fread(&foot, 1, sizeof(foot), hf->hfp)) {
1063 HashFileDestroy(hf);
1064 return NULL;
1065 }
1066 if (memcmp(HASHFILE_MAGIC, &foot.magic, 4) != 0) {
1067 HashFileDestroy(hf);
1068 return NULL;
1069 }
1070 memcpy(&offset, foot.offset, 8);
1071 offset = be_int8(offset);
1072 fseeko(hf->hfp, offset, SEEK_CUR);
1073 hf->hf_start = ftello(hf->hfp);
1074 if (HHSIZE != fread(&hf->hh, 1, HHSIZE, hf->hfp)) {
1075 HashFileDestroy(hf);
1076 return NULL;
1077 }
1078 }
1079 if (memcmp(hf->hh.vers, HASHFILE_VERSION, 4) != 0) {
1080 /* incorrect version */
1081 HashFileDestroy(hf);
1082 return NULL;
1083 }
1084
1085 hf->hh.nbuckets = be_int4(hf->hh.nbuckets);
1086 hf->hh.offset = be_int8(hf->hh.offset);
1087 hf->hh.size = be_int4(hf->hh.size);
1088
1089 /* Load the main archive filename */
1090 if ((archive_len = fgetc(hf->hfp))) {
1091 hf->archive = (char *)malloc(archive_len+1);
1092 fread(hf->archive, 1, archive_len, hf->hfp);
1093 hf->archive[archive_len] = 0;
1094 if (hf->archive[0] == 0) {
1095 free(hf->archive);
1096 hf->archive = NULL;
1097 }
1098 }
1099
1100 hf->header_size = HHSIZE + 1 + archive_len +
1101 12 * (hf->hh.nheaders + hf->hh.nfooters);
1102 hf->nheaders = hf->hh.nheaders;
1103 hf->nfooters = hf->hh.nfooters;
1104
1105 /* Load the header and footer locations */
1106 hf->headers = hf->nheaders
1107 ? (HashFileSection *)malloc(hf->nheaders * sizeof(HashFileSection))
1108 : NULL;
1109 for (i = 0; i < hf->nheaders; i++) {
1110 fread(&hf->headers[i].pos, 8, 1, hf->hfp);
1111 fread(&hf->headers[i].size, 4, 1, hf->hfp);
1112 hf->headers[i].pos = be_int8(hf->headers[i].pos) + hf->hh.offset;
1113 hf->headers[i].size = be_int4(hf->headers[i].size);
1114 hf->headers[i].cached_data = NULL;
1115 }
1116
1117 hf->footers = hf->nfooters
1118 ? (HashFileSection *)malloc(hf->nfooters * sizeof(HashFileSection))
1119 : NULL;
1120 for (i = 0; i < hf->nfooters; i++) {
1121 fread(&hf->footers[i].pos, 8, 1, hf->hfp);
1122 fread(&hf->footers[i].size, 4, 1, hf->hfp);
1123 hf->footers[i].pos = be_int8(hf->footers[i].pos) + hf->hh.offset;
1124 hf->footers[i].size = be_int4(hf->footers[i].size);
1125 hf->footers[i].cached_data = NULL;
1126 }
1127
1128 return hf;
1129 }
1130
1131 HashFile *HashFileOpen(char *fname) {
1132 FILE *fp;
1133 HashFile *hf;
1134
1135 /* Open the hash and read the header */
1136 if (NULL == (fp = fopen(fname, "rb")))
1137 return NULL;
1138
1139 if (!(hf = HashFileFopen(fp)))
1140 return NULL;
1141
1142 /* Open the main archive too */
1143 if (hf->archive) {
1144 if (NULL == (hf->afp = fopen(hf->archive, "rb"))) {
1145 /* Possibly done via a relative pathname (optimal infact) */
1146 char *cp;
1147 char aname[1024];
1148 if (NULL == (cp = strrchr(fname, '/'))) {
1149 HashFileDestroy(hf);
1150 return NULL;
1151 }
1152 sprintf(aname, "%.*s%s", (int)(cp-fname+1), fname, hf->archive);
1153 if (NULL == (hf->afp = fopen(aname, "rb"))) {
1154
1155 return NULL;
1156 }
1157 }
1158 } else {
1159 hf->afp = hf->hfp;
1160 }
1161
1162 return hf;
1163 }
1164
1165 HashFile *HashFileLoad(FILE *fp) {
1166 HashFile *hf;
1167 char *htable;
1168 off_t htable_pos;
1169 int i;
1170 HashItem *hi;
1171 HashTable *h;
1172 uint32_t *bucket_pos;
1173 uint32_t hsize;
1174
1175 /* Open the hash table */
1176 fseeko(fp, 0, SEEK_SET);
1177 if (NULL == (hf = HashFileFopen(fp)))
1178 return NULL;
1179
1180 HashTableDestroy(hf->h, 1);
1181 h = hf->h = HashTableCreate(hf->hh.nbuckets, hf->hh.hfunc);
1182 bucket_pos = (uint32_t *)calloc(h->nbuckets, sizeof(uint32_t));
1183
1184 /* Also load in the entire thing to memory */
1185 htable = (char *)malloc(hf->hh.size);
1186 fseeko(fp, hf->hf_start, SEEK_SET);
1187 hsize = fread(htable, 1, hf->hh.size, fp);
1188 if (hf->hh.size != hsize) {
1189 free(htable);
1190 return NULL;
1191 }
1192
1193 /*
1194 * HashFileOpen has already decoded the headers up to and including the
1195 * individual file header/footer sections, but not the buckets and item
1196 * lists, so we start from there.
1197 */
1198 htable_pos = hf->header_size;
1199
1200 /* Identify the "bucket pos". Detemines which buckets have data */
1201 for (i = 0; i < h->nbuckets; i++) {
1202 memcpy(&bucket_pos[i], &htable[htable_pos], 4);
1203 bucket_pos[i] = be_int4(bucket_pos[i]);
1204 htable_pos += 4;
1205 }
1206
1207 /* Read the hash table items */
1208 for (i = 0; i < h->nbuckets; i++) {
1209 if (!bucket_pos[i])
1210 continue;
1211 for (;;) {
1212 int c;
1213 unsigned char uc;
1214 char key[256];
1215 uint64_t pos;
1216 uint32_t size;
1217 HashFileItem *hfi;
1218
1219 c = htable[htable_pos++];
1220 if (c == EOF || !c)
1221 break;
1222
1223 /* key */
1224 memcpy(key, &htable[htable_pos], c);
1225 htable_pos += c;
1226
1227 /* header/footer */
1228 uc = htable[htable_pos++];
1229 hfi = (HashFileItem *)malloc(sizeof(*hfi));
1230 hfi->header = (uc >> 4) & 0xf;
1231 hfi->footer = uc & 0xf;
1232
1233 /* pos */
1234 memcpy(&pos, &htable[htable_pos], 8);
1235 htable_pos += 8;
1236 hfi->pos = be_int8(pos) + hf->hh.offset;
1237
1238 /* size */
1239 memcpy(&size, &htable[htable_pos], 4);
1240 htable_pos += 4;
1241 hfi->size = be_int4(size);
1242
1243 /* Add to the hash table */
1244 hi = HashItemCreate(h);
1245 hi->next = h->bucket[i];
1246 h->bucket[i] = hi;
1247 hi->key_len = c;
1248 hi->key = (char *)malloc(c+1);
1249 memcpy(hi->key, key, c);
1250 hi->key[c] = 0; /* For debugging convenience only */
1251 hi->data.p = hfi;
1252 }
1253 }
1254
1255 fflush(stderr);
1256 free(bucket_pos);
1257
1258 return hf;
1259 }
1260
1261 /*
1262 * Searches the named HashFile for a specific key.
1263 * When found it returns the position and size of the object in pos and size.
1264 *
1265 * Returns
1266 * 0 on success (pos & size updated)
1267 * -1 on failure
1268 */
1269 int HashFileQuery(HashFile *hf, uint8_t *key, int key_len,
1270 HashFileItem *item) {
1271 uint64_t hval;
1272 uint32_t pos;
1273 int klen;
1274 int cur_offset = 0;
1275
1276 /* Hash 'key' to compute the bucket number */
1277 hval = hash64(hf->hh.hfunc, key, key_len) & (hf->hh.nbuckets-1);
1278
1279 /* Read the bucket to find the first linked list item location */
1280 if (-1 == fseeko(hf->hfp, hf->hf_start + 4*hval + hf->header_size,SEEK_SET))
1281 return -1;
1282 if (4 != fread(&pos, 1, 4, hf->hfp))
1283 return -1;
1284 pos = be_int4(pos);
1285 cur_offset = 4*hval + 4 + hf->header_size;
1286
1287 if (0 == pos)
1288 /* No bucket pos => key not present */
1289 return -1;
1290
1291 /* Jump to the HashItems list and look through for key */
1292 if (-1 == fseeko(hf->hfp, pos - cur_offset, SEEK_CUR))
1293 return -1;
1294
1295 for (klen = fgetc(hf->hfp); klen; klen = fgetc(hf->hfp)) {
1296 char k[256];
1297 unsigned char headfoot;
1298 uint64_t pos;
1299 uint32_t size;
1300
1301 fread(k, klen, 1, hf->hfp);
1302 fread(&headfoot, 1, 1, hf->hfp);
1303 item->header = (headfoot >> 4) & 0xf;
1304 item->footer = headfoot & 0xf;
1305 fread(&pos, 8, 1, hf->hfp);
1306 pos = be_int8(pos) + hf->hh.offset;
1307 fread(&size, 4, 1, hf->hfp);
1308 size = be_int4(size);
1309 if (klen == key_len && 0 == memcmp(key, k, key_len)) {
1310 item->pos = pos;
1311 item->size = size;
1312 return 0;
1313 }
1314 }
1315
1316 return -1;
1317 }
1318
1319 HashFile *HashFileCreate(int size, int options) {
1320 HashFile *hf;
1321
1322 if (NULL == (hf = (HashFile *)calloc(1, sizeof(*hf))))
1323 return NULL;
1324 if (NULL == (hf->h = HashTableCreate(size, options)))
1325 return NULL;
1326
1327 return hf;
1328 }
1329
1330 void HashFileDestroy(HashFile *hf) {
1331 if (!hf)
1332 return;
1333
1334 if (hf->h)
1335 HashTableDestroy(hf->h, 1);
1336 if (hf->archive)
1337 free(hf->archive);
1338 if (hf->headers) {
1339 int i;
1340 for (i = 0; i < hf->nheaders; i++) {
1341 if (hf->headers[i].cached_data)
1342 free(hf->headers[i].cached_data);
1343 }
1344 free(hf->headers);
1345 }
1346 if (hf->footers) {
1347 int i;
1348 for (i = 0; i < hf->nfooters; i++) {
1349 if (hf->footers[i].cached_data)
1350 free(hf->footers[i].cached_data);
1351 }
1352 free(hf->footers);
1353 }
1354
1355 if (hf->afp)
1356 fclose(hf->afp);
1357 if (hf->hfp && hf->hfp != hf->afp)
1358 fclose(hf->hfp);
1359
1360 free(hf);
1361 }
1362
1363 /*
1364 * Extracts the contents for a file out of the HashFile.
1365 */
1366 char *HashFileExtract(HashFile *hf, char *fname, size_t *len) {
1367 HashFileItem hfi;
1368 size_t sz, pos;
1369 char *data;
1370 HashFileSection *head = NULL, *foot = NULL;
1371
1372 /* Find out if and where the item is in the archive */
1373 if (-1 == HashFileQuery(hf, (uint8_t *)fname, strlen(fname), &hfi))
1374 return NULL;
1375
1376 /* Work out the size including header/footer and allocate */
1377 sz = hfi.size;
1378 if (hfi.header) {
1379 head = &hf->headers[hfi.header-1];
1380 sz += head->size;
1381 }
1382 if (hfi.footer) {
1383 foot = &hf->footers[hfi.footer-1];
1384 sz += foot->size;
1385 }
1386 *len = sz;
1387
1388 if (NULL == (data = (char *)malloc(sz+1)))
1389 return NULL;
1390 data[sz] = 0;
1391
1392 /* Header */
1393 pos = 0;
1394 if (head) {
1395 fseeko(hf->afp, head->pos, SEEK_SET);
1396 fread(&data[pos], head->size, 1, hf->afp);
1397 pos += head->size;
1398 }
1399
1400 /* Main file */
1401 fseeko(hf->afp, hfi.pos, SEEK_SET);
1402 fread(&data[pos], hfi.size, 1, hf->afp);
1403 pos += hfi.size;
1404
1405 /* Footer */
1406 if (foot) {
1407 fseeko(hf->afp, foot->pos, SEEK_SET);
1408 fread(&data[pos], foot->size, 1, hf->afp);
1409 pos += foot->size;
1410 }
1411
1412 return data;
1413 }
1414
1415 /*
1416 * Iterates through members of a hash table returning items sequentially.
1417 *
1418 * Returns the next HashItem on success
1419 * NULL on failure.
1420 */
1421 HashItem *HashTableIterNext(HashTable *h, HashIter *iter) {
1422 do {
1423 if (iter->hi == NULL) {
1424 if (++iter->bnum >= h->nbuckets)
1425 break;
1426 iter->hi = h->bucket[iter->bnum];
1427 } else {
1428 iter->hi = iter->hi->next;
1429 }
1430 } while (!iter->hi);
1431
1432 return iter->hi;
1433 }
1434
1435 void HashTableIterReset(HashIter *iter) {
1436 if (iter) {
1437 iter->bnum = -1;
1438 iter->hi = NULL;
1439 }
1440 }
1441
1442 HashIter *HashTableIterCreate(void) {
1443 HashIter *iter = (HashIter *)malloc(sizeof(*iter));
1444
1445 HashTableIterReset(iter);
1446 return iter;
1447 }
1448
1449 void HashTableIterDestroy(HashIter *iter) {
1450 if (iter)
1451 free(iter);
1452 }