comparison srf2fastq/io_lib-1.12.2/io_lib/hash_table.h @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #ifndef _HASH_TABLE_H_
2 #define _HASH_TABLE_H_
3
4 #include <inttypes.h>
5 #include <sys/types.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8
9 #include "io_lib/pooled_alloc.h"
10
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14
15 /* The data referenced by the hash table */
16 typedef union {
17 uint64_t i;
18 void *p;
19 } HashData;
20
21 /* A hash item with "next" pointer to use in a linked list */
22 typedef struct HashItemStruct {
23 HashData data; /* user defined data attached to this key */
24 char *key; /* key we hashed on */
25 int key_len; /* and its length */
26 struct HashItemStruct *next;
27 } HashItem;
28
29 /* The main hash table structure itself */
30 typedef struct {
31 int options; /* HASH_FUNC & HASH_OPT macros */
32 uint32_t nbuckets; /* Number of hash buckets; power of 2 */
33 uint32_t mask; /* bit-mask equiv of nbuckets */
34 int nused; /* How many hash entries we're storing */
35 HashItem **bucket; /* The bucket "list heads" themselves */
36 pool_alloc_t *hi_pool; /* Pool of allocated HashItem structs */
37 } HashTable;
38
39 /* An iterator on HashTable items */
40 typedef struct {
41 int bnum;
42 HashItem *hi;
43 } HashIter;
44
45 #define HASHFILE_MAGIC ".hsh"
46 #define HASHFILE_VERSION "1.00"
47 #define HASHFILE_PREPEND -1
48
49 /* File format: the hash table header */
50 typedef struct {
51 char magic[4];
52 char vers[4];
53 char hfunc;
54 unsigned char nheaders;
55 unsigned char nfooters;
56 char reserved; /* 0 */
57 uint32_t nbuckets;
58 int64_t offset;
59 uint32_t size;
60 } HashFileHeader;
61
62 /* sizeof(HashFileHeader) minus terminal padding */
63 #define HHSIZE 28
64
65 typedef struct {
66 char magic[4];
67 char offset[8];
68 } HashFileFooter;
69
70 /* The data block attached to the hash table */
71 typedef struct {
72 uint64_t pos;
73 uint32_t size;
74 unsigned char header; /* zero if not set */
75 unsigned char footer; /* zero if not set */
76 } HashFileItem;
77
78 /* Common headers or footers to prepend to the archive contents */
79 typedef struct {
80 uint64_t pos;
81 uint32_t size;
82 unsigned char *cached_data;
83 } HashFileSection;
84
85 /*
86 * The main structure for the HashFile functions.
87 *
88 * We obtain an existing HashFile by opening a stored hash file or by
89 * loading the entire thing.
90 * New empty ones can be created using HashFileCreate.
91 */
92 typedef struct {
93 HashFileHeader hh; /* on-disk file header */
94 HashTable *h; /* the in-memory hash table */
95 int nheaders; /* number of common file headers */
96 HashFileSection *headers; /* on-disk common file headers struct */
97 int nfooters; /* number of common file footers */
98 HashFileSection *footers; /* on-disk common file footers struct */
99 FILE *hfp; /* hash FILE */
100 FILE *afp; /* archive FILE */
101 char *archive; /* archive filename */
102 int header_size; /* size of header + filename + N(head/feet) */
103 off_t hf_start; /* location of HashFile header in file */
104 } HashFile;
105
106 /* Functions to to use HashTable.options */
107 #define HASH_FUNC_HSIEH 0
108 #define HASH_FUNC_TCL 1
109 #define HASH_FUNC_JENKINS 2
110 #define HASH_FUNC_JENKINS3 3
111 #define HASH_FUNC_MASK 7
112
113 /* Other HashTable.options values */
114 #define HASH_NONVOLATILE_KEYS (1<<3)
115 #define HASH_ALLOW_DUP_KEYS (1<<4)
116 #define HASH_DYNAMIC_SIZE (1<<5)
117 #define HASH_OWN_KEYS (1<<6)
118 #define HASH_POOL_ITEMS (1<<7)
119
120 /* Hashing prototypes */
121 uint32_t hash(int func, uint8_t *key, int key_len);
122 uint64_t hash64(int func, uint8_t *key, int key_len);
123 uint32_t HashJenkins(uint8_t *k, int length);
124 uint32_t HashTcl(uint8_t *data, int len);
125 uint32_t HashHsieh(uint8_t *k, int length);
126
127 /* HashTable management prototypes */
128 HashTable *HashTableCreate(int size, int options);
129 void HashTableDestroy(HashTable *h, int deallocate_date);
130 int HashTableResize(HashTable *h, int newsize);
131 HashItem *HashTableAdd(HashTable *h, char *key, int key_len,
132 HashData data, int *added);
133 int HashTableDel(HashTable *h, HashItem *hi, int deallocate_data);
134 int HashTableRemove(HashTable *h, char *key, int key_len, int deallocate_data);
135 HashItem *HashTableSearch(HashTable *h, char *key, int key_len);
136 HashItem *HashTableNext(HashItem *hi, char *key, int key_len);
137
138 void HashTableStats(HashTable *h, FILE *fp);
139 void HashTableDump(HashTable *h, FILE *fp);
140
141 /* Iterator prototypes */
142 HashIter *HashTableIterCreate(void);
143 void HashTableIterDestroy(HashIter *iter);
144 HashItem *HashTableIterNext(HashTable *h, HashIter *iter);
145 void HashTableIterReset(HashIter *iter);
146
147 /* HashFile prototypes */
148 uint64_t HashFileSave(HashFile *hf, FILE *fp, int64_t offset);
149 HashFile *HashFileLoad(FILE *fp);
150 int HashFileQuery(HashFile *hf, uint8_t *key, int key_len, HashFileItem *item);
151 char *HashFileExtract(HashFile *hf, char *fname, size_t *len);
152
153
154 HashFile *HashFileCreate(int size, int options);
155 void HashFileDestroy(HashFile *hf);
156 HashFile *HashFileOpen(char *fname);
157 HashFile *HashFileFopen(FILE *fp);
158
159 #ifdef __cplusplus
160 }
161 #endif
162
163 #endif /* _HASH_TABLE_H_ */