Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-0.2.0/src/squid/gsi64.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ff1768533a07 |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 #ifdef USE_GSI64 | |
10 | |
11 /* gsi64.c | |
12 * Updated interfaces for GSI64 64-bit "generic sequence index" files. | |
13 * See gsi.c for old interfaces. | |
14 * This is a temporary hack! Needed for human genome project. | |
15 */ | |
16 | |
17 /* 1 + <nfiles> + <nkeys> total records. | |
18 * Each record = 42 bytes. | |
19 * | |
20 * one header record : <"GSI64" (32)> <nfiles (2)> <nkeys (8)> | |
21 * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (8)> | |
22 * <nkeys> key records : <key (32)> <fileno (2)> <offset(8)> | |
23 * | |
24 * CVS $Id: gsi64.c,v 1.2 2000/12/21 23:42:59 eddy Exp) | |
25 */ | |
26 | |
27 #include <stdio.h> | |
28 #include <stdlib.h> | |
29 #include <string.h> | |
30 #ifndef SEEK_SET | |
31 #include <unistd.h> /* needed for poor crippled SunOS */ | |
32 #endif | |
33 | |
34 #include "squid.h" | |
35 #include "gsi64.h" | |
36 | |
37 /***************************************************************** | |
38 * GSI64 index file access routines | |
39 *****************************************************************/ | |
40 | |
41 /* Function: GSI64Open() | |
42 * | |
43 * Purpose: Open a GSI64 file. Returns the number of records in | |
44 * the file and a file pointer. Returns NULL on failure. | |
45 * The file pointer should be fclose()'d normally. | |
46 */ | |
47 GSI64FILE * | |
48 GSI64Open(char *gsifile) | |
49 { | |
50 GSI64FILE *gsi; | |
51 char magic[GSI64_KEYSIZE]; | |
52 | |
53 gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE)); | |
54 if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) | |
55 { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } | |
56 | |
57 if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp)) | |
58 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
59 if (strcmp(magic, "GSI64") != 0) | |
60 { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } | |
61 | |
62 if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) | |
63 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
64 if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp)) | |
65 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
66 | |
67 #if 0 /* HACK! we don't byteswap */ | |
68 gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */ | |
69 gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long */ | |
70 #endif | |
71 | |
72 return gsi; | |
73 } | |
74 | |
75 /* Function: GSI64GetRecord() | |
76 * | |
77 * Purpose: Each non-header record of a GSI64 index file consists | |
78 * of 42 bytes: 32 bytes of character string, a 2 byte | |
79 * short, and an 8 byte long long. This function returns the | |
80 * three values. | |
81 * | |
82 * Args: gsi - open GSI64 index file, correctly positioned at a record | |
83 * f1 - char[32], allocated by caller (or NULL if unwanted) | |
84 * f2 - pointer to short (or NULL if unwanted) | |
85 * f3 - pointer to long long (or NULL if unwanted) | |
86 * | |
87 * Return: 0 on failure and sets squid_errno. | |
88 */ | |
89 int | |
90 GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3) | |
91 { | |
92 if (f1 == NULL) fseek64(gsi->gsifp, GSI64_KEYSIZE, SEEK_CUR); | |
93 else if (! fread(f1, GSI64_KEYSIZE, 1, gsi->gsifp)) | |
94 { squid_errno = SQERR_NODATA; return 0; } | |
95 | |
96 if (f2 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR); | |
97 else if (! fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp)) | |
98 { squid_errno = SQERR_NODATA; return 0; } | |
99 | |
100 if (f3 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint64), SEEK_CUR); | |
101 else if (! fread(f3, sizeof(sqd_uint64), 1, gsi->gsifp)) | |
102 { squid_errno = SQERR_NODATA; return 0; } | |
103 | |
104 #if 0 /* no byteswap yet! HACK! */ | |
105 if (f2 != NULL) *f2 = sre_ntohs(*f2); | |
106 if (f3 != NULL) *f3 = sre_ntohl(*f3); | |
107 #endif | |
108 | |
109 return 1; | |
110 } | |
111 | |
112 | |
113 /* Function: GSI64GetOffset() | |
114 * | |
115 * Purpose: From a key (sequence name), find a disk offset | |
116 * in an open general sequence index file by binary | |
117 * search. Presumably GSI64 indexing could be even faster | |
118 * if we used hashing. | |
119 * | |
120 * Args: gsi - GSI64 index file, opened by GSI64Open() | |
121 * key - name of key to retrieve indices for | |
122 * ret_seqfile - pre-alloced char[32] array for seqfile name | |
123 * ret_fmt - format of seqfile | |
124 * ret_offset - return: disk offset in seqfile. | |
125 */ | |
126 int | |
127 GSI64GetOffset(GSI64FILE *gsi, char *key, char *ret_seqfile, | |
128 int *ret_format, long long *ret_offset) | |
129 { | |
130 sqd_uint64 left, right, mid; | |
131 int cmp; | |
132 char name[GSI64_KEYSIZE + 1]; | |
133 sqd_uint64 offset; | |
134 sqd_uint16 filenum; | |
135 sqd_uint64 fmt; | |
136 | |
137 name[GSI64_KEYSIZE] = '\0'; | |
138 | |
139 left = gsi->nfiles + 1; | |
140 right = gsi->nfiles + gsi->recnum; | |
141 mid = (left + right) / 2; | |
142 fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET); | |
143 | |
144 while (GSI64GetRecord(gsi, name, &filenum, &offset)) | |
145 { | |
146 cmp = strcmp(name, key); | |
147 if (cmp == 0) break; /* found it! */ | |
148 else if (left >= right) return 0; /* oops, missed it; fail. */ | |
149 else if (cmp < 0) left = mid + 1; /* it's right of mid */ | |
150 else if (cmp > 0) right = mid - 1; /* it's left of mid */ | |
151 mid = (left + right) / 2; | |
152 fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET); | |
153 } | |
154 | |
155 /* Using file number, look up the sequence file and format. | |
156 */ | |
157 fseek64(gsi->gsifp, filenum * GSI64_RECSIZE, SEEK_SET); | |
158 GSI64GetRecord(gsi, ret_seqfile, NULL, &fmt); | |
159 *ret_format = (int) fmt; | |
160 *ret_offset = (long long) offset; | |
161 | |
162 return 1; | |
163 } | |
164 | |
165 /* Function: GSI64Close() | |
166 * | |
167 * Purpose: Close an open GSI64 sequence index file. | |
168 */ | |
169 void | |
170 GSI64Close(GSI64FILE *gsi) | |
171 { | |
172 fclose(gsi->gsifp); | |
173 free(gsi); | |
174 } | |
175 | |
176 | |
177 /***************************************************************** | |
178 * GSI64 index construction routines | |
179 * SRE, Wed Nov 10 11:49:14 1999 [St. Louis] | |
180 * | |
181 * API: | |
182 * g = GSI64AllocIndex(); | |
183 * | |
184 * [foreach filename, <32 char, no directory path] | |
185 * GSI64AddFileToIndex(g, filename); | |
186 * filenum++; | |
187 * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset] | |
188 * GSI64AddKeyToIndex(g, key, filenum, offset); | |
189 * | |
190 * GSI64SortIndex(g); | |
191 * GSI64WriteIndex(fp, g); | |
192 * GSI64FreeIndex(g); | |
193 *****************************************************************/ | |
194 struct gsi64index_s * | |
195 GSI64AllocIndex(void) | |
196 { | |
197 struct gsi64index_s *g; | |
198 | |
199 g = MallocOrDie(sizeof(struct gsi64index_s)); | |
200 g->filenames = MallocOrDie(sizeof(char *) * 10); | |
201 g->fmt = MallocOrDie(sizeof(int) * 10); | |
202 g->elems = MallocOrDie(sizeof(struct gsi64key_s) * 100); | |
203 g->nfiles = 0; | |
204 g->nkeys = 0; | |
205 return g; | |
206 } | |
207 void | |
208 GSI64FreeIndex(struct gsi64index_s *g) | |
209 { | |
210 int i; | |
211 for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); | |
212 free(g->filenames); | |
213 free(g->fmt); | |
214 free(g->elems); | |
215 free(g); | |
216 } | |
217 void | |
218 GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt) | |
219 { | |
220 int len; | |
221 | |
222 len = strlen(filename); | |
223 if (len >= GSI64_KEYSIZE) Die("File name too long to be indexed."); | |
224 g->filenames[g->nfiles] = sre_strdup(filename, len); | |
225 g->fmt[g->nfiles] = fmt; | |
226 g->nfiles++; | |
227 if (g->nfiles % 10 == 0) { | |
228 g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10)); | |
229 g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10)); | |
230 } | |
231 } | |
232 void | |
233 GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset) | |
234 { | |
235 if (strlen(key) >= GSI64_KEYSIZE) Die("key too long in GSI64 index"); | |
236 if (filenum > SQD_UINT16_MAX) Die("too many files in GSI64 index"); | |
237 if (offset > SQD_UINT64_MAX) Die("offset too big in GSI64 index"); | |
238 | |
239 strncpy(g->elems[g->nkeys].key, key, GSI64_KEYSIZE-1); | |
240 g->elems[g->nkeys].key[GSI64_KEYSIZE-1] = '\0'; | |
241 g->elems[g->nkeys].filenum = (sqd_uint16) filenum; | |
242 g->elems[g->nkeys].offset = (sqd_uint64) offset; | |
243 g->nkeys++; | |
244 | |
245 if (g->nkeys % 100 == 0) | |
246 g->elems = ReallocOrDie(g->elems, sizeof(struct gsi64key_s) * (g->nkeys + 100)); | |
247 } | |
248 static int | |
249 gsi_keysorter(const void *k1, const void *k2) | |
250 { | |
251 struct gsi64key_s *key1; | |
252 struct gsi64key_s *key2; | |
253 key1 = (struct gsi64key_s *) k1; | |
254 key2 = (struct gsi64key_s *) k2; | |
255 return strcmp(key1->key, key2->key); | |
256 } | |
257 void | |
258 GSI64SortIndex(struct gsi64index_s *g) | |
259 { | |
260 qsort((void *) g->elems, g->nkeys, sizeof(struct gsi64key_s), gsi_keysorter); | |
261 } | |
262 void | |
263 GSI64WriteIndex(FILE *fp, struct gsi64index_s *g) | |
264 { | |
265 sqd_uint16 i; | |
266 sqd_uint64 j; | |
267 | |
268 /* Range checking. | |
269 */ | |
270 if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI64 index."); | |
271 if (g->nkeys > SQD_UINT64_MAX) Die("Too many keys in GSI64 index."); | |
272 | |
273 GSI64WriteHeader(fp, g->nfiles, g->nkeys); | |
274 for (i = 0; i < g->nfiles; i++) | |
275 GSI64WriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]); | |
276 for (j = 0; j < g->nkeys; j++) | |
277 GSI64WriteKeyRecord(fp, g->elems[j].key, g->elems[j].filenum, g->elems[j].offset); | |
278 } | |
279 | |
280 | |
281 | |
282 | |
283 | |
284 /* Function: GSI64WriteHeader() | |
285 * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis] | |
286 * | |
287 * Purpose: Write the first record to an open GSI64 file: | |
288 * "GSI64" <nfiles> <nkeys> | |
289 * | |
290 * Args: fp - open file to write to. | |
291 * nfiles - number of files indexed | |
292 * nkeys - number of keys indexed | |
293 * | |
294 * Returns: void | |
295 */ | |
296 void | |
297 GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys) | |
298 { | |
299 char key[GSI64_KEYSIZE]; | |
300 sqd_uint16 f1; | |
301 sqd_uint64 f2; | |
302 | |
303 /* beware potential range errors! | |
304 */ | |
305 if (nfiles > SQD_UINT16_MAX) Die("GSI64: nfiles out of range"); | |
306 if (nkeys > SQD_UINT64_MAX) Die("GSI64: nkeys out of range"); | |
307 | |
308 f1 = (sqd_uint16) nfiles; | |
309 f2 = (sqd_uint64) nkeys; | |
310 #if 0 /* HACK no byteswap */ | |
311 f1 = sre_htons(f1); | |
312 f2 = sre_htonl(f2); | |
313 #endif | |
314 strcpy(key, "GSI64"); | |
315 | |
316 if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; | |
317 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
318 if (fwrite(&f2, 8, 1, fp) < 1) PANIC; | |
319 } | |
320 | |
321 | |
322 /* Function: GSI64WriteFileRecord() | |
323 * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis] | |
324 * | |
325 * Purpose: Write a file record to an open GSI64 file. | |
326 * | |
327 * Args: fp - open GSI64 file | |
328 * fname - file name (max 31 characters) | |
329 * idx - file number | |
330 * fmt - file format (e.g. kPearson, etc.) | |
331 * | |
332 * Returns: 0 on failure. 1 on success. | |
333 */ | |
334 int | |
335 GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt) | |
336 { | |
337 sqd_uint16 f1; | |
338 sqd_uint64 f2; | |
339 | |
340 if (strlen(fname) >= GSI64_KEYSIZE) return 0; | |
341 if (idx > SQD_UINT16_MAX) Die("GSI64: file index out of range"); | |
342 if (fmt > SQD_UINT64_MAX) Die("GSI64: format index out of range"); | |
343 | |
344 f1 = (sqd_uint16) idx; | |
345 f2 = (sqd_uint64) fmt; | |
346 #if 0 /* hack : no byteswap */ | |
347 f1 = sre_htons(f1); | |
348 f2 = sre_htonl(f2); | |
349 #endif | |
350 | |
351 if (fwrite(fname, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; | |
352 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
353 if (fwrite(&f2, 8, 1, fp) < 1) PANIC; | |
354 return 1; | |
355 } | |
356 | |
357 | |
358 /* Function: GSI64WriteKeyRecord() | |
359 * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis] | |
360 * | |
361 * Purpose: Write a key record to a GSI64 file. | |
362 * | |
363 * Args: fp - open GSI64 file for writing | |
364 * key - key (max 31 char + \0) | |
365 * fileidx - which file number to find this key in | |
366 * offset - offset for this key | |
367 * | |
368 * Returns: 1 on success, else 0. | |
369 * will fail if key >= 32 chars, for instance. | |
370 */ | |
371 int | |
372 GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset) | |
373 { | |
374 sqd_uint16 f1; | |
375 sqd_uint64 f2; | |
376 | |
377 if (strlen(key) >= GSI64_KEYSIZE) return 0; | |
378 if (fileidx > SQD_UINT16_MAX) Die("GSI64: file index out of range"); | |
379 if (offset > SQD_UINT64_MAX) Die("GSI64: offset out of range"); | |
380 | |
381 f1 = (sqd_uint16) fileidx; | |
382 f2 = (sqd_uint64) offset; | |
383 #if 0 /* HACK! */ | |
384 f1 = sre_htons(f1); | |
385 f2 = sre_htonl(f2); | |
386 #endif | |
387 | |
388 if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC; | |
389 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
390 if (fwrite(&f2, 8, 1, fp) < 1) PANIC; | |
391 return 1; | |
392 } | |
393 | |
394 #endif /*USE_GSI64 */ |