comparison clustalomega/clustal-omega-0.2.0/src/squid/gsi64.c @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ff1768533a07
1 /*****************************************************************
2 * SQUID - a library of functions for biological sequence analysis
3 * Copyright (C) 1992-2002 Washington University School of Medicine
4 *
5 * This source code is freely distributed under the terms of the
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
7 * for details.
8 *****************************************************************/
9 #ifdef USE_GSI64
10
11 /* gsi64.c
12 * Updated interfaces for GSI64 64-bit "generic sequence index" files.
13 * See gsi.c for old interfaces.
14 * This is a temporary hack! Needed for human genome project.
15 */
16
17 /* 1 + <nfiles> + <nkeys> total records.
18 * Each record = 42 bytes.
19 *
20 * one header record : <"GSI64" (32)> <nfiles (2)> <nkeys (8)>
21 * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (8)>
22 * <nkeys> key records : <key (32)> <fileno (2)> <offset(8)>
23 *
24 * CVS $Id: gsi64.c,v 1.2 2000/12/21 23:42:59 eddy Exp)
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #ifndef SEEK_SET
31 #include <unistd.h> /* needed for poor crippled SunOS */
32 #endif
33
34 #include "squid.h"
35 #include "gsi64.h"
36
37 /*****************************************************************
38 * GSI64 index file access routines
39 *****************************************************************/
40
41 /* Function: GSI64Open()
42 *
43 * Purpose: Open a GSI64 file. Returns the number of records in
44 * the file and a file pointer. Returns NULL on failure.
45 * The file pointer should be fclose()'d normally.
46 */
47 GSI64FILE *
48 GSI64Open(char *gsifile)
49 {
50 GSI64FILE *gsi;
51 char magic[GSI64_KEYSIZE];
52
53 gsi = (GSI64FILE *) MallocOrDie (sizeof(GSI64FILE));
54 if ((gsi->gsifp = fopen(gsifile, "r")) == NULL)
55 { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }
56
57 if (! fread(magic, sizeof(char), GSI64_KEYSIZE, gsi->gsifp))
58 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
59 if (strcmp(magic, "GSI64") != 0)
60 { free(gsi); squid_errno = SQERR_FORMAT; return NULL; }
61
62 if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp))
63 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
64 if (! fread(&(gsi->recnum), sizeof(sqd_uint64), 1, gsi->gsifp))
65 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
66
67 #if 0 /* HACK! we don't byteswap */
68 gsi->nfiles = sre_ntohs(gsi->nfiles); /* convert from network short */
69 gsi->recnum = sre_ntohl(gsi->recnum); /* convert from network long */
70 #endif
71
72 return gsi;
73 }
74
75 /* Function: GSI64GetRecord()
76 *
77 * Purpose: Each non-header record of a GSI64 index file consists
78 * of 42 bytes: 32 bytes of character string, a 2 byte
79 * short, and an 8 byte long long. This function returns the
80 * three values.
81 *
82 * Args: gsi - open GSI64 index file, correctly positioned at a record
83 * f1 - char[32], allocated by caller (or NULL if unwanted)
84 * f2 - pointer to short (or NULL if unwanted)
85 * f3 - pointer to long long (or NULL if unwanted)
86 *
87 * Return: 0 on failure and sets squid_errno.
88 */
89 int
90 GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3)
91 {
92 if (f1 == NULL) fseek64(gsi->gsifp, GSI64_KEYSIZE, SEEK_CUR);
93 else if (! fread(f1, GSI64_KEYSIZE, 1, gsi->gsifp))
94 { squid_errno = SQERR_NODATA; return 0; }
95
96 if (f2 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR);
97 else if (! fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp))
98 { squid_errno = SQERR_NODATA; return 0; }
99
100 if (f3 == NULL) fseek64(gsi->gsifp, sizeof(sqd_uint64), SEEK_CUR);
101 else if (! fread(f3, sizeof(sqd_uint64), 1, gsi->gsifp))
102 { squid_errno = SQERR_NODATA; return 0; }
103
104 #if 0 /* no byteswap yet! HACK! */
105 if (f2 != NULL) *f2 = sre_ntohs(*f2);
106 if (f3 != NULL) *f3 = sre_ntohl(*f3);
107 #endif
108
109 return 1;
110 }
111
112
113 /* Function: GSI64GetOffset()
114 *
115 * Purpose: From a key (sequence name), find a disk offset
116 * in an open general sequence index file by binary
117 * search. Presumably GSI64 indexing could be even faster
118 * if we used hashing.
119 *
120 * Args: gsi - GSI64 index file, opened by GSI64Open()
121 * key - name of key to retrieve indices for
122 * ret_seqfile - pre-alloced char[32] array for seqfile name
123 * ret_fmt - format of seqfile
124 * ret_offset - return: disk offset in seqfile.
125 */
126 int
127 GSI64GetOffset(GSI64FILE *gsi, char *key, char *ret_seqfile,
128 int *ret_format, long long *ret_offset)
129 {
130 sqd_uint64 left, right, mid;
131 int cmp;
132 char name[GSI64_KEYSIZE + 1];
133 sqd_uint64 offset;
134 sqd_uint16 filenum;
135 sqd_uint64 fmt;
136
137 name[GSI64_KEYSIZE] = '\0';
138
139 left = gsi->nfiles + 1;
140 right = gsi->nfiles + gsi->recnum;
141 mid = (left + right) / 2;
142 fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);
143
144 while (GSI64GetRecord(gsi, name, &filenum, &offset))
145 {
146 cmp = strcmp(name, key);
147 if (cmp == 0) break; /* found it! */
148 else if (left >= right) return 0; /* oops, missed it; fail. */
149 else if (cmp < 0) left = mid + 1; /* it's right of mid */
150 else if (cmp > 0) right = mid - 1; /* it's left of mid */
151 mid = (left + right) / 2;
152 fseek64(gsi->gsifp, mid * GSI64_RECSIZE, SEEK_SET);
153 }
154
155 /* Using file number, look up the sequence file and format.
156 */
157 fseek64(gsi->gsifp, filenum * GSI64_RECSIZE, SEEK_SET);
158 GSI64GetRecord(gsi, ret_seqfile, NULL, &fmt);
159 *ret_format = (int) fmt;
160 *ret_offset = (long long) offset;
161
162 return 1;
163 }
164
165 /* Function: GSI64Close()
166 *
167 * Purpose: Close an open GSI64 sequence index file.
168 */
169 void
170 GSI64Close(GSI64FILE *gsi)
171 {
172 fclose(gsi->gsifp);
173 free(gsi);
174 }
175
176
177 /*****************************************************************
178 * GSI64 index construction routines
179 * SRE, Wed Nov 10 11:49:14 1999 [St. Louis]
180 *
181 * API:
182 * g = GSI64AllocIndex();
183 *
184 * [foreach filename, <32 char, no directory path]
185 * GSI64AddFileToIndex(g, filename);
186 * filenum++;
187 * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 64bit offset]
188 * GSI64AddKeyToIndex(g, key, filenum, offset);
189 *
190 * GSI64SortIndex(g);
191 * GSI64WriteIndex(fp, g);
192 * GSI64FreeIndex(g);
193 *****************************************************************/
194 struct gsi64index_s *
195 GSI64AllocIndex(void)
196 {
197 struct gsi64index_s *g;
198
199 g = MallocOrDie(sizeof(struct gsi64index_s));
200 g->filenames = MallocOrDie(sizeof(char *) * 10);
201 g->fmt = MallocOrDie(sizeof(int) * 10);
202 g->elems = MallocOrDie(sizeof(struct gsi64key_s) * 100);
203 g->nfiles = 0;
204 g->nkeys = 0;
205 return g;
206 }
207 void
208 GSI64FreeIndex(struct gsi64index_s *g)
209 {
210 int i;
211 for (i = 0; i < g->nfiles; i++) free(g->filenames[i]);
212 free(g->filenames);
213 free(g->fmt);
214 free(g->elems);
215 free(g);
216 }
217 void
218 GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt)
219 {
220 int len;
221
222 len = strlen(filename);
223 if (len >= GSI64_KEYSIZE) Die("File name too long to be indexed.");
224 g->filenames[g->nfiles] = sre_strdup(filename, len);
225 g->fmt[g->nfiles] = fmt;
226 g->nfiles++;
227 if (g->nfiles % 10 == 0) {
228 g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10));
229 g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10));
230 }
231 }
232 void
233 GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset)
234 {
235 if (strlen(key) >= GSI64_KEYSIZE) Die("key too long in GSI64 index");
236 if (filenum > SQD_UINT16_MAX) Die("too many files in GSI64 index");
237 if (offset > SQD_UINT64_MAX) Die("offset too big in GSI64 index");
238
239 strncpy(g->elems[g->nkeys].key, key, GSI64_KEYSIZE-1);
240 g->elems[g->nkeys].key[GSI64_KEYSIZE-1] = '\0';
241 g->elems[g->nkeys].filenum = (sqd_uint16) filenum;
242 g->elems[g->nkeys].offset = (sqd_uint64) offset;
243 g->nkeys++;
244
245 if (g->nkeys % 100 == 0)
246 g->elems = ReallocOrDie(g->elems, sizeof(struct gsi64key_s) * (g->nkeys + 100));
247 }
248 static int
249 gsi_keysorter(const void *k1, const void *k2)
250 {
251 struct gsi64key_s *key1;
252 struct gsi64key_s *key2;
253 key1 = (struct gsi64key_s *) k1;
254 key2 = (struct gsi64key_s *) k2;
255 return strcmp(key1->key, key2->key);
256 }
257 void
258 GSI64SortIndex(struct gsi64index_s *g)
259 {
260 qsort((void *) g->elems, g->nkeys, sizeof(struct gsi64key_s), gsi_keysorter);
261 }
262 void
263 GSI64WriteIndex(FILE *fp, struct gsi64index_s *g)
264 {
265 sqd_uint16 i;
266 sqd_uint64 j;
267
268 /* Range checking.
269 */
270 if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI64 index.");
271 if (g->nkeys > SQD_UINT64_MAX) Die("Too many keys in GSI64 index.");
272
273 GSI64WriteHeader(fp, g->nfiles, g->nkeys);
274 for (i = 0; i < g->nfiles; i++)
275 GSI64WriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]);
276 for (j = 0; j < g->nkeys; j++)
277 GSI64WriteKeyRecord(fp, g->elems[j].key, g->elems[j].filenum, g->elems[j].offset);
278 }
279
280
281
282
283
284 /* Function: GSI64WriteHeader()
285 * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis]
286 *
287 * Purpose: Write the first record to an open GSI64 file:
288 * "GSI64" <nfiles> <nkeys>
289 *
290 * Args: fp - open file to write to.
291 * nfiles - number of files indexed
292 * nkeys - number of keys indexed
293 *
294 * Returns: void
295 */
296 void
297 GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys)
298 {
299 char key[GSI64_KEYSIZE];
300 sqd_uint16 f1;
301 sqd_uint64 f2;
302
303 /* beware potential range errors!
304 */
305 if (nfiles > SQD_UINT16_MAX) Die("GSI64: nfiles out of range");
306 if (nkeys > SQD_UINT64_MAX) Die("GSI64: nkeys out of range");
307
308 f1 = (sqd_uint16) nfiles;
309 f2 = (sqd_uint64) nkeys;
310 #if 0 /* HACK no byteswap */
311 f1 = sre_htons(f1);
312 f2 = sre_htonl(f2);
313 #endif
314 strcpy(key, "GSI64");
315
316 if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
317 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
318 if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
319 }
320
321
322 /* Function: GSI64WriteFileRecord()
323 * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis]
324 *
325 * Purpose: Write a file record to an open GSI64 file.
326 *
327 * Args: fp - open GSI64 file
328 * fname - file name (max 31 characters)
329 * idx - file number
330 * fmt - file format (e.g. kPearson, etc.)
331 *
332 * Returns: 0 on failure. 1 on success.
333 */
334 int
335 GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt)
336 {
337 sqd_uint16 f1;
338 sqd_uint64 f2;
339
340 if (strlen(fname) >= GSI64_KEYSIZE) return 0;
341 if (idx > SQD_UINT16_MAX) Die("GSI64: file index out of range");
342 if (fmt > SQD_UINT64_MAX) Die("GSI64: format index out of range");
343
344 f1 = (sqd_uint16) idx;
345 f2 = (sqd_uint64) fmt;
346 #if 0 /* hack : no byteswap */
347 f1 = sre_htons(f1);
348 f2 = sre_htonl(f2);
349 #endif
350
351 if (fwrite(fname, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
352 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
353 if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
354 return 1;
355 }
356
357
358 /* Function: GSI64WriteKeyRecord()
359 * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis]
360 *
361 * Purpose: Write a key record to a GSI64 file.
362 *
363 * Args: fp - open GSI64 file for writing
364 * key - key (max 31 char + \0)
365 * fileidx - which file number to find this key in
366 * offset - offset for this key
367 *
368 * Returns: 1 on success, else 0.
369 * will fail if key >= 32 chars, for instance.
370 */
371 int
372 GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset)
373 {
374 sqd_uint16 f1;
375 sqd_uint64 f2;
376
377 if (strlen(key) >= GSI64_KEYSIZE) return 0;
378 if (fileidx > SQD_UINT16_MAX) Die("GSI64: file index out of range");
379 if (offset > SQD_UINT64_MAX) Die("GSI64: offset out of range");
380
381 f1 = (sqd_uint16) fileidx;
382 f2 = (sqd_uint64) offset;
383 #if 0 /* HACK! */
384 f1 = sre_htons(f1);
385 f2 = sre_htonl(f2);
386 #endif
387
388 if (fwrite(key, 1, GSI64_KEYSIZE, fp) < GSI64_KEYSIZE) PANIC;
389 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
390 if (fwrite(&f2, 8, 1, fp) < 1) PANIC;
391 return 1;
392 }
393
394 #endif /*USE_GSI64 */