Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-1.0.2/src/squid/gsi.c @ 1:bc707542e5de
Uploaded
author | clustalomega |
---|---|
date | Thu, 21 Jul 2011 13:35:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ff1768533a07 | 1:bc707542e5de |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 /* gsi.c | |
11 * Interfaces for GSI "generic sequence index" files. | |
12 * broken away from sqio.c and extended: SRE, Wed Aug 5 10:32:53 1998 | |
13 * | |
14 * | |
15 * GSI definition: | |
16 * 1 + <nfiles> + <nkeys> total records. | |
17 * Each record = 38 bytes. | |
18 * | |
19 * one header record : <"GSI" (32)> <nfiles (2)> <nkeys (4)> | |
20 * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (4)> | |
21 * <nkeys> key records : <key (32)> <fileno (2)> <offset(4)> | |
22 * | |
23 * Matches up with my Perl scripts that create GSI files. | |
24 * | |
25 * RCS $Id: gsi.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: gsi.c,v 1.5 2001/08/04 20:15:42 eddy Exp) | |
26 */ | |
27 | |
28 #include <stdio.h> | |
29 #include <stdlib.h> | |
30 #include <string.h> | |
31 #ifndef SEEK_SET | |
32 #include <unistd.h> /* needed for poor crippled SunOS */ | |
33 #endif | |
34 | |
35 #include "squid.h" | |
36 #include "gsi.h" | |
37 | |
38 | |
39 /***************************************************************** | |
40 * GSI index file access routines | |
41 *****************************************************************/ | |
42 | |
43 /* Function: GSIOpen() | |
44 * | |
45 * Purpose: Open a GSI file. Returns the number of records in | |
46 * the file and a file pointer. Returns NULL on failure. | |
47 * The file pointer should be fclose()'d normally. | |
48 */ | |
49 GSIFILE * | |
50 GSIOpen(char *gsifile) | |
51 { | |
52 GSIFILE *gsi; | |
53 char magic[GSI_KEYSIZE]; | |
54 | |
55 gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE)); | |
56 if ((gsi->gsifp = fopen(gsifile, "r")) == NULL) | |
57 { free(gsi); squid_errno = SQERR_NOFILE; return NULL; } | |
58 | |
59 if (! fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp)) | |
60 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
61 if (strcmp(magic, "GSI") != 0) | |
62 { free(gsi); squid_errno = SQERR_FORMAT; return NULL; } | |
63 | |
64 if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp)) | |
65 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
66 if (! fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp)) | |
67 { free(gsi); squid_errno = SQERR_NODATA; return NULL; } | |
68 | |
69 gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */ | |
70 gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long */ | |
71 | |
72 return gsi; | |
73 } | |
74 | |
75 /* Function: GSIGetRecord() | |
76 * | |
77 * Purpose: Each non-header record of a GSI index files consists | |
78 * of 38 bytes: 32 bytes of character string, a 2 byte | |
79 * short, and a 4 byte long. This function returns the | |
80 * three values. | |
81 * | |
82 * Args: gsi - open GSI index file, correctly positioned at a record | |
83 * f1 - char[32], allocated by caller (or NULL if unwanted) | |
84 * f2 - pointer to short (or NULL if unwanted) | |
85 * f3 - pointer to long (or NULL if unwanted) | |
86 * | |
87 * Return: 0 on failure and sets squid_errno. | |
88 */ | |
89 int | |
90 GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3) | |
91 { | |
92 if (f1 == NULL) fseek(gsi->gsifp, GSI_KEYSIZE, SEEK_CUR); | |
93 else if (! fread(f1, GSI_KEYSIZE, 1, gsi->gsifp)) | |
94 { squid_errno = SQERR_NODATA; return 0; } | |
95 | |
96 if (f2 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR); | |
97 else if (! fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp)) | |
98 { squid_errno = SQERR_NODATA; return 0; } | |
99 | |
100 if (f3 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint32), SEEK_CUR); | |
101 else if (! fread(f3, sizeof(sqd_uint32), 1, gsi->gsifp)) | |
102 { squid_errno = SQERR_NODATA; return 0; } | |
103 | |
104 if (f2 != NULL) *f2 = sre_ntoh16(*f2); | |
105 if (f3 != NULL) *f3 = sre_ntoh32(*f3); | |
106 | |
107 return 1; | |
108 } | |
109 | |
110 | |
111 /* Function: GSIGetOffset() | |
112 * | |
113 * Purpose: From a key (sequence name), find a disk offset | |
114 * in an open general sequence index file by binary | |
115 * search. Presumably GSI indexing could be even faster | |
116 * if we used hashing. | |
117 * | |
118 * Args: gsi - GSI index file, opened by GSIOpen() | |
119 * key - name of key to retrieve indices for | |
120 * ret_seqfile - pre-alloced char[32] array for seqfile name | |
121 * ret_fmt - format of seqfile | |
122 * ret_offset - return: disk offset in seqfile. | |
123 */ | |
124 int | |
125 GSIGetOffset(GSIFILE *gsi, char *key, char *ret_seqfile, | |
126 int *ret_format, long *ret_offset) | |
127 { | |
128 sqd_uint32 left, right, mid; | |
129 int cmp; | |
130 char name[GSI_KEYSIZE + 1]; | |
131 sqd_uint32 offset; | |
132 sqd_uint16 filenum; | |
133 sqd_uint32 fmt; | |
134 | |
135 name[GSI_KEYSIZE] = '\0'; | |
136 | |
137 left = gsi->nfiles + 1; | |
138 right = gsi->nfiles + gsi->recnum; | |
139 mid = (left + right) / 2; | |
140 fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET); | |
141 | |
142 while (GSIGetRecord(gsi, name, &filenum, &offset)) | |
143 { | |
144 cmp = strcmp(name, key); | |
145 if (cmp == 0) break; /* found it! */ | |
146 else if (left >= right) return 0; /* oops, missed it; fail. */ | |
147 else if (cmp < 0) left = mid + 1; /* it's right of mid */ | |
148 else if (cmp > 0) right = mid - 1; /* it's left of mid */ | |
149 mid = (left + right) / 2; | |
150 fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET); | |
151 } | |
152 | |
153 /* Using file number, look up the sequence file and format. | |
154 */ | |
155 fseek(gsi->gsifp, filenum * GSI_RECSIZE, SEEK_SET); | |
156 GSIGetRecord(gsi, ret_seqfile, NULL, &fmt); | |
157 *ret_format = (int) fmt; | |
158 *ret_offset = (long) offset; | |
159 | |
160 return 1; | |
161 } | |
162 | |
163 /* Function: GSIClose() | |
164 * | |
165 * Purpose: Close an open GSI sequence index file. | |
166 */ | |
167 void | |
168 GSIClose(GSIFILE *gsi) | |
169 { | |
170 fclose(gsi->gsifp); | |
171 free(gsi); | |
172 } | |
173 | |
174 | |
175 /***************************************************************** | |
176 * GSI index construction routines | |
177 * SRE, Wed Nov 10 11:49:14 1999 [St. Louis] | |
178 * | |
179 * API: | |
180 * g = GSIAllocIndex(); | |
181 * | |
182 * [foreach filename, <32 char, no directory path] | |
183 * GSIAddFileToIndex(g, filename); | |
184 * filenum++; | |
185 * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 32bit offset] | |
186 * GSIAddKeyToIndex(g, key, filenum, offset); | |
187 * | |
188 * GSISortIndex(g); | |
189 * GSIWriteIndex(fp, g); | |
190 * GSIFreeIndex(g); | |
191 *****************************************************************/ | |
192 struct gsiindex_s * | |
193 GSIAllocIndex(void) | |
194 { | |
195 struct gsiindex_s *g; | |
196 | |
197 g = MallocOrDie(sizeof(struct gsiindex_s)); | |
198 g->filenames = MallocOrDie(sizeof(char *) * 10); | |
199 g->fmt = MallocOrDie(sizeof(int) * 10); | |
200 g->elems = MallocOrDie(sizeof(struct gsikey_s) * 100); | |
201 g->nfiles = 0; | |
202 g->nkeys = 0; | |
203 return g; | |
204 } | |
205 void | |
206 GSIFreeIndex(struct gsiindex_s *g) | |
207 { | |
208 int i; | |
209 for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); | |
210 free(g->filenames); | |
211 free(g->fmt); | |
212 free(g->elems); | |
213 free(g); | |
214 } | |
215 void | |
216 GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt) | |
217 { | |
218 int len; | |
219 | |
220 len = strlen(filename); | |
221 if (len >= GSI_KEYSIZE) Die("File name too long to be indexed."); | |
222 g->filenames[g->nfiles] = sre_strdup(filename, len); | |
223 g->fmt[g->nfiles] = fmt; | |
224 g->nfiles++; | |
225 if (g->nfiles % 10 == 0) { | |
226 g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10)); | |
227 g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10)); | |
228 } | |
229 } | |
230 void | |
231 GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset) | |
232 { | |
233 if (strlen(key) >= GSI_KEYSIZE) Die("key too long in GSI index"); | |
234 if (filenum > SQD_UINT16_MAX) Die("too many files in GSI index"); | |
235 if (offset > SQD_UINT32_MAX) Die("offset too big in GSI index"); | |
236 | |
237 strncpy(g->elems[g->nkeys].key, key, GSI_KEYSIZE-1); | |
238 g->elems[g->nkeys].key[GSI_KEYSIZE-1] = '\0'; | |
239 g->elems[g->nkeys].filenum = (sqd_uint16) filenum; | |
240 g->elems[g->nkeys].offset = (sqd_uint32) offset; | |
241 g->nkeys++; | |
242 | |
243 if (g->nkeys % 100 == 0) | |
244 g->elems = ReallocOrDie(g->elems, sizeof(struct gsikey_s) * (g->nkeys + 100)); | |
245 } | |
246 static int | |
247 gsi_keysorter(const void *k1, const void *k2) | |
248 { | |
249 struct gsikey_s *key1; | |
250 struct gsikey_s *key2; | |
251 key1 = (struct gsikey_s *) k1; | |
252 key2 = (struct gsikey_s *) k2; | |
253 return strcmp(key1->key, key2->key); | |
254 } | |
255 void | |
256 GSISortIndex(struct gsiindex_s *g) | |
257 { | |
258 qsort((void *) g->elems, g->nkeys, sizeof(struct gsikey_s), gsi_keysorter); | |
259 } | |
260 void | |
261 GSIWriteIndex(FILE *fp, struct gsiindex_s *g) | |
262 { | |
263 sqd_uint32 i; | |
264 | |
265 /* Range checking. | |
266 */ | |
267 /* AW: gcc says: comparison always false die to limited range of data type */ | |
268 #ifndef CLUSTALO | |
269 if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI index."); | |
270 #endif | |
271 if (g->nkeys > SQD_UINT32_MAX) Die("Too many keys in GSI index."); | |
272 | |
273 GSIWriteHeader(fp, g->nfiles, g->nkeys); | |
274 for (i = 0; i < g->nfiles; i++) | |
275 GSIWriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]); | |
276 for (i = 0; i < g->nkeys; i++) | |
277 GSIWriteKeyRecord(fp, g->elems[i].key, g->elems[i].filenum, g->elems[i].offset); | |
278 } | |
279 | |
280 | |
281 | |
282 | |
283 | |
284 /* Function: GSIWriteHeader() | |
285 * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis] | |
286 * | |
287 * Purpose: Write the first record to an open GSI file: | |
288 * "GSI" <nfiles> <nkeys> | |
289 * | |
290 * Args: fp - open file to write to. | |
291 * nfiles - number of files indexed | |
292 * nkeys - number of keys indexed | |
293 * | |
294 * Returns: void | |
295 */ | |
296 void | |
297 GSIWriteHeader(FILE *fp, int nfiles, long nkeys) | |
298 { | |
299 char key[GSI_KEYSIZE]; | |
300 sqd_uint16 f1; | |
301 sqd_uint32 f2; | |
302 | |
303 /* beware potential range errors! | |
304 */ | |
305 if (nfiles > SQD_UINT16_MAX) Die("GSI: nfiles out of range"); | |
306 if (nkeys > SQD_UINT32_MAX) Die("GSI: nkeys out of range"); | |
307 | |
308 f1 = (sqd_uint16) nfiles; | |
309 f2 = (sqd_uint32) nkeys; | |
310 f1 = sre_hton16(f1); | |
311 f2 = sre_hton32(f2); | |
312 strcpy(key, "GSI"); | |
313 | |
314 if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; | |
315 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
316 if (fwrite(&f2, 4, 1, fp) < 1) PANIC; | |
317 } | |
318 | |
319 | |
320 /* Function: GSIWriteFileRecord() | |
321 * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis] | |
322 * | |
323 * Purpose: Write a file record to an open GSI file. | |
324 * | |
325 * Args: fp - open GSI file | |
326 * fname - file name (max 31 characters) | |
327 * idx - file number | |
328 * fmt - file format (e.g. kPearson, etc.) | |
329 * | |
330 * Returns: 0 on failure. 1 on success. | |
331 */ | |
332 int | |
333 GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt) | |
334 { | |
335 sqd_uint16 f1; | |
336 sqd_uint32 f2; | |
337 | |
338 if (strlen(fname) >= GSI_KEYSIZE) return 0; | |
339 if (idx > SQD_UINT16_MAX) Die("GSI: file index out of range"); | |
340 if (fmt > SQD_UINT32_MAX) Die("GSI: format index out of range"); | |
341 | |
342 f1 = (sqd_uint16) idx; | |
343 f2 = (sqd_uint32) fmt; | |
344 f1 = sre_hton16(f1); | |
345 f2 = sre_hton32(f2); | |
346 | |
347 if (fwrite(fname, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; | |
348 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
349 if (fwrite(&f2, 4, 1, fp) < 1) PANIC; | |
350 return 1; | |
351 } | |
352 | |
353 | |
354 /* Function: GSIWriteKeyRecord() | |
355 * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis] | |
356 * | |
357 * Purpose: Write a key record to a GSI file. | |
358 * | |
359 * Args: fp - open GSI file for writing | |
360 * key - key (max 31 char + \0) | |
361 * fileidx - which file number to find this key in | |
362 * offset - offset for this key | |
363 * | |
364 * Returns: 1 on success, else 0. | |
365 * will fail if key >= 32 chars, for instance. | |
366 */ | |
367 int | |
368 GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset) | |
369 { | |
370 sqd_uint16 f1; | |
371 sqd_uint32 f2; | |
372 | |
373 if (strlen(key) >= GSI_KEYSIZE) return 0; | |
374 if (fileidx > SQD_UINT16_MAX) Die("GSI: file index out of range"); | |
375 if (offset > SQD_UINT32_MAX) Die("GSI: offset out of range"); | |
376 | |
377 f1 = (sqd_uint16) fileidx; | |
378 f2 = (sqd_uint32) offset; | |
379 f1 = sre_hton16(f1); | |
380 f2 = sre_hton32(f2); | |
381 | |
382 if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC; | |
383 if (fwrite(&f1, 2, 1, fp) < 1) PANIC; | |
384 if (fwrite(&f2, 4, 1, fp) < 1) PANIC; | |
385 return 1; | |
386 } | |
387 |