Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-0.2.0/src/squid/ssi.c @ 0:ff1768533a07
Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author | clustalomega |
---|---|
date | Tue, 07 Jun 2011 17:04:25 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:ff1768533a07 |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 #include <stdio.h> | |
11 #include <stdlib.h> | |
12 #include <string.h> | |
13 #include <sys/stat.h> | |
14 #include <sys/types.h> | |
15 #include <unistd.h> | |
16 #include "squid.h" | |
17 #include "ssi.h" | |
18 #ifdef CLUSTALO | |
19 #include <limits.h> | |
20 #endif | |
21 | |
22 static sqd_uint32 v20magic = 0xf3f3e9b1; /* SSI 1.0: "ssi1" + 0x80808080 */ | |
23 static sqd_uint32 v20swap = 0xb1e9f3f3; /* byteswapped */ | |
24 | |
25 static int read_i16(FILE *fp, sqd_uint16 *ret_result); | |
26 static int read_i32(FILE *fp, sqd_uint32 *ret_result); | |
27 static int read_i64(FILE *fp, sqd_uint64 *ret_result); | |
28 static int read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset); | |
29 static int write_i16(FILE *fp, sqd_uint16 n); | |
30 static int write_i32(FILE *fp, sqd_uint32 n); | |
31 static int write_i64(FILE *fp, sqd_uint64 n); | |
32 static int write_offset(FILE *fp, SSIOFFSET *offset); | |
33 static int binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, | |
34 sqd_uint32 recsize, sqd_uint32 maxidx); | |
35 static int indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, | |
36 sqd_uint32 n); | |
37 static void clear_ssifile(SSIFILE *sfp); | |
38 static sqd_uint64 current_index_size(SSIINDEX *g); | |
39 static int activate_external_sort(SSIINDEX *g); | |
40 static int load_indexfile(SSIFILE *sfp); | |
41 static int parse_pkey_info(char *buf, char mode, struct ssipkey_s *pkey); | |
42 static int parse_skey_info(char *buf, struct ssiskey_s *skey); | |
43 | |
44 /* Function: SSIOpen() | |
45 * Date: SRE, Sun Dec 31 12:40:03 2000 [St. Louis] | |
46 * | |
47 * Purpose: Opens the SSI index file {filename} and returns | |
48 * a SSIFILE * stream thru {ret_sfp}. | |
49 * The caller must eventually close this stream using | |
50 * SSIClose(). More than one index file can be open | |
51 * at once. | |
52 * | |
53 * Args: filename - full path to a SSI index file | |
54 * | |
55 * Returns: Returns 0 on success, nonzero on failure. | |
56 */ | |
57 int | |
58 SSIOpen(char *filename, SSIFILE **ret_sfp) | |
59 { | |
60 SSIFILE *sfp = NULL; | |
61 int status; | |
62 if ((sfp = malloc(sizeof(SSIFILE))) == NULL) return SSI_ERR_MALLOC; | |
63 if ((sfp->fp = fopen(filename, "rb")) == NULL) { | |
64 free(sfp); | |
65 return SSI_ERR_NOFILE; | |
66 } | |
67 status = load_indexfile(sfp); | |
68 *ret_sfp = sfp; | |
69 return status; | |
70 } | |
71 /* load_indexfile(): given a SSIFILE structure with an open and positioned | |
72 * stream (fp) -- but no other data loaded -- read the next SSIFILE | |
73 * in from disk. We use this routine without its SSIOpen() wrapper | |
74 * as part of the external mergesort when creating large indices. | |
75 */ | |
76 static int | |
77 load_indexfile(SSIFILE *sfp) | |
78 { | |
79 sqd_uint32 magic; | |
80 sqd_uint16 i; /* counter over files */ | |
81 int status; /* overall return status if an error is thrown */ | |
82 | |
83 status = SSI_ERR_BADFORMAT; /* default: almost every kind of error is a bad format error */ | |
84 | |
85 sfp->filename = NULL; | |
86 sfp->fileformat = NULL; | |
87 sfp->fileflags = NULL; | |
88 sfp->bpl = NULL; | |
89 sfp->rpl = NULL; | |
90 sfp->nfiles = 0; | |
91 if (! read_i32(sfp->fp, &magic)) {status = SSI_ERR_BADMAGIC; goto FAILURE; } | |
92 if (magic != v20magic && magic != v20swap) {status = SSI_ERR_BADMAGIC; goto FAILURE; } | |
93 if (! read_i32(sfp->fp, &(sfp->flags))) goto FAILURE; | |
94 | |
95 /* If we have 64-bit offsets, make sure we can deal with them. | |
96 */ | |
97 #ifndef HAS_64BIT_FILE_OFFSETS | |
98 if ((sfp->flags & SSI_USE64_INDEX) || | |
99 (sfp->flags & SSI_USE64)) | |
100 { status = SSI_ERR_NO64BIT; goto FAILURE; } | |
101 #endif | |
102 | |
103 sfp->imode = (sfp->flags & SSI_USE64_INDEX) ? SSI_OFFSET_I64 : SSI_OFFSET_I32; | |
104 sfp->smode = (sfp->flags & SSI_USE64) ? SSI_OFFSET_I64 : SSI_OFFSET_I32; | |
105 | |
106 if (! read_i16(sfp->fp, &(sfp->nfiles))) goto FAILURE; | |
107 if (! read_i32(sfp->fp, &(sfp->nprimary))) goto FAILURE; | |
108 if (! read_i32(sfp->fp, &(sfp->nsecondary))) goto FAILURE; | |
109 if (! read_i32(sfp->fp, &(sfp->flen))) goto FAILURE; | |
110 if (! read_i32(sfp->fp, &(sfp->plen))) goto FAILURE; | |
111 if (! read_i32(sfp->fp, &(sfp->slen))) goto FAILURE; | |
112 if (! read_i32(sfp->fp, &(sfp->frecsize))) goto FAILURE; | |
113 if (! read_i32(sfp->fp, &(sfp->precsize))) goto FAILURE; | |
114 if (! read_i32(sfp->fp, &(sfp->srecsize))) goto FAILURE; | |
115 | |
116 if (! read_offset(sfp->fp, sfp->imode, &(sfp->foffset))) goto FAILURE; | |
117 if (! read_offset(sfp->fp, sfp->imode, &(sfp->poffset))) goto FAILURE; | |
118 if (! read_offset(sfp->fp, sfp->imode, &(sfp->soffset))) goto FAILURE; | |
119 | |
120 /* Read the file information and keep it. | |
121 * We expect the number of files to be small, so reading it | |
122 * once should be advantageous overall. If SSI ever had to | |
123 * deal with large numbers of files, you'd probably want to | |
124 * read file information on demand. | |
125 */ | |
126 if (sfp->nfiles == 0) goto FAILURE; | |
127 if ((sfp->filename=malloc(sizeof(char *) *sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
128 for (i = 0; i < sfp->nfiles; i++) sfp->filename[i] = NULL; | |
129 if ((sfp->fileformat=malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
130 if ((sfp->fileflags =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
131 if ((sfp->bpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
132 if ((sfp->rpl =malloc(sizeof(sqd_uint32)*sfp->nfiles)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
133 | |
134 for (i = 0; i < sfp->nfiles; i++) | |
135 { | |
136 /* We have to explicitly position, because header and file | |
137 * records may expand in the future; frecsize and foffset | |
138 * give us forwards compatibility. | |
139 */ | |
140 if (indexfile_position(sfp, &(sfp->foffset), sfp->frecsize, i) !=0) goto FAILURE; | |
141 if ((sfp->filename[i] =malloc(sizeof(char)*sfp->flen)) == NULL) {status = SSI_ERR_MALLOC; goto FAILURE; } | |
142 if (fread(sfp->filename[i],sizeof(char),sfp->flen, sfp->fp)!=sfp->flen) goto FAILURE; | |
143 if (! read_i32(sfp->fp, &(sfp->fileformat[i]))) goto FAILURE; | |
144 if (! read_i32(sfp->fp, &(sfp->fileflags[i]))) goto FAILURE; | |
145 if (! read_i32(sfp->fp, &(sfp->bpl[i]))) goto FAILURE; | |
146 if (! read_i32(sfp->fp, &(sfp->rpl[i]))) goto FAILURE; | |
147 } | |
148 | |
149 /* Success. Return 0. | |
150 */ | |
151 return 0; | |
152 | |
153 FAILURE: | |
154 /* Failure: free the damaged structure, return status code. | |
155 */ | |
156 SSIClose(sfp); | |
157 return status; | |
158 } | |
159 | |
160 | |
161 | |
162 /* Function: SSIGetOffsetByName() | |
163 * Date: SRE, Sun Dec 31 13:55:31 2000 [St. Louis] | |
164 * | |
165 * Purpose: Looks up the string {key} in the open index {sfp}. | |
166 * {key} can be either a primary or secondary key. If {key} | |
167 * is found, {*ret_fh} contains a unique handle on | |
168 * the file that contains {key} (suitable for an SSIFileInfo() | |
169 * call, or for comparison to the handle of the last file | |
170 * that was opened for retrieval), and {offset} is filled | |
171 * in with the offset in that file. | |
172 * | |
173 * Args: sfp - open index file | |
174 * key - string to search for | |
175 * ret_fh - RETURN: handle on file that key is in | |
176 * ret_offset - RETURN: offset of the start of that key's record | |
177 * | |
178 * Returns: 0 on success. | |
179 * non-zero on error. | |
180 */ | |
181 int | |
182 SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh, | |
183 SSIOFFSET *ret_offset) | |
184 { | |
185 int status; | |
186 sqd_uint16 fnum; | |
187 | |
188 /* Look in the primary keys. | |
189 */ | |
190 status = binary_search(sfp, key, sfp->plen, &(sfp->poffset), sfp->precsize, | |
191 sfp->nprimary); | |
192 if (status == 0) { | |
193 /* We found it as a primary key; get our data & return. | |
194 */ | |
195 if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA; | |
196 *ret_fh = (int) fnum; | |
197 if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA; | |
198 | |
199 return 0; /* success! (we don't need the other key data) */ | |
200 } else if (status == SSI_ERR_NO_SUCH_KEY) { | |
201 /* Not in the primary keys? OK, try the secondary keys. | |
202 */ | |
203 if (sfp->nsecondary > 0) { | |
204 char *pkey; | |
205 status = binary_search(sfp, key, sfp->slen, &(sfp->soffset), sfp->srecsize, | |
206 sfp->nsecondary); | |
207 if (status != 0) return status; | |
208 if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC; | |
209 if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA; | |
210 | |
211 status = SSIGetOffsetByName(sfp, pkey, ret_fh, ret_offset); | |
212 free(pkey); | |
213 } | |
214 return status; | |
215 | |
216 } else return status; | |
217 /*NOTREACHED*/ | |
218 } | |
219 | |
220 /* Function: SSIGetOffsetByNumber() | |
221 * Date: SRE, Mon Jan 1 19:42:42 2001 [St. Louis] | |
222 * | |
223 * Purpose: Looks up primary key #{n} in the open index {sfp}. | |
224 * {n} ranges from 0..nprimary-1. When key #{n} | |
225 * is found, {*ret_fh} contains a unique | |
226 * handle on the file that contains {key} (suitable | |
227 * for an SSIFileInfo() call, or for comparison to | |
228 * the handle of the last file that was opened for retrieval), | |
229 * and {offset} is filled in with the offset in that file. | |
230 * | |
231 * Args: sfp - open index file | |
232 * n - primary key number to retrieve. | |
233 * ret_fh - RETURN: handle on file that key is in | |
234 * ret_offset - RETURN: offset of the start of that key's record | |
235 * | |
236 * Returns: 0 on success. | |
237 * non-zero on error. | |
238 */ | |
239 int | |
240 SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh, SSIOFFSET *ret_offset) | |
241 { | |
242 sqd_uint16 fnum; | |
243 char *pkey; | |
244 | |
245 if (n >= sfp->nprimary) return SSI_ERR_NO_SUCH_KEY; | |
246 if (indexfile_position(sfp, &(sfp->poffset), sfp->precsize, n) != 0) | |
247 return SSI_ERR_SEEK_FAILED; | |
248 | |
249 if ((pkey = malloc(sizeof(char) * sfp->plen)) == NULL) return SSI_ERR_MALLOC; | |
250 if (fread(pkey, sizeof(char), sfp->plen, sfp->fp) != sfp->plen) return SSI_ERR_NODATA; | |
251 if (! read_i16(sfp->fp, &fnum)) return SSI_ERR_NODATA; | |
252 if (! read_offset(sfp->fp, sfp->smode, ret_offset)) return SSI_ERR_NODATA; | |
253 *ret_fh = fnum; | |
254 free(pkey); | |
255 return 0; | |
256 } | |
257 | |
258 /* Function: SSIGetSubseqOffset() | |
259 * Date: SRE, Mon Jan 1 19:49:31 2001 [St. Louis] | |
260 * | |
261 * Purpose: Implements SSI_FAST_SUBSEQ. | |
262 * | |
263 * Looks up a primary or secondary {key} in the open | |
264 * index {sfp}. Asks for the nearest offset to a | |
265 * subsequence starting at position {requested_start} | |
266 * in the sequence (numbering the sequence 1..L). | |
267 * If {key} is found, on return, {ret_fh} | |
268 * contains a unique handle on the file that contains | |
269 * {key} (suitable for an SSIFileInfo() call, or for | |
270 * comparison to the handle of the last file that was | |
271 * opened for retrieval); {record_offset} contains the | |
272 * disk offset to the start of the record; {data_offset} | |
273 * contains the disk offset either exactly at the requested | |
274 * residue, or at the start of the line containing the | |
275 * requested residue; {ret_actual_start} contains the | |
276 * coordinate (1..L) of the first valid residue at or | |
277 * after {data_offset}. {ret_actual_start} is <= | |
278 * {requested_start}. | |
279 * | |
280 * Args: sfp - open index file | |
281 * key - primary or secondary key to find | |
282 * requested_start - residue we'd like to start at (1..L) | |
283 * ret_fh - RETURN: handle for file the key is in | |
284 * record_offset - RETURN: offset of entire record | |
285 * data_offset - RETURN: offset of subseq (see above) | |
286 * ret_actual_start- RETURN: coord (1..L) of residue at data_offset | |
287 * | |
288 * Returns: 0 on success, non-zero on failure. | |
289 */ | |
290 int | |
291 SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start, | |
292 int *ret_fh, SSIOFFSET *record_offset, | |
293 SSIOFFSET *data_offset, int *ret_actual_start) | |
294 { | |
295 int status; | |
296 sqd_uint32 len; | |
297 int r, b, i, l; /* tmp variables for "clarity", to match docs */ | |
298 | |
299 /* Look up the key. Rely on the fact that SSIGetOffsetByName() | |
300 * leaves the index file positioned at the rest of the data for this key. | |
301 */ | |
302 status = SSIGetOffsetByName(sfp, key, ret_fh, record_offset); | |
303 if (status != 0) return status; | |
304 | |
305 /* Check that we're allowed to do subseq lookup on that file. | |
306 */ | |
307 if (! (sfp->fileflags[*ret_fh] & SSI_FAST_SUBSEQ)) | |
308 return SSI_ERR_NO_SUBSEQS; | |
309 | |
310 /* Read the data we need for subseq lookup | |
311 */ | |
312 if (! read_offset(sfp->fp, sfp->smode, data_offset)) return SSI_ERR_NODATA; | |
313 if (! read_i32(sfp->fp, &len)) return SSI_ERR_NODATA; | |
314 | |
315 /* Set up tmp variables for clarity of equations below, | |
316 * and to make them match documentation (ssi-format.tex). | |
317 */ | |
318 r = sfp->rpl[*ret_fh]; /* residues per line */ | |
319 b = sfp->bpl[*ret_fh]; /* bytes per line */ | |
320 i = requested_start; /* start position 1..L */ | |
321 l = (i-1)/r; /* data line # (0..) that the residue is on */ | |
322 if (r == 0 || b == 0) return SSI_ERR_NO_SUBSEQS; | |
323 if (i < 0 || i > len) return SSI_ERR_RANGE; | |
324 | |
325 /* When b = r+1, there's nothing but sequence on each data line (and the \0), | |
326 * and we can find each residue precisely. | |
327 */ | |
328 if (b == r+1) { | |
329 if (sfp->smode == SSI_OFFSET_I32) { | |
330 data_offset->mode = SSI_OFFSET_I32; | |
331 data_offset->off.i32 = data_offset->off.i32 + l*b + (i-1)%r; | |
332 } else if (sfp->smode == SSI_OFFSET_I64) { | |
333 data_offset->mode = SSI_OFFSET_I64; | |
334 data_offset->off.i64 = data_offset->off.i64 + l*b + (i-1)%r; | |
335 } | |
336 *ret_actual_start = requested_start; | |
337 } else { | |
338 /* else, there's other stuff on seq lines, so the best | |
339 * we can do easily is to position at start of relevant line. | |
340 */ | |
341 if (sfp->smode == SSI_OFFSET_I32) { | |
342 data_offset->mode = SSI_OFFSET_I32; | |
343 data_offset->off.i32 = data_offset->off.i32 + l*b; | |
344 } else if (sfp->smode == SSI_OFFSET_I64) { | |
345 data_offset->mode = SSI_OFFSET_I64; | |
346 data_offset->off.i64 = data_offset->off.i64 + l*b; | |
347 } | |
348 /* yes, the eq below is = 1 + (i-1)/r*r but it's not = i. that's an integer /. */ | |
349 *ret_actual_start = 1 + l*r; | |
350 } | |
351 return 0; | |
352 } | |
353 | |
354 /* Function: SSISetFilePosition() | |
355 * Date: SRE, Tue Jan 2 09:13:46 2001 [St. Louis] | |
356 * | |
357 * Purpose: Uses {offset} to sets the file position for {fp}, usually an | |
358 * open sequence file, relative to the start of the file. | |
359 * Hides the details of system-dependent shenanigans necessary for | |
360 * file positioning in large (>2 GB) files. | |
361 * | |
362 * Behaves just like fseek(fp, offset, SEEK_SET) for 32 bit | |
363 * offsets and <2 GB files. | |
364 * | |
365 * Warning: if all else fails, in desperation, it will try to | |
366 * use fsetpos(). This requires making assumptions about fpos_t | |
367 * that may be unwarranted... assumptions that ANSI C prohibits | |
368 * me from making... though I believe the ./configure | |
369 * script robustly tests whether I can play with fpos_t like this. | |
370 * | |
371 * Args: fp - file to position. | |
372 * offset - SSI offset relative to file start. | |
373 * | |
374 * Returns: 0 on success, nonzero on error. | |
375 */ | |
376 int | |
377 SSISetFilePosition(FILE *fp, SSIOFFSET *offset) | |
378 { | |
379 if (offset->mode == SSI_OFFSET_I32) { | |
380 if (fseek(fp, offset->off.i32, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; | |
381 } | |
382 #ifndef HAS_64BIT_FILE_OFFSETS | |
383 else return SSI_ERR_NO64BIT; | |
384 #elif defined HAVE_FSEEKO && SIZEOF_OFF_T == 8 | |
385 else if (fseeko(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; | |
386 #elif defined HAVE_FSEEKO64 && SIZEOF_OFF64_T == 8 | |
387 else if (fseeko64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; | |
388 #elif defined HAVE_FSEEK64 | |
389 else if (fseek64(fp, offset->off.i64, SEEK_SET) != 0) return SSI_ERR_SEEK_FAILED; | |
390 #elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 | |
391 else if (fsetpos(fp, &(offset->off.i64)) != 0) return SSI_ERR_SEEK_FAILED; | |
392 #endif | |
393 return 0; | |
394 } | |
395 | |
396 | |
397 /* Function: SSIFileInfo() | |
398 * Date: SRE, Tue Jan 2 10:31:01 2001 [St. Louis] | |
399 * | |
400 * Purpose: Given a file number {fh} in an open index file | |
401 * {sfp}, retrieve file name {ret_filename} and | |
402 * the file format {ret_format}. | |
403 * | |
404 * {ret_filename} is a pointer to a string maintained | |
405 * internally by {sfp}. It should not be free'd; | |
406 * SSIClose(sfp) takes care of it. | |
407 * | |
408 * Args: sfp - open index file | |
409 * fh - handle on file to look up | |
410 * ret_filename - RETURN: name of file n | |
411 * ret_format - RETURN: format of file n | |
412 * | |
413 * Returns: 0 on success, nonzero on failure. | |
414 */ | |
415 int | |
416 SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format) | |
417 { | |
418 if (fh < 0 || fh >= sfp->nfiles) return SSI_ERR_BADARG; | |
419 *ret_filename = sfp->filename[fh]; | |
420 *ret_format = sfp->fileformat[fh]; | |
421 return 0; | |
422 } | |
423 | |
424 /* Function: SSIClose() | |
425 * Date: SRE, Sun Dec 31 14:56:37 2000 [St. Louis] | |
426 * | |
427 * Purpose: Close an open {SSIFILE *}. | |
428 * | |
429 * Args: sfp - index file to close. | |
430 * | |
431 * Returns: (void) | |
432 */ | |
433 void | |
434 SSIClose(SSIFILE *sfp) | |
435 { | |
436 if (sfp != NULL) { | |
437 clear_ssifile(sfp); | |
438 if (sfp->fp != NULL) fclose(sfp->fp); | |
439 free(sfp); | |
440 } | |
441 } | |
442 /* clear_ssifile(): free the innards of SSIFILE, without | |
443 * destroying the structure or closing the stream. | |
444 */ | |
445 static void | |
446 clear_ssifile(SSIFILE *sfp) | |
447 { | |
448 int i; | |
449 | |
450 if (sfp->filename != NULL) { | |
451 for (i = 0; i < sfp->nfiles; i++) | |
452 if (sfp->filename[i] != NULL) free(sfp->filename[i]); | |
453 free(sfp->filename); | |
454 } | |
455 if (sfp->fileformat != NULL) free(sfp->fileformat); | |
456 if (sfp->fileflags != NULL) free(sfp->fileflags); | |
457 if (sfp->bpl != NULL) free(sfp->bpl); | |
458 if (sfp->rpl != NULL) free(sfp->rpl); | |
459 } | |
460 | |
461 | |
462 /* Function: SSIRecommendMode() | |
463 * Date: SRE, Fri Feb 16 08:23:47 2001 [St. Louis] | |
464 * | |
465 * Purpose: Examines the file and determines whether it should be | |
466 * indexed with large file support or not; returns | |
467 * SSI_OFFSET_I32 for most files, SSI_OFFSET_I64 for large | |
468 * files, or -1 on failure. | |
469 * | |
470 * Args: file - name of file to check for size | |
471 * | |
472 * Returns: -1 on failure (including case where file is too big) | |
473 * SSI_OFFSET_I32 for most files (<= 2^31-1 bytes) | |
474 * SSI_OFFSET_I64 for large files (> 2^31-1 bytes) | |
475 */ | |
476 int | |
477 SSIRecommendMode(char *file) | |
478 { | |
479 #if HAVE_STAT64 | |
480 struct stat64 s1; | |
481 if (stat64(file, &s1) == 0) { | |
482 if (s1.st_size <= 2146483647L) return SSI_OFFSET_I32; | |
483 else return SSI_OFFSET_I64; | |
484 } | |
485 #else | |
486 struct stat s2; | |
487 if (stat(file, &s2) == 0) { | |
488 if (s2.st_size <= 2146483647L) return SSI_OFFSET_I32; | |
489 else return SSI_OFFSET_I64; | |
490 } | |
491 #endif | |
492 return -1; | |
493 } | |
494 | |
495 | |
496 /* Function: SSICreateIndex() | |
497 * Date: SRE, Tue Jan 2 11:23:25 2001 [St. Louis] | |
498 * | |
499 * Purpose: Creates and initializes a SSI index structure. | |
500 * Sequence file offset type is specified by {mode}. | |
501 * | |
502 * Args: mode - SSI_OFFSET_I32 or SSI_OFFSET_I64, sequence file index mode. | |
503 * | |
504 * Returns: ptr to new index structure, or NULL on failure. | |
505 * Caller is responsible for free'ing the returned | |
506 * structure with SSIFreeIndex(). | |
507 */ | |
508 SSIINDEX * | |
509 SSICreateIndex(int mode) | |
510 { | |
511 SSIINDEX *g; | |
512 | |
513 g = NULL; | |
514 if ((g = malloc(sizeof(SSIINDEX))) == NULL) goto FAILURE; | |
515 g->smode = mode; | |
516 g->imode = SSI_OFFSET_I32; /* index always starts as 32-bit; may get upgraded later */ | |
517 g->external = FALSE; | |
518 g->max_ram = SSI_MAXRAM; | |
519 | |
520 #ifndef HAS_64BIT_FILE_OFFSETS | |
521 if (mode == SSI_OFFSET_I64) | |
522 Die("\ | |
523 Can't create a 64-bit SSI index on this system, sorry;\n\ | |
524 I don't have 64-bit file offset functions available.\n"); | |
525 #endif | |
526 | |
527 g->filenames = NULL; | |
528 g->fileformat = NULL; | |
529 g->bpl = NULL; | |
530 g->rpl = NULL; | |
531 g->flen = 0; | |
532 g->nfiles = 0; | |
533 | |
534 g->pkeys = NULL; | |
535 g->plen = 0; | |
536 g->nprimary = 0; | |
537 g->ptmpfile = "tmp.ssi.1"; /* hardcoded, for now. */ | |
538 g->ptmp = NULL; | |
539 | |
540 g->skeys = NULL; | |
541 g->slen = 0; | |
542 g->nsecondary = 0; | |
543 g->stmpfile = "tmp.ssi.2"; /* hardcoded, for now. */ | |
544 g->stmp = NULL; | |
545 | |
546 /* All mallocs must go after NULL initializations, because of the cleanup strategy; | |
547 * we'll try to free anything non-NULL if a malloc fails. | |
548 */ | |
549 if ((g->filenames = malloc(sizeof(char *) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; | |
550 if ((g->fileformat= malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; | |
551 if ((g->bpl = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; | |
552 if ((g->rpl = malloc(sizeof(sqd_uint32) * SSI_FILE_BLOCK)) == NULL) goto FAILURE; | |
553 | |
554 if ((g->pkeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; | |
555 if ((g->skeys = malloc(sizeof(struct ssipkey_s)* SSI_KEY_BLOCK))== NULL) goto FAILURE; | |
556 | |
557 return g; | |
558 | |
559 FAILURE: | |
560 SSIFreeIndex(g); /* free the damaged structure */ | |
561 return NULL; | |
562 } | |
563 | |
564 /* Function: SSIGetFilePosition() | |
565 * Date: SRE, Tue Jan 2 09:59:26 2001 [St. Louis] | |
566 * | |
567 * Purpose: Fills {ret_offset} with the current disk | |
568 * offset of {fp}, relative to the start of the file. | |
569 * {mode} is set to either SSI_OFFSET_I32 or | |
570 * SSI_OFFSET_I64. If {mode} is _I32 (32 bit), just wraps | |
571 * a call to ftell(); otherwise deals with system-dependent | |
572 * details of 64-bit file offsets. | |
573 * | |
574 * Args: fp - open stream | |
575 * mode - SSI_OFFSET_I32 or SSI_OFFSET_I64 | |
576 * ret_offset - RETURN: file position | |
577 * | |
578 * Returns: 0 on success. nonzero on error. | |
579 */ | |
580 int | |
581 SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset) | |
582 { | |
583 if (mode == SSI_OFFSET_I32) | |
584 { | |
585 ret_offset->mode = SSI_OFFSET_I32; | |
586 ret_offset->off.i32 = ftell(fp); | |
587 if (ret_offset->off.i32 == -1) return SSI_ERR_TELL_FAILED; | |
588 } | |
589 else if (mode != SSI_OFFSET_I64) abort(); /* only happens on a coding error */ | |
590 else { | |
591 ret_offset->mode = SSI_OFFSET_I64; | |
592 #ifndef HAS_64BIT_FILE_OFFSETS | |
593 return SSI_ERR_NO64BIT; | |
594 #elif defined HAVE_FTELLO && SIZEOF_OFF_T == 8 | |
595 if ((ret_offset->off.i64 = ftello(fp)) == -1) return SSI_ERR_TELL_FAILED; | |
596 #elif defined HAVE_FTELLO64 && SIZEOF_OFF64_T == 8 | |
597 if ((ret_offset->off.i64 = ftello64(fp)) == -1) return SSI_ERR_TELL_FAILED; | |
598 #elif defined HAVE_FTELL64 | |
599 if ((ret_offset->off.i64 = ftell64(fp)) == -1) return SSI_ERR_TELL_FAILED; | |
600 #elif defined ARITHMETIC_FPOS_T && SIZEOF_FPOS_T == 8 | |
601 if (fgetpos(fp, &(ret_offset->off.i64)) != 0) return SSI_ERR_TELL_FAILED; | |
602 #endif | |
603 } | |
604 return 0; | |
605 } | |
606 | |
607 /* Function: SSIAddFileToIndex() | |
608 * Date: SRE, Tue Jan 2 12:54:36 2001 [St. Louis] | |
609 * | |
610 * Purpose: Adds the sequence file {filename}, which is known to | |
611 * be in format {fmt}, to the index {g}. Creates and returns | |
612 * a unique filehandle {fh} for then associating primary keys | |
613 * with this file using SSIAddPrimaryKeyToIndex(). | |
614 * | |
615 * Args: g - active index | |
616 * filename - file to add | |
617 * fmt - format code for this file (e.g. SQFILE_FASTA) | |
618 * ret_fh - RETURN: unique handle for this file | |
619 * | |
620 * Returns: 0 on success; nonzero on error. | |
621 */ | |
622 int | |
623 SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh) | |
624 { | |
625 int n; | |
626 | |
627 if (g->nfiles >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES; | |
628 | |
629 n = strlen(filename); | |
630 if ((n+1) > g->flen) g->flen = n+1; | |
631 | |
632 g->filenames[g->nfiles] = FileTail(filename, FALSE); | |
633 g->fileformat[g->nfiles] = fmt; | |
634 g->bpl[g->nfiles] = 0; | |
635 g->rpl[g->nfiles] = 0; | |
636 *ret_fh = g->nfiles; /* handle is simply = file number */ | |
637 g->nfiles++; | |
638 | |
639 if (g->nfiles % SSI_FILE_BLOCK == 0) { | |
640 g->filenames = realloc(g->filenames, sizeof(char *) * (g->nfiles+SSI_FILE_BLOCK)); | |
641 if (g->filenames == NULL) return SSI_ERR_MALLOC; | |
642 g->fileformat= realloc(g->fileformat, sizeof(sqd_uint32) * (g->nfiles+SSI_FILE_BLOCK)); | |
643 if (g->fileformat == NULL) return SSI_ERR_MALLOC; | |
644 g->bpl = realloc(g->bpl, sizeof(sqd_uint32) * (g->nfiles+SSI_FILE_BLOCK)); | |
645 if (g->bpl == NULL) return SSI_ERR_MALLOC; | |
646 g->rpl = realloc(g->rpl, sizeof(sqd_uint32) * (g->nfiles+SSI_FILE_BLOCK)); | |
647 if (g->rpl == NULL) return SSI_ERR_MALLOC; | |
648 } | |
649 return 0; | |
650 } | |
651 | |
652 | |
653 /* Function: SSISetFileForSubseq() | |
654 * Date: SRE, Tue Jan 9 10:02:05 2001 [St. Louis] | |
655 * | |
656 * Purpose: Set SSI_FAST_SUBSEQ for the file indicated by | |
657 * filehandle {fh} in the index {g}, setting | |
658 * parameters {bpl} and {rpl} to the values given. | |
659 * {bpl} is the number of bytes per sequence data line. | |
660 * {rpl} is the number of residues per sequence data line. | |
661 * Caller must be sure that {bpl} and {rpl} do not change | |
662 * on any line of any sequence record in the file | |
663 * (except for the last data line of each record). If | |
664 * this is not the case in this file, SSI_FAST_SUBSEQ | |
665 * will not work, and this routine should not be | |
666 * called. | |
667 * | |
668 * Args: g - the active index | |
669 * fh - handle for file to set SSI_FAST_SUBSEQ on | |
670 * bpl - bytes per data line | |
671 * rpl - residues per data line | |
672 * | |
673 * Returns: 0 on success; 1 on error. | |
674 */ | |
675 int | |
676 SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl) | |
677 { | |
678 if (fh < 0 || fh >= g->nfiles) return SSI_ERR_BADARG; | |
679 if (bpl <= 0 || rpl <= 0) return SSI_ERR_BADARG; | |
680 g->bpl[fh] = bpl; | |
681 g->rpl[fh] = rpl; | |
682 return 0; | |
683 } | |
684 | |
685 | |
686 /* Function: SSIAddPrimaryKeyToIndex() | |
687 * Date: SRE, Tue Jan 2 11:50:54 2001 [St. Louis] | |
688 * | |
689 * Purpose: Put primary key {key} in the index {g}, while telling | |
690 * the index this primary key is in the file associated | |
691 * with filehandle {fh} (returned by a previous call | |
692 * to SSIAddFileToIndex()), and its record starts at | |
693 * position {r_off} in the file. | |
694 * | |
695 * {d_off} and {L} are optional; they may be left unset | |
696 * by passing NULL and 0, respectively. (If one is | |
697 * provided, both must be provided.) If they are provided, | |
698 * {d_off} gives the position of the first line of sequence | |
699 * data in the record, and {L} gives the length of | |
700 * the sequence in residues. They are used when | |
701 * SSI_FAST_SUBSEQ is set for this file. If SSI_FAST_SUBSEQ | |
702 * is not set for the file, {d_off} and {L} will be | |
703 * ignored by the index reading API even if they are stored | |
704 * by the index writing API, so it doesn't hurt for the | |
705 * indexing program to provide them; typically they | |
706 * won't know whether it's safe to set SSI_FAST_SUBSEQ | |
707 * for the whole file until the whole file has been | |
708 * read and every key has already been added to the index. | |
709 * | |
710 * Args: g - active index | |
711 * key - primary key to add | |
712 * fh - handle on file that this key's in | |
713 * r_off - offset to start of record | |
714 * d_off - offset to start of sequence data | |
715 * L - length of sequence, or 0 | |
716 * | |
717 * Returns: 0 on success, nonzero on error. | |
718 */ | |
719 int | |
720 SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh, | |
721 SSIOFFSET *r_off, SSIOFFSET *d_off, int L) | |
722 { | |
723 int n; /* a string length */ | |
724 | |
725 if (fh >= SSI_MAXFILES) return SSI_ERR_TOOMANY_FILES; | |
726 if (g->nprimary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; | |
727 if (L > 0 && d_off == NULL) abort(); /* need both. */ | |
728 | |
729 /* Before adding the key: check how big our index is. | |
730 * If it's getting too large, switch to external mode. | |
731 */ | |
732 if (!g->external && current_index_size(g) >= g->max_ram) | |
733 if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE; | |
734 | |
735 /* Update maximum pkey length, if needed. | |
736 */ | |
737 n = strlen(key); | |
738 if ((n+1) > g->plen) g->plen = n+1; | |
739 | |
740 /* External mode? Simply append to disk... | |
741 */ | |
742 if (g->external) { | |
743 if (g->smode == SSI_OFFSET_I32) { | |
744 fprintf(g->ptmp, "%s\t%d\t%lu\t%lu\t%lu\n", | |
745 key, fh, (unsigned long) r_off->off.i32, | |
746 (unsigned long) (d_off == NULL? 0 : d_off->off.i32), | |
747 (unsigned long) L); | |
748 } else { | |
749 #ifdef CLUSTALO | |
750 fprintf(g->ptmp, "%s\t%d\t%llu\t%llu\t%lu\n", | |
751 key, fh, (unsigned long long)r_off->off.i64, | |
752 d_off == NULL? 0 : (unsigned long long) d_off->off.i64, | |
753 (unsigned long) L); | |
754 #else | |
755 fprintf(g->ptmp, "%s\t%d\t%llu\t%llu\t%lu\n", | |
756 key, fh, r_off->off.i64, | |
757 d_off == NULL? 0 : d_off->off.i64, | |
758 (unsigned long) L); | |
759 #endif | |
760 } | |
761 g->nprimary++; | |
762 return 0; | |
763 } | |
764 | |
765 /* Else: internal mode, keep keys in memory... | |
766 */ | |
767 if ((g->pkeys[g->nprimary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; | |
768 g->pkeys[g->nprimary].fnum = (sqd_uint16) fh; | |
769 g->pkeys[g->nprimary].r_off = *r_off; | |
770 if (d_off != NULL && L > 0) { | |
771 g->pkeys[g->nprimary].d_off = *d_off; | |
772 g->pkeys[g->nprimary].len = L; | |
773 } else { | |
774 /* yeah, this looks stupid, but look: we have to give a valid | |
775 looking, non-NULL d_off of some sort, or writes will fail. | |
776 It's going to be unused anyway. */ | |
777 g->pkeys[g->nprimary].d_off = *r_off; | |
778 g->pkeys[g->nprimary].len = 0; | |
779 } | |
780 g->nprimary++; | |
781 | |
782 if (g->nprimary % SSI_KEY_BLOCK == 0) { | |
783 g->pkeys = realloc(g->pkeys, sizeof(struct ssipkey_s) * (g->nprimary+SSI_KEY_BLOCK)); | |
784 if (g->pkeys == NULL) return SSI_ERR_MALLOC; | |
785 } | |
786 return 0; | |
787 } | |
788 | |
789 | |
790 /* Function: SSIAddSecondaryKeyToIndex() | |
791 * Date: SRE, Tue Jan 2 12:44:40 2001 [St. Louis] | |
792 * | |
793 * Purpose: Puts secondary key {key} in the index {g}, associating | |
794 * it with primary key {pkey} that was previously | |
795 * registered by SSIAddPrimaryKeyToIndex(). | |
796 * | |
797 * Args: g - active index | |
798 * key - secondary key to add | |
799 * pkey - primary key to associate this key with | |
800 * | |
801 * Returns: 0 on success, nonzero on failure. | |
802 */ | |
803 int | |
804 SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey) | |
805 { | |
806 int n; /* a string length */ | |
807 | |
808 if (g->nsecondary >= SSI_MAXKEYS) return SSI_ERR_TOOMANY_KEYS; | |
809 | |
810 /* Before adding the key: check how big our index is. | |
811 * If it's getting too large, switch to external mode. | |
812 */ | |
813 if (!g->external && current_index_size(g) >= g->max_ram) | |
814 if (activate_external_sort(g) != 0) return SSI_ERR_NOFILE; | |
815 | |
816 /* Update maximum secondary key length, if necessary. | |
817 */ | |
818 n = strlen(key); | |
819 if ((n+1) > g->slen) g->slen = n+1; | |
820 | |
821 /* if external mode: write info to disk. | |
822 */ | |
823 if (g->external) { | |
824 fprintf(g->stmp, "%s\t%s\n", key, pkey); | |
825 g->nsecondary++; | |
826 return 0; | |
827 } | |
828 | |
829 /* else, internal mode... store info in memory. | |
830 */ | |
831 if ((g->skeys[g->nsecondary].key = sre_strdup(key, n)) == NULL) return SSI_ERR_MALLOC; | |
832 if ((g->skeys[g->nsecondary].pkey = sre_strdup(pkey, -1)) == NULL) return SSI_ERR_MALLOC; | |
833 g->nsecondary++; | |
834 | |
835 if (g->nsecondary % SSI_KEY_BLOCK == 0) { | |
836 g->skeys = realloc(g->skeys, sizeof(struct ssiskey_s) * (g->nsecondary+SSI_KEY_BLOCK)); | |
837 if (g->skeys == NULL) return SSI_ERR_MALLOC; | |
838 } | |
839 return 0; | |
840 } | |
841 | |
842 | |
843 | |
844 | |
845 /* Function: SSIWriteIndex() | |
846 * Date: SRE, Tue Jan 2 13:55:56 2001 [St. Louis] | |
847 * | |
848 * Purpose: Writes complete index {g} in SSI format to a | |
849 * binary file {file}. Does all | |
850 * the overhead of sorting the primary and secondary keys, | |
851 * and maintaining the association of secondary keys | |
852 * with primary keys during and after the sort. | |
853 * | |
854 * Args: file - file to write to | |
855 * g - index to sort & write out. | |
856 * | |
857 * Returns: 0 on success, nonzero on error. | |
858 */ | |
859 /* needed for qsort() */ | |
860 static int | |
861 pkeysort(const void *k1, const void *k2) | |
862 { | |
863 struct ssipkey_s *key1; | |
864 struct ssipkey_s *key2; | |
865 key1 = (struct ssipkey_s *) k1; | |
866 key2 = (struct ssipkey_s *) k2; | |
867 return strcmp(key1->key, key2->key); | |
868 } | |
869 static int | |
870 skeysort(const void *k1, const void *k2) | |
871 { | |
872 struct ssiskey_s *key1; | |
873 struct ssiskey_s *key2; | |
874 key1 = (struct ssiskey_s *) k1; | |
875 key2 = (struct ssiskey_s *) k2; | |
876 return strcmp(key1->key, key2->key); | |
877 } | |
878 int | |
879 SSIWriteIndex(char *file, SSIINDEX *g) | |
880 { | |
881 FILE *fp; | |
882 int status; | |
883 int i; | |
884 sqd_uint32 header_flags, file_flags; | |
885 sqd_uint32 frecsize, precsize, srecsize; | |
886 sqd_uint64 foffset, poffset, soffset; | |
887 char *s, *s2; | |
888 | |
889 if ((fp = fopen(file,"wb")) == NULL) return SSI_ERR_NOFILE; | |
890 status = 0; | |
891 | |
892 /* How big is the index? If it's going to be > 2GB, we need | |
893 * to flip to 64-bit index mode. 2047 (instead of 2048) gives us | |
894 * some slop room. | |
895 * die'ing here is pretty brutal - if we flip to 64-bit index | |
896 * mode, we hve 100's of millions of keys, so we've processed | |
897 * a long time before reaching this point. Ah well. | |
898 */ | |
899 if (current_index_size(g) >= 2047) { | |
900 g->imode = SSI_OFFSET_I64; | |
901 #ifndef HAS_64BIT_FILE_OFFSETS | |
902 Die("\ | |
903 Can't switch to 64-bit SSI index mode on this system, sorry;\n\ | |
904 I don't have 64-bit file offset functions available.\n"); | |
905 #endif | |
906 } | |
907 | |
908 /* Magic-looking numbers come from adding up sizes | |
909 * of things in bytes | |
910 */ | |
911 frecsize = 16 + g->flen; | |
912 precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; | |
913 srecsize = g->slen + g->plen; | |
914 | |
915 header_flags = 0; | |
916 if (g->smode == SSI_OFFSET_I64) header_flags |= SSI_USE64; | |
917 if (g->imode == SSI_OFFSET_I64) header_flags |= SSI_USE64_INDEX; | |
918 | |
919 /* Magic-looking numbers again come from adding up sizes | |
920 * of things in bytes | |
921 */ | |
922 foffset = (header_flags & SSI_USE64_INDEX) ? 66 : 54; | |
923 poffset = foffset + frecsize*g->nfiles; | |
924 soffset = poffset + precsize*g->nprimary; | |
925 | |
926 /* Sort the keys | |
927 * If external mode, make system calls to UNIX/POSIX "sort" in place, then | |
928 * open new sorted files for reading thru ptmp and stmp handles. | |
929 * If internal mode, call qsort. | |
930 * | |
931 * Note that you'd better force a POSIX locale for the sort; else, | |
932 * some silly distro (e.g. Mandrake Linux >=8.1) may have specified | |
933 * LC_COLLATE=en_US, and this'll give a sort "bug" in which it doesn't | |
934 * sort by byte order. | |
935 */ | |
936 if (g->external) { | |
937 char cmd[1024]; | |
938 | |
939 fclose(g->ptmp); | |
940 g->ptmp = NULL; | |
941 sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->ptmpfile, g->ptmpfile); | |
942 if ((status = system(cmd)) != 0) return SSI_ERR_EXTERNAL_SORT; | |
943 if ((g->ptmp = fopen(g->ptmpfile, "r")) == NULL) return SSI_ERR_EXTERNAL_SORT; | |
944 | |
945 fclose(g->stmp); | |
946 g->stmp = NULL; | |
947 sprintf(cmd, "env LC_ALL=POSIX sort -o %s %s\n", g->stmpfile, g->stmpfile); | |
948 if ((status = system(cmd)) != 0) return SSI_ERR_EXTERNAL_SORT; | |
949 if ((g->stmp = fopen(g->stmpfile, "r")) == NULL) return SSI_ERR_EXTERNAL_SORT; | |
950 } else { | |
951 qsort((void *) g->pkeys, g->nprimary, sizeof(struct ssipkey_s), pkeysort); | |
952 qsort((void *) g->skeys, g->nsecondary, sizeof(struct ssiskey_s), skeysort); | |
953 } | |
954 | |
955 /* Write the header | |
956 */ | |
957 if (! write_i32(fp, v20magic)) return SSI_ERR_FWRITE; | |
958 if (! write_i32(fp, header_flags)) return SSI_ERR_FWRITE; | |
959 if (! write_i16(fp, g->nfiles)) return SSI_ERR_FWRITE; | |
960 if (! write_i32(fp, g->nprimary)) return SSI_ERR_FWRITE; | |
961 if (! write_i32(fp, g->nsecondary)) return SSI_ERR_FWRITE; | |
962 if (! write_i32(fp, g->flen)) return SSI_ERR_FWRITE; | |
963 if (! write_i32(fp, g->plen)) return SSI_ERR_FWRITE; | |
964 if (! write_i32(fp, g->slen)) return SSI_ERR_FWRITE; | |
965 if (! write_i32(fp, frecsize)) return SSI_ERR_FWRITE; | |
966 if (! write_i32(fp, precsize)) return SSI_ERR_FWRITE; | |
967 if (! write_i32(fp, srecsize)) return SSI_ERR_FWRITE; | |
968 if (g->imode == SSI_OFFSET_I32) { | |
969 if (! write_i32(fp, foffset)) return SSI_ERR_FWRITE; | |
970 if (! write_i32(fp, poffset)) return SSI_ERR_FWRITE; | |
971 if (! write_i32(fp, soffset)) return SSI_ERR_FWRITE; | |
972 } else { | |
973 if (! write_i64(fp, foffset)) return SSI_ERR_FWRITE; | |
974 if (! write_i64(fp, poffset)) return SSI_ERR_FWRITE; | |
975 if (! write_i64(fp, soffset)) return SSI_ERR_FWRITE; | |
976 } | |
977 | |
978 /* The file section | |
979 */ | |
980 if ((s = malloc(sizeof(char) * g->flen)) == NULL) return SSI_ERR_MALLOC; | |
981 for (i = 0; i < g->nfiles; i++) | |
982 { | |
983 file_flags = 0; | |
984 if (g->bpl[i] > 0 && g->rpl[i] > 0) file_flags |= SSI_FAST_SUBSEQ; | |
985 | |
986 strcpy(s, g->filenames[i]); | |
987 if (fwrite(s, sizeof(char), g->flen, fp) != g->flen) return SSI_ERR_FWRITE; | |
988 if (! write_i32(fp, g->fileformat[i])) return SSI_ERR_FWRITE; | |
989 if (! write_i32(fp, file_flags)) return SSI_ERR_FWRITE; | |
990 if (! write_i32(fp, g->bpl[i])) return SSI_ERR_FWRITE; | |
991 if (! write_i32(fp, g->rpl[i])) return SSI_ERR_FWRITE; | |
992 } | |
993 free(s); | |
994 | |
995 /* The primary key section | |
996 */ | |
997 if ((s = malloc(sizeof(char) * g->plen)) == NULL) return SSI_ERR_MALLOC; | |
998 if (g->external) { | |
999 char *buf = NULL; | |
1000 int buflen = 0; | |
1001 struct ssipkey_s pkey; | |
1002 for (i = 0; i < g->nprimary; i++) | |
1003 { | |
1004 if (sre_fgets(&buf, &buflen, g->ptmp) == NULL) return SSI_ERR_NODATA; | |
1005 if (parse_pkey_info(buf, g->smode, &pkey) != 0) return SSI_ERR_BADFORMAT; | |
1006 strcpy(s, pkey.key); | |
1007 if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; | |
1008 if (! write_i16( fp, pkey.fnum)) return SSI_ERR_FWRITE; | |
1009 if (! write_offset(fp, &(pkey.r_off))) return SSI_ERR_FWRITE; | |
1010 if (! write_offset(fp, &(pkey.d_off))) return SSI_ERR_FWRITE; | |
1011 if (! write_i32( fp, pkey.len)) return SSI_ERR_FWRITE; | |
1012 } | |
1013 free(buf); | |
1014 } else { | |
1015 for (i = 0; i < g->nprimary; i++) | |
1016 { | |
1017 strcpy(s, g->pkeys[i].key); | |
1018 if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; | |
1019 if (! write_i16( fp, g->pkeys[i].fnum)) return SSI_ERR_FWRITE; | |
1020 if (! write_offset(fp, &(g->pkeys[i].r_off))) return SSI_ERR_FWRITE; | |
1021 if (! write_offset(fp, &(g->pkeys[i].d_off))) return SSI_ERR_FWRITE; | |
1022 if (! write_i32( fp, g->pkeys[i].len)) return SSI_ERR_FWRITE; | |
1023 } | |
1024 } | |
1025 | |
1026 /* The secondary key section | |
1027 */ | |
1028 if (g->nsecondary > 0) { | |
1029 if ((s2 = malloc(sizeof(char) * g->slen)) == NULL) return SSI_ERR_MALLOC; | |
1030 | |
1031 if (g->external) { | |
1032 struct ssiskey_s skey; | |
1033 char *buf = NULL; | |
1034 int n = 0; | |
1035 | |
1036 for (i = 0; i < g->nsecondary; i++) | |
1037 { | |
1038 if (sre_fgets(&buf, &n, g->stmp) == NULL) return SSI_ERR_NODATA; | |
1039 if (parse_skey_info(buf, &skey) != 0) return SSI_ERR_BADFORMAT; | |
1040 strcpy(s2, skey.key); | |
1041 strcpy(s, skey.pkey); | |
1042 if (fwrite(s2, sizeof(char), g->slen, fp) != g->slen) return SSI_ERR_FWRITE; | |
1043 if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; | |
1044 } | |
1045 free(buf); | |
1046 } else { | |
1047 for (i = 0; i < g->nsecondary; i++) | |
1048 { | |
1049 strcpy(s2, g->skeys[i].key); | |
1050 strcpy(s, g->skeys[i].pkey); | |
1051 if (fwrite(s2, sizeof(char), g->slen, fp) != g->slen) return SSI_ERR_FWRITE; | |
1052 if (fwrite(s, sizeof(char), g->plen, fp) != g->plen) return SSI_ERR_FWRITE; | |
1053 } | |
1054 } | |
1055 free(s2); | |
1056 } | |
1057 | |
1058 free(s); | |
1059 fclose(fp); | |
1060 return status; | |
1061 } | |
1062 | |
1063 | |
1064 /* Function: SSIFreeIndex() | |
1065 * Date: SRE, Tue Jan 2 11:44:08 2001 [St. Louis] | |
1066 * | |
1067 * Purpose: Free an index structure {g}. | |
1068 * | |
1069 * Args: g - ptr to an open index. | |
1070 * | |
1071 * Returns: (void) | |
1072 */ | |
1073 void | |
1074 SSIFreeIndex(SSIINDEX *g) | |
1075 { | |
1076 int i; | |
1077 if (g != NULL) | |
1078 { | |
1079 if (g->external == FALSE) { | |
1080 for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); | |
1081 for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); | |
1082 for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); | |
1083 if (g->pkeys != NULL) free(g->pkeys); | |
1084 if (g->skeys != NULL) free(g->skeys); | |
1085 } else { | |
1086 if (g->ptmp != NULL) fclose(g->ptmp); | |
1087 if (g->stmp != NULL) fclose(g->stmp); | |
1088 #if DEBUGLEVEL == 0 | |
1089 remove(g->ptmpfile); | |
1090 remove(g->stmpfile); | |
1091 #endif | |
1092 } | |
1093 for (i = 0; i < g->nfiles; i++) free(g->filenames[i]); | |
1094 if (g->filenames != NULL) free(g->filenames); | |
1095 if (g->fileformat != NULL) free(g->fileformat); | |
1096 if (g->bpl != NULL) free(g->bpl); | |
1097 if (g->rpl != NULL) free(g->rpl); | |
1098 free(g); | |
1099 } | |
1100 } | |
1101 | |
1102 | |
1103 /* Function: SSIErrorString() | |
1104 * Date: SRE, Tue Jan 2 10:38:10 2001 [St. Louis] | |
1105 * | |
1106 * Purpose: Returns a ptr to an internal string corresponding | |
1107 * to error {n}, a code returned from any of the | |
1108 * functions in the API that return non-zero on error. | |
1109 * | |
1110 * Args: n - error code | |
1111 * | |
1112 * Returns: ptr to an internal string. | |
1113 */ | |
1114 char * | |
1115 SSIErrorString(int n) | |
1116 { | |
1117 switch (n) { | |
1118 case SSI_ERR_OK: return "ok (no error)"; | |
1119 case SSI_ERR_NODATA: return "no data, fread() failed"; | |
1120 case SSI_ERR_NO_SUCH_KEY: return "no such key"; | |
1121 case SSI_ERR_MALLOC: return "out of memory, malloc() failed"; | |
1122 case SSI_ERR_NOFILE: return "file not found, fopen() failed"; | |
1123 case SSI_ERR_BADMAGIC: return "not a SSI file? (bad magic)"; | |
1124 case SSI_ERR_BADFORMAT: return "corrupt format? unexpected data"; | |
1125 case SSI_ERR_NO64BIT: return "no large file support for this system"; | |
1126 case SSI_ERR_SEEK_FAILED: return "failed to reposition on disk"; | |
1127 case SSI_ERR_TELL_FAILED: return "failed to get file position on disk"; | |
1128 case SSI_ERR_NO_SUBSEQS: return "no fast subseq support for this seqfile"; | |
1129 case SSI_ERR_RANGE: return "subseq start is out of range"; | |
1130 case SSI_ERR_BADARG: return "an argument is out of range"; | |
1131 case SSI_ERR_TOOMANY_FILES: return "number of files exceeds limit"; | |
1132 case SSI_ERR_TOOMANY_KEYS: return "number of keys exceeds limit"; | |
1133 case SSI_ERR_FWRITE: return "an fwrite() failed"; | |
1134 case SSI_ERR_EXTERNAL_SORT: return "some problem with external sorting"; | |
1135 default: return "unrecognized code"; | |
1136 } | |
1137 /*NOTREACHED*/ | |
1138 } | |
1139 | |
1140 static int | |
1141 read_i16(FILE *fp, sqd_uint16 *ret_result) | |
1142 { | |
1143 sqd_uint16 result; | |
1144 if (fread(&result, sizeof(sqd_uint16), 1, fp) != 1) return 0; | |
1145 *ret_result = sre_ntoh16(result); | |
1146 return 1; | |
1147 } | |
1148 static int | |
1149 write_i16(FILE *fp, sqd_uint16 n) | |
1150 { | |
1151 n = sre_hton16(n); | |
1152 if (fwrite(&n, sizeof(sqd_uint16), 1, fp) != 1) return 0; | |
1153 return 1; | |
1154 } | |
1155 static int | |
1156 read_i32(FILE *fp, sqd_uint32 *ret_result) | |
1157 { | |
1158 sqd_uint32 result; | |
1159 if (fread(&result, sizeof(sqd_uint32), 1, fp) != 1) return 0; | |
1160 *ret_result = sre_ntoh32(result); | |
1161 return 1; | |
1162 } | |
1163 static int | |
1164 write_i32(FILE *fp, sqd_uint32 n) | |
1165 { | |
1166 n = sre_hton32(n); | |
1167 if (fwrite(&n, sizeof(sqd_uint32), 1, fp) != 1) return 0; | |
1168 return 1; | |
1169 } | |
1170 static int | |
1171 read_i64(FILE *fp, sqd_uint64 *ret_result) | |
1172 { | |
1173 sqd_uint64 result; | |
1174 if (fread(&result, sizeof(sqd_uint64), 1, fp) != 1) return 0; | |
1175 *ret_result = sre_ntoh64(result); | |
1176 return 1; | |
1177 } | |
1178 static int | |
1179 write_i64(FILE *fp, sqd_uint64 n) | |
1180 { | |
1181 n = sre_hton64(n); | |
1182 if (fwrite(&n, sizeof(sqd_uint64), 1, fp) != 1) return 0; | |
1183 return 1; | |
1184 } | |
1185 static int | |
1186 read_offset(FILE *fp, char mode, SSIOFFSET *ret_offset) | |
1187 { | |
1188 if (mode == SSI_OFFSET_I32) { | |
1189 ret_offset->mode = SSI_OFFSET_I32; | |
1190 if (! read_i32(fp, &(ret_offset->off.i32))) return 0; | |
1191 } else if (mode == SSI_OFFSET_I64) { | |
1192 ret_offset->mode = SSI_OFFSET_I64; | |
1193 if (! read_i64(fp, &(ret_offset->off.i64))) return 0; | |
1194 } else return 0; | |
1195 | |
1196 return 1; | |
1197 } | |
1198 static int | |
1199 write_offset(FILE *fp, SSIOFFSET *offset) | |
1200 { | |
1201 if (offset->mode == SSI_OFFSET_I32) return write_i32(fp, offset->off.i32); | |
1202 else if (offset->mode == SSI_OFFSET_I64) return write_i64(fp, offset->off.i64); | |
1203 else abort(); | |
1204 /*UNREACHED*/ | |
1205 return 1; /* silence bitchy compilers */ | |
1206 } | |
1207 | |
1208 static int | |
1209 parse_pkey_info(char *buf, char mode, struct ssipkey_s *pkey) | |
1210 { | |
1211 char *s, *tok; | |
1212 int n; | |
1213 | |
1214 s = buf; | |
1215 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1216 pkey->key = tok; | |
1217 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1218 pkey->fnum = (sqd_uint16) atoi(tok); | |
1219 | |
1220 if (mode == SSI_OFFSET_I32) { | |
1221 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1222 pkey->r_off.mode = mode; | |
1223 pkey->r_off.off.i32 = (sqd_uint32) strtoul(tok, NULL, 10); | |
1224 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1225 pkey->d_off.mode = mode; | |
1226 pkey->d_off.off.i32 = (sqd_uint32) strtoul(tok, NULL, 10); | |
1227 } | |
1228 #ifdef HAS_64BIT_FILE_OFFSETS | |
1229 else { | |
1230 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1231 pkey->r_off.mode = mode; | |
1232 pkey->r_off.off.i64 = (sqd_uint64) strtoull(tok, NULL, 10); | |
1233 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1234 pkey->d_off.mode = mode; | |
1235 pkey->d_off.off.i64 = (sqd_uint64) strtoull(tok, NULL, 10); | |
1236 } | |
1237 #else | |
1238 else { | |
1239 return SSI_ERR_NO64BIT; | |
1240 } | |
1241 #endif | |
1242 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1243 pkey->len = (sqd_uint32) strtoul(tok, NULL, 10); | |
1244 | |
1245 return 0; | |
1246 } | |
1247 static int | |
1248 parse_skey_info(char *buf, struct ssiskey_s *skey) | |
1249 { | |
1250 char *s, *tok; | |
1251 int n; | |
1252 | |
1253 s = buf; | |
1254 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1255 skey->key = tok; | |
1256 if ((tok = sre_strtok(&s, "\t\n", &n)) == NULL) return SSI_ERR_BADFORMAT; | |
1257 skey->pkey = tok; | |
1258 return 0; | |
1259 } | |
1260 | |
1261 /* Function: binary_search() | |
1262 * Date: SRE, Sun Dec 31 16:05:03 2000 [St. Louis] | |
1263 * | |
1264 * Purpose: Find a key in a SSI index, by a binary search | |
1265 * in an alphabetically sorted list of keys. If successful, | |
1266 * return 0, and the index file is positioned to read | |
1267 * the rest of the data for that key. Else returns nonzero. | |
1268 * | |
1269 * Args: sfp - an open SSIFILE | |
1270 * key - key to find | |
1271 * klen - key length to allocate (plen or slen from sfp) | |
1272 * base - base offset (poffset or soffset) | |
1273 * recsize - size of each key record in bytes (precsize or srecsize) | |
1274 * maxidx - # of keys (nprimary or nsecondary) | |
1275 * | |
1276 * Returns: 0 on success, and leaves file positioned for reading remaining | |
1277 * data for the key. | |
1278 * Nonzero on failure: | |
1279 * SSI_ERR_NO_SUCH_KEY - that key's not in the index | |
1280 * SSI_ERR_MALLOC - a memory allocation failure | |
1281 * SSI_ERR_NODATA - an fread() failed | |
1282 */ | |
1283 static int | |
1284 binary_search(SSIFILE *sfp, char *key, int klen, SSIOFFSET *base, | |
1285 sqd_uint32 recsize, sqd_uint32 maxidx) | |
1286 { | |
1287 char *name; | |
1288 sqd_uint32 left, right, mid; | |
1289 int cmp; | |
1290 int status; | |
1291 | |
1292 if (maxidx == 0) return SSI_ERR_NO_SUCH_KEY; /* special case: empty index */ | |
1293 if ((name = malloc (sizeof(char)*klen)) == NULL) return SSI_ERR_MALLOC; | |
1294 left = 0; | |
1295 right = maxidx-1; | |
1296 while (1) { /* A binary search: */ | |
1297 mid = (left+right) / 2; /* careful here. only works because | |
1298 we limit unsigned vars to signed ranges. */ | |
1299 if ((status = indexfile_position(sfp, base, recsize, mid)) != 0) | |
1300 { free(name); return status; } | |
1301 if (fread(name, sizeof(char), klen, sfp->fp) != klen) | |
1302 { free(name); return SSI_ERR_NODATA; } | |
1303 cmp = strcmp(name, key); | |
1304 if (cmp == 0) break; /* found it! */ | |
1305 else if (left >= right) /* oops, missed it; fail */ | |
1306 { free(name); return SSI_ERR_NO_SUCH_KEY; } | |
1307 else if (cmp < 0) left = mid+1; /* it's right of mid */ | |
1308 else if (cmp > 0) { | |
1309 if (mid == 0) { free(name); return SSI_ERR_NO_SUCH_KEY; } /* special case, beware */ | |
1310 else right = mid-1; /* it's left of mid */ | |
1311 } | |
1312 } | |
1313 free(name); | |
1314 return 0; /* and sfp->fp is positioned... */ | |
1315 } | |
1316 | |
1317 /* Function: indexfile_position() | |
1318 * Date: SRE, Mon Jan 1 19:32:49 2001 [St. Louis] | |
1319 * | |
1320 * Purpose: Position the open index file {sfp} at the start | |
1321 * of record {n} in a list of records that starts at | |
1322 * base offset {base}, where each record takes up {l} | |
1323 * bytes. (e.g. the position is byte (base + n*l)). | |
1324 * | |
1325 * Args: sfp - open SSIFILE | |
1326 * base - offset of record 0 (e.g. sfp->foffset) | |
1327 * len - size of each record in bytes (e.g. sfp->frecsize) | |
1328 * n - which record to get (e.g. 0..sfp->nfiles) | |
1329 * | |
1330 * Returns: 0 on success, non-zero on failure. | |
1331 */ | |
1332 static int | |
1333 indexfile_position(SSIFILE *sfp, SSIOFFSET *base, sqd_uint32 len, sqd_uint32 n) | |
1334 { | |
1335 SSIOFFSET pos; | |
1336 int status; | |
1337 | |
1338 if (base->mode == SSI_OFFSET_I32) { | |
1339 pos.mode = SSI_OFFSET_I32; | |
1340 pos.off.i32 = base->off.i32 + n*len; | |
1341 } else if (base->mode == SSI_OFFSET_I64) { | |
1342 pos.mode = SSI_OFFSET_I64; | |
1343 pos.off.i64 = base->off.i64 + n*len; | |
1344 } else return 0; | |
1345 if ((status = SSISetFilePosition(sfp->fp, &pos)) != 0) return status; | |
1346 return 0; | |
1347 } | |
1348 | |
1349 /* Function: current_index_size() | |
1350 * Date: SRE, Tue Feb 20 18:23:30 2001 [St. Louis] | |
1351 * | |
1352 * Purpose: Calculates the size of the current index, | |
1353 * in megabytes. | |
1354 */ | |
1355 static sqd_uint64 | |
1356 current_index_size(SSIINDEX *g) | |
1357 { | |
1358 sqd_uint64 frecsize, precsize, srecsize; | |
1359 sqd_uint64 total; | |
1360 | |
1361 /* Magic-looking numbers come from adding up sizes | |
1362 * of things in bytes | |
1363 */ | |
1364 frecsize = 16 + g->flen; | |
1365 precsize = (g->smode == SSI_OFFSET_I64) ? 22+g->plen : 14+g->plen; | |
1366 srecsize = g->plen+g->slen; | |
1367 total = (66L + /* header size, if 64bit index offsets */ | |
1368 frecsize * g->nfiles + /* file section size */ | |
1369 precsize * g->nprimary + /* primary key section size */ | |
1370 srecsize * g->nsecondary) / /* secondary key section size */ | |
1371 1048576L; | |
1372 return total; | |
1373 } | |
1374 /* Function: activate_external_sort() | |
1375 * Date: SRE, Mon Feb 4 09:08:08 2002 [St. Louis] | |
1376 * | |
1377 * Purpose: Switch to external sort mode. | |
1378 * Open file handles for external index files (ptmp, stmp). | |
1379 * Flush current index information to these files. | |
1380 * Free current memory, turn over control to the tmpfiles. | |
1381 * | |
1382 * Return: 0 on success; non-zero on failure. | |
1383 */ | |
1384 static int | |
1385 activate_external_sort(SSIINDEX *g) | |
1386 { | |
1387 int i; | |
1388 /* it's a bit late to be checking this, but... */ | |
1389 if (g->external) return 0; /* we already are external, fool */ | |
1390 if (FileExists(g->ptmpfile)) return 1; | |
1391 if (FileExists(g->stmpfile)) return 1; | |
1392 if ((g->ptmp = fopen(g->ptmpfile, "w")) == NULL) return 1; | |
1393 if ((g->stmp = fopen(g->stmpfile, "w")) == NULL) return 1; | |
1394 | |
1395 /* Flush the current indices. | |
1396 */ | |
1397 SQD_DPRINTF1(("Switching to external sort - flushing ssiindex to disk...\n")); | |
1398 for (i = 0; i < g->nprimary; i++) { | |
1399 if (g->smode == SSI_OFFSET_I32) { | |
1400 fprintf(g->ptmp, "%s\t%u\t%lu\t%lu\t%lu\n", | |
1401 g->pkeys[i].key, g->pkeys[i].fnum, | |
1402 (unsigned long) g->pkeys[i].r_off.off.i32, | |
1403 (unsigned long) g->pkeys[i].d_off.off.i32, | |
1404 (unsigned long) g->pkeys[i].len); | |
1405 } else { | |
1406 fprintf(g->ptmp, "%s\t%u\t%llu\t%llu\t%lu\n", | |
1407 g->pkeys[i].key, g->pkeys[i].fnum, | |
1408 (unsigned long long) g->pkeys[i].r_off.off.i64, | |
1409 (unsigned long long) g->pkeys[i].d_off.off.i64, | |
1410 (unsigned long) g->pkeys[i].len); | |
1411 } | |
1412 } | |
1413 for (i = 0; i < g->nsecondary; i++) | |
1414 fprintf(g->stmp, "%s\t%s\n", g->skeys[i].key, g->skeys[i].pkey); | |
1415 | |
1416 /* Free the memory now that we've flushed our lists to disk | |
1417 */ | |
1418 for (i = 0; i < g->nprimary; i++) free(g->pkeys[i].key); | |
1419 for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].key); | |
1420 for (i = 0; i < g->nsecondary; i++) free(g->skeys[i].pkey); | |
1421 if (g->pkeys != NULL) free(g->pkeys); | |
1422 if (g->skeys != NULL) free(g->skeys); | |
1423 g->pkeys = NULL; | |
1424 g->skeys = NULL; | |
1425 | |
1426 /* Turn control over to external accumulation mode. | |
1427 */ | |
1428 g->external = TRUE; | |
1429 return 0; | |
1430 } | |
1431 | |
1432 | |
1433 /***************************************************************** | |
1434 * Debugging API | |
1435 *****************************************************************/ | |
1436 void | |
1437 SSIForceExternalSort(SSIINDEX *g) | |
1438 { | |
1439 if (activate_external_sort(g) != 0) | |
1440 Die("failed to turn external sorting on."); | |
1441 } | |
1442 | |
1443 | |
1444 /***************************************************************** | |
1445 * Test driving mode | |
1446 *****************************************************************/ | |
#ifdef MUGGINS_LETS_ME_SLEEP
/* Minimally:
   cc -g -Wall -o shiva -DDEBUGLEVEL=1 -DMUGGINS_LETS_ME_SLEEP ssi.c sqerror.c sre_string.c types.c sre_ctype.c sre_math.c file.c -lm
*/

/* Test driver: writes two small FASTA files (tmp.0 with seq0..seq9,
 * tmp.1 with seq10..seq19), registers every sequence in an SSI index
 * under a primary key (its name) and a secondary key (its accession),
 * writes the index to tmp.ssi, and exits. Any failure ends the run
 * through Die().
 */
int
main(int argc, char **argv)
{
  char      seqname[32], acc[32];
  SSIINDEX *idx;
  int       offmode;
  SSIOFFSET rec_off, dat_off;
  FILE     *out;
  int       k;
  int       fh;			/* a file handle */
  int       rc;			/* return status from a SSI call */

  offmode = SSI_OFFSET_I32;
  idx = SSICreateIndex(offmode);
  if (idx == NULL)
    Die("Failed to allocate SSI index");

  /* Generate two FASTA files, tmp.0 and tmp.1, and index them.
   */
  out = fopen("tmp.0", "w");
  if (out == NULL)
    Die("failed to open tmp.0");
  rc = SSIAddFileToIndex(idx, "tmp.0", SQFILE_FASTA, &fh);
  if (rc != 0)
    Die("SSIAddFileToIndex() failed: %s", SSIErrorString(rc));
  for (k = 0; k < 10; k++) {
    /* record offset: where the header line starts */
    rc = SSIGetFilePosition(out, offmode, &rec_off);
    if (rc != 0)
      Die("SSIGetFilePosition() failed: %s", SSIErrorString(rc));
    sprintf(seqname, "seq%d", k);
    sprintf(acc, "ac%d", k);
    fprintf(out, ">%s [%s] Description? we don't need no steenking description.\n",
	    seqname, acc);
    /* data offset: where the sequence lines start */
    rc = SSIGetFilePosition(out, offmode, &dat_off);
    if (rc != 0)
      Die("SSIGetFilePosition() failed: %s", SSIErrorString(rc));
    fprintf(out, "AAAAAAAAAA\n");
    fprintf(out, "CCCCCCCCCC\n");
    fprintf(out, "GGGGGGGGGG\n");
    fprintf(out, "TTTTTTTTTT\n");

    rc = SSIAddPrimaryKeyToIndex(idx, seqname, fh, &rec_off, &dat_off, 40);
    if (rc != 0)
      Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(rc));
    rc = SSIAddSecondaryKeyToIndex(idx, acc, seqname);
    if (rc != 0)
      Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(rc));
  }
  SSISetFileForSubseq(idx, fh, 11, 10);
  fclose(out);

  out = fopen("tmp.1", "w");
  if (out == NULL)
    Die("failed to open tmp.1");
  rc = SSIAddFileToIndex(idx, "tmp.1", SQFILE_FASTA, &fh);
  if (rc != 0)
    Die("SSIAddFileToIndex() failed: %s", SSIErrorString(rc));
  for (k = 10; k < 20; k++) {
    rc = SSIGetFilePosition(out, offmode, &rec_off);
    if (rc != 0)
      Die("SSIGetFilePosition() failed: %s", SSIErrorString(rc));
    sprintf(seqname, "seq%d", k);
    sprintf(acc, "ac%d", k);
    fprintf(out, ">%s [%s] i/o, i/o, it's off to disk we go.\n",
	    seqname, acc);
    rc = SSIGetFilePosition(out, offmode, &dat_off);
    if (rc != 0)
      Die("SSIGetFilePosition() failed: %s", SSIErrorString(rc));
    fprintf(out, "AAAAAAAAAA 10\n");
    fprintf(out, "CCCCCCCCCC 20\n");
    fprintf(out, "GGGGGGGGGG 30\n");
    fprintf(out, "TTTTTTTTTT 40\n");

    rc = SSIAddPrimaryKeyToIndex(idx, seqname, fh, &rec_off, &dat_off, 40);
    if (rc != 0)
      Die("SSIAddPrimaryKeyToIndex() failed: %s", SSIErrorString(rc));
    rc = SSIAddSecondaryKeyToIndex(idx, acc, seqname);
    if (rc != 0)
      Die("SSIAddSecondaryKeyToIndex() failed: %s", SSIErrorString(rc));
  }
  SSISetFileForSubseq(idx, fh, 14, 10);
  fclose(out);

  /* Write the index to tmp.ssi
   */
  rc = SSIWriteIndex("tmp.ssi", idx);
  if (rc != 0)
    Die("SSIWriteIndex() failed: %s", SSIErrorString(rc));
  SSIFreeIndex(idx);

  /* Now reopen the index and run some tests.
   */
  exit(0);
}


#endif /* test driving code */
1535 | |
1536 | |
1537 |