annotate clustalomega/clustal-omega-1.0.2/src/squid/gsi.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2 * SQUID - a library of functions for biological sequence analysis
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 * Copyright (C) 1992-2002 Washington University School of Medicine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 * This source code is freely distributed under the terms of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 * for details.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 /* gsi.c
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * Interfaces for GSI "generic sequence index" files.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 * broken away from sqio.c and extended: SRE, Wed Aug 5 10:32:53 1998
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 * GSI definition:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16 * 1 + <nfiles> + <nkeys> total records.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 * Each record = 38 bytes.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 * one header record : <"GSI" (32)> <nfiles (2)> <nkeys (4)>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20 * <nfiles> file records : <filename (32)> <fileno (2)> <fmt (4)>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 * <nkeys> key records : <key (32)> <fileno (2)> <offset(4)>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 * Matches up with my Perl scripts that create GSI files.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 * RCS $Id: gsi.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: gsi.c,v 1.5 2001/08/04 20:15:42 eddy Exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28 #include <stdio.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29 #include <stdlib.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 #include <string.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 #ifndef SEEK_SET
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 #include <unistd.h> /* needed for poor crippled SunOS */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 #include "squid.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 #include "gsi.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 * GSI index file access routines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 /* Function: GSIOpen()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 * Purpose: Open a GSI file. Returns the number of records in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 * the file and a file pointer. Returns NULL on failure.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47 * The file pointer should be fclose()'d normally.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 GSIFILE *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 GSIOpen(char *gsifile)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52 GSIFILE *gsi;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53 char magic[GSI_KEYSIZE];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 gsi = (GSIFILE *) MallocOrDie (sizeof(GSIFILE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56 if ((gsi->gsifp = fopen(gsifile, "r")) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57 { free(gsi); squid_errno = SQERR_NOFILE; return NULL; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 if (! fread(magic, sizeof(char), GSI_KEYSIZE, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 if (strcmp(magic, "GSI") != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 { free(gsi); squid_errno = SQERR_FORMAT; return NULL; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 if (! fread(&(gsi->nfiles), sizeof(sqd_uint16), 1, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 if (! fread(&(gsi->recnum), sizeof(sqd_uint32), 1, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 { free(gsi); squid_errno = SQERR_NODATA; return NULL; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 gsi->nfiles = sre_ntoh16(gsi->nfiles); /* convert from network short */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 gsi->recnum = sre_ntoh32(gsi->recnum); /* convert from network long */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 return gsi;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 /* Function: GSIGetRecord()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 * Purpose: Each non-header record of a GSI index files consists
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 * of 38 bytes: 32 bytes of character string, a 2 byte
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 * short, and a 4 byte long. This function returns the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 * three values.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 * Args: gsi - open GSI index file, correctly positioned at a record
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 * f1 - char[32], allocated by caller (or NULL if unwanted)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 * f2 - pointer to short (or NULL if unwanted)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 * f3 - pointer to long (or NULL if unwanted)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 * Return: 0 on failure and sets squid_errno.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 if (f1 == NULL) fseek(gsi->gsifp, GSI_KEYSIZE, SEEK_CUR);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 else if (! fread(f1, GSI_KEYSIZE, 1, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 { squid_errno = SQERR_NODATA; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 if (f2 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint16), SEEK_CUR);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97 else if (! fread(f2, sizeof(sqd_uint16), 1, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98 { squid_errno = SQERR_NODATA; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 if (f3 == NULL) fseek(gsi->gsifp, sizeof(sqd_uint32), SEEK_CUR);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 else if (! fread(f3, sizeof(sqd_uint32), 1, gsi->gsifp))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 { squid_errno = SQERR_NODATA; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 if (f2 != NULL) *f2 = sre_ntoh16(*f2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 if (f3 != NULL) *f3 = sre_ntoh32(*f3);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 /* Function: GSIGetOffset()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 * Purpose: From a key (sequence name), find a disk offset
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114 * in an open general sequence index file by binary
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 * search. Presumably GSI indexing could be even faster
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 * if we used hashing.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 * Args: gsi - GSI index file, opened by GSIOpen()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 * key - name of key to retrieve indices for
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 * ret_seqfile - pre-alloced char[32] array for seqfile name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121 * ret_fmt - format of seqfile
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 * ret_offset - return: disk offset in seqfile.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125 GSIGetOffset(GSIFILE *gsi, char *key, char *ret_seqfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 int *ret_format, long *ret_offset)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 sqd_uint32 left, right, mid;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129 int cmp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130 char name[GSI_KEYSIZE + 1];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131 sqd_uint32 offset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 sqd_uint16 filenum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 sqd_uint32 fmt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 name[GSI_KEYSIZE] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137 left = gsi->nfiles + 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 right = gsi->nfiles + gsi->recnum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 mid = (left + right) / 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 while (GSIGetRecord(gsi, name, &filenum, &offset))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 cmp = strcmp(name, key);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 if (cmp == 0) break; /* found it! */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 else if (left >= right) return 0; /* oops, missed it; fail. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 else if (cmp < 0) left = mid + 1; /* it's right of mid */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 else if (cmp > 0) right = mid - 1; /* it's left of mid */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149 mid = (left + right) / 2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 fseek(gsi->gsifp, mid * GSI_RECSIZE, SEEK_SET);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 /* Using file number, look up the sequence file and format.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 fseek(gsi->gsifp, filenum * GSI_RECSIZE, SEEK_SET);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 GSIGetRecord(gsi, ret_seqfile, NULL, &fmt);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 *ret_format = (int) fmt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 *ret_offset = (long) offset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 /* Function: GSIClose()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 * Purpose: Close an open GSI sequence index file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 GSIClose(GSIFILE *gsi)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 fclose(gsi->gsifp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 free(gsi);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 * GSI index construction routines
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 * SRE, Wed Nov 10 11:49:14 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 * API:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 * g = GSIAllocIndex();
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 * [foreach filename, <32 char, no directory path]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183 * GSIAddFileToIndex(g, filename, fmt);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184 * filenum++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185 * [foreach key, <32 char, w/ filenum 1..nfiles, w/ 32bit offset]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 * GSIAddKeyToIndex(g, key, filenum, offset);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 * GSISortIndex(g);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 * GSIWriteIndex(fp, g);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 * GSIFreeIndex(g);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 struct gsiindex_s *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 GSIAllocIndex(void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 struct gsiindex_s *g;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197 g = MallocOrDie(sizeof(struct gsiindex_s));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 g->filenames = MallocOrDie(sizeof(char *) * 10);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199 g->fmt = MallocOrDie(sizeof(int) * 10);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 g->elems = MallocOrDie(sizeof(struct gsikey_s) * 100);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 g->nfiles = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202 g->nkeys = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203 return g;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206 GSIFreeIndex(struct gsiindex_s *g)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208 int i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209 for (i = 0; i < g->nfiles; i++) free(g->filenames[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 free(g->filenames);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211 free(g->fmt);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 free(g->elems);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 free(g);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216 GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220 len = strlen(filename);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 if (len >= GSI_KEYSIZE) Die("File name too long to be indexed.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222 g->filenames[g->nfiles] = sre_strdup(filename, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223 g->fmt[g->nfiles] = fmt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 g->nfiles++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 if (g->nfiles % 10 == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226 g->filenames = ReallocOrDie(g->filenames, sizeof(char *) * (g->nfiles + 10));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227 g->fmt = ReallocOrDie(g->fmt, sizeof(int) * (g->nfiles + 10));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 if (strlen(key) >= GSI_KEYSIZE) Die("key too long in GSI index");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 if (filenum > SQD_UINT16_MAX) Die("too many files in GSI index");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 if (offset > SQD_UINT32_MAX) Die("offset too big in GSI index");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237 strncpy(g->elems[g->nkeys].key, key, GSI_KEYSIZE-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 g->elems[g->nkeys].key[GSI_KEYSIZE-1] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 g->elems[g->nkeys].filenum = (sqd_uint16) filenum;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 g->elems[g->nkeys].offset = (sqd_uint32) offset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241 g->nkeys++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243 if (g->nkeys % 100 == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 g->elems = ReallocOrDie(g->elems, sizeof(struct gsikey_s) * (g->nkeys + 100));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247 gsi_keysorter(const void *k1, const void *k2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249 struct gsikey_s *key1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 struct gsikey_s *key2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251 key1 = (struct gsikey_s *) k1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 key2 = (struct gsikey_s *) k2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253 return strcmp(key1->key, key2->key);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256 GSISortIndex(struct gsiindex_s *g)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 qsort((void *) g->elems, g->nkeys, sizeof(struct gsikey_s), gsi_keysorter);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261 GSIWriteIndex(FILE *fp, struct gsiindex_s *g)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263 sqd_uint32 i;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265 /* Range checking.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267 /* AW: gcc says: comparison always false die to limited range of data type */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 #ifndef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 if (g->nfiles > SQD_UINT16_MAX) Die("Too many files in GSI index.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271 if (g->nkeys > SQD_UINT32_MAX) Die("Too many keys in GSI index.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273 GSIWriteHeader(fp, g->nfiles, g->nkeys);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 for (i = 0; i < g->nfiles; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275 GSIWriteFileRecord(fp, g->filenames[i], i+1, g->fmt[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276 for (i = 0; i < g->nkeys; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 GSIWriteKeyRecord(fp, g->elems[i].key, g->elems[i].filenum, g->elems[i].offset);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284 /* Function: GSIWriteHeader()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285 * Date: SRE, Wed Aug 5 10:36:02 1998 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287 * Purpose: Write the first record to an open GSI file:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288 * "GSI" <nfiles> <nkeys>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 * Args: fp - open file to write to.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 * nfiles - number of files indexed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 * nkeys - number of keys indexed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 * Returns: void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 GSIWriteHeader(FILE *fp, int nfiles, long nkeys)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 char key[GSI_KEYSIZE];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300 sqd_uint16 f1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301 sqd_uint32 f2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 /* beware potential range errors!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 if (nfiles > SQD_UINT16_MAX) Die("GSI: nfiles out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 if (nkeys > SQD_UINT32_MAX) Die("GSI: nkeys out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308 f1 = (sqd_uint16) nfiles;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 f2 = (sqd_uint32) nkeys;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310 f1 = sre_hton16(f1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311 f2 = sre_hton32(f2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312 strcpy(key, "GSI");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314 if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 /* Function: GSIWriteFileRecord()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 * Date: SRE, Wed Aug 5 10:45:51 1998 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323 * Purpose: Write a file record to an open GSI file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 * Args: fp - open GSI file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 * fname - file name (max 31 characters)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327 * idx - file number
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328 * fmt - file format (e.g. kPearson, etc.)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 * Returns: 0 on failure. 1 on success.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 sqd_uint16 f1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 sqd_uint32 f2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338 if (strlen(fname) >= GSI_KEYSIZE) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 if (idx > SQD_UINT16_MAX) Die("GSI: file index out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340 if (fmt > SQD_UINT32_MAX) Die("GSI: format index out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 f1 = (sqd_uint16) idx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 f2 = (sqd_uint32) fmt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344 f1 = sre_hton16(f1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345 f2 = sre_hton32(f2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 if (fwrite(fname, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349 if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354 /* Function: GSIWriteKeyRecord()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 * Date: SRE, Wed Aug 5 10:52:30 1998 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357 * Purpose: Write a key record to a GSI file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 * Args: fp - open GSI file for writing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 * key - key (max 31 char + \0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361 * fileidx - which file number to find this key in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 * offset - offset for this key
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364 * Returns: 1 on success, else 0.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365 * will fail if key >= 32 chars, for instance.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 sqd_uint16 f1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371 sqd_uint32 f2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373 if (strlen(key) >= GSI_KEYSIZE) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374 if (fileidx > SQD_UINT16_MAX) Die("GSI: file index out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 if (offset > SQD_UINT32_MAX) Die("GSI: offset out of range");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 f1 = (sqd_uint16) fileidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378 f2 = (sqd_uint32) offset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379 f1 = sre_hton16(f1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 f2 = sre_hton32(f2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382 if (fwrite(key, 1, GSI_KEYSIZE, fp) < GSI_KEYSIZE) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 if (fwrite(&f1, 2, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384 if (fwrite(&f2, 4, 1, fp) < 1) PANIC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387