Mercurial > repos > clustalomega > clustalomega
comparison clustalomega/clustal-omega-1.0.2/src/squid/sre_string.c @ 1:bc707542e5de
Uploaded
author | clustalomega |
---|---|
date | Thu, 21 Jul 2011 13:35:08 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:ff1768533a07 | 1:bc707542e5de |
---|---|
1 /***************************************************************** | |
2 * SQUID - a library of functions for biological sequence analysis | |
3 * Copyright (C) 1992-2002 Washington University School of Medicine | |
4 * | |
5 * This source code is freely distributed under the terms of the | |
6 * GNU General Public License. See the files COPYRIGHT and LICENSE | |
7 * for details. | |
8 *****************************************************************/ | |
9 | |
10 /* sre_string.c | |
11 * | |
12 * my library of extra string functions. Some for portability | |
13 * across UNIXes | |
14 * | |
15 * RCS $Id: sre_string.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_string.c,v 1.11 2001/06/07 16:59:37 eddy Exp) | |
16 */ | |
17 | |
18 #include <stdio.h> | |
19 #include <stdlib.h> | |
20 #include <string.h> | |
21 #include <stdarg.h> | |
22 #include <ctype.h> | |
23 #include "squid.h" | |
24 | |
/* Function: Strdup()
 *
 * Purpose:  Stand-in for the widespread (but non-ANSI) strdup().
 *           Unlike many strdup() implementations, a NULL
 *           argument is accepted.
 *
 * Args:     s - '\0'-terminated string to duplicate; may be NULL
 *
 * Returns:  malloc()'ed copy of s, which the caller free()'s;
 *           NULL if s was NULL or the allocation failed.
 */
char *
Strdup(char *s)
{
  size_t  nbytes;
  char   *copy;

  if (s == NULL) return NULL;

  nbytes = strlen(s) + 1;               /* +1 for the trailing '\0' */
  copy   = (char *) malloc(nbytes);
  if (copy != NULL) memcpy(copy, s, nbytes);
  return copy;
}
40 | |
/* Function: StringChop()
 * Date:     SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
 *
 * Purpose:  Chop trailing whitespace off of a string, in place.
 *           A string that is empty or all whitespace becomes "".
 *
 * Args:     s - '\0'-terminated, writable string
 *
 * Returns:  (void)
 */
void
StringChop(char *s)
{
  int i;

  i = (int) strlen(s) - 1;      /* last char in string, or -1 if s is empty */

  /* <ctype.h> classifiers are only defined for EOF and values representable
   * as unsigned char; go through unsigned char so that bytes >= 0x80 are
   * safe where plain char is signed.
   */
  while (i >= 0 && isspace((unsigned char) s[i])) i--;

  s[i+1] = '\0';                /* i is at last non-whitespace char, or -1 */
}
55 | |
/* Function: Strinsert()
 *
 * Purpose:  Insert character c into s1 in front of the character
 *           currently at index pos. The tail of the string, with
 *           its '\0', moves up by one, so s1 must have room for
 *           one more char. Inserting '\0' just truncates s1 at pos.
 *
 * Args:     s1  - string to insert a char into
 *           c   - char to insert
 *           pos - position in s1 to insert c at
 *
 * Returns:  1, always.
 */
int
Strinsert(char *s1, char c, int pos)
{
  char   *gap;
  size_t  taillen;

  gap = s1 + pos;
  if (c == '\0')                /* nothing to shift: cut the string here */
    {
      *gap = '\0';
      return 1;
    }

  taillen = strlen(gap);
  memmove(gap + 1, gap, taillen + 1);   /* tail and its '\0' move up one */
  *gap = c;
  return 1;
}
75 | |
76 | |
/* Function: Strdelete()
 *
 * Purpose:  Remove the character at index pos from s1, in place,
 *           closing the gap by moving the rest of the string
 *           (and its '\0') down by one. If pos is at the
 *           terminating '\0', s1 is left unchanged.
 *
 * Args:     s1  - string to delete a char from
 *           pos - position of char to delete, 0..n-1
 *
 * Returns:  1, always.
 */
int
Strdelete(char *s1, int pos)
{
  char   *hole    = s1 + pos;
  size_t  taillen = strlen(hole);

  /* hole[1..taillen] (which ends with the '\0') slides onto hole[0..taillen-1] */
  if (taillen > 0) memmove(hole, hole + 1, taillen);
  return 1;
}
88 | |
/* Function: s2lower()
 *
 * Purpose:  Rewrite a string in place, passing every character
 *           up to the terminating '\0' through sre_tolower().
 *
 * Args:     s - '\0'-terminated, writable string
 */
void
s2lower(char *s)
{
  char *cp = s;

  while (*cp != '\0')
    {
      *cp = sre_tolower((int) *cp);
      cp++;
    }
}
95 | |
/* Function: s2upper()
 *
 * Purpose:  Rewrite a string in place, passing every character
 *           up to the terminating '\0' through sre_toupper().
 *
 * Args:     s - '\0'-terminated, writable string
 */
void
s2upper(char *s)
{
  char *cp = s;

  while (*cp != '\0')
    {
      *cp = sre_toupper((int) *cp);
      cp++;
    }
}
102 | |
103 | |
/* Function: sre_malloc()
 *
 * Purpose:  malloc() wrapper that reports the requesting source
 *           location and calls Die() if the allocation fails.
 *
 * Args:     file - name of the source file making the request
 *           line - line number in that file
 *           size - number of bytes wanted
 *
 * Returns:  ptr to the new allocation.
 *           NOTE(review): Die() is assumed not to return, so
 *           callers never see NULL -- confirm against sqerror.c.
 */
void *
sre_malloc(char *file, int line, size_t size)
{
  void *ptr;

  /* size_t has no portable pre-C99 conversion specifier; print it
   * as unsigned long so the format and the argument type agree.
   */
  SQD_DPRINTF3(("MALLOC: %lu bytes (file %s line %d)\n", (unsigned long) size, file, line));
  if ((ptr = malloc (size)) == NULL)
    Die("malloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}
114 | |
/* Function: sre_realloc()
 *
 * Purpose:  realloc() wrapper that calls Die(), naming the
 *           requesting source location, if the reallocation fails.
 *
 * Args:     file - name of the source file making the request
 *           line - line number in that file
 *           p    - existing allocation to resize (handed to realloc())
 *           size - new size in bytes
 *
 * Returns:  ptr to the resized allocation.
 *           NOTE(review): Die() is assumed not to return, so
 *           callers never see NULL -- confirm against sqerror.c.
 */
void *
sre_realloc(char *file, int line, void *p, size_t size)
{
  void *ptr;

  /* print size_t as unsigned long so format and argument type agree */
  if ((ptr = realloc(p, size)) == NULL)
    Die("realloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}
124 | |
125 | |
126 | |
127 /* Function: Free2DArray(), Free3DArray() | |
128 * Date: SRE, Tue Jun 1 14:47:14 1999 [St. Louis] | |
129 * | |
130 * Purpose: Convenience functions for free'ing 2D | |
131 * and 3D pointer arrays. Tolerates any of the | |
132 * pointers being NULL, to allow "sparse" | |
133 * arrays. | |
134 * | |
135 * Args: p - array to be freed | |
136 * dim1 - n for first dimension | |
137 * dim2 - n for second dimension | |
138 * | |
139 * e.g. a 2d array is indexed p[0..dim1-1][] | |
140 * a 3D array is indexed p[0..dim1-1][0..dim2-1][] | |
141 * | |
142 * Returns: void | |
143 * | |
144 * Diagnostics: (void) | |
145 * "never fails" | |
146 */ | |
void
Free2DArray(void **p, int dim1)
{
  int row;

  if (p == NULL) return;

  /* free(NULL) is a no-op, so rows of a sparse array need no test */
  for (row = 0; row < dim1; row++)
    free(p[row]);
  free(p);
}
void
Free3DArray(void ***p, int dim1, int dim2)
{
  int i, j;

  if (p == NULL) return;

  for (i = 0; i < dim1; i++)
    {
      if (p[i] == NULL) continue;       /* whole plane absent */

      /* free(NULL) is a no-op, so absent rows need no test */
      for (j = 0; j < dim2; j++)
        free(p[i][j]);
      free(p[i]);
    }
  free(p);
}
173 | |
174 | |
/* Function: RandomSequence()
 *
 * Purpose:  Generate an iid symbol sequence according
 *           to some alphabet, alphabet_size, probability
 *           distribution, and length. Each symbol is
 *           alphabet[FChoose(p,n)]; FChoose() presumably
 *           samples an index 0..n-1 according to p
 *           (see sre_math.c to confirm).
 *
 * Args:     alphabet  - e.g. "ACGT"
 *           p         - probability distribution [0..n-1]
 *           n         - number of symbols in alphabet
 *           len       - length of generated sequence
 *
 * Return:   ptr to the new '\0'-terminated sequence, allocated
 *           with MallocOrDie(); caller free()'s it.
 */
char *
RandomSequence(char *alphabet, float *p, int n, int len)
{
  char *seq;
  int   i = 0;

  seq = (char *) MallocOrDie (sizeof(char) * (len+1));
  while (i < len)
    {
      seq[i] = alphabet[FChoose(p,n)];
      i++;
    }
  seq[i] = '\0';
  return seq;
}
201 | |
/* Function: sre_fgets()
 * Date:     SRE, Thu May 13 10:56:28 1999 [St. Louis]
 *
 * Purpose:  Dynamic allocation version of fgets(),
 *           capable of reading unlimited line lengths.
 *
 * Args:     buf - ptr to a string (may be reallocated)
 *           n   - ptr to current allocated length of buf,
 *                 (may be changed)
 *           fp  - open file ptr for reading
 *
 *           Before the first call to sre_fgets(),
 *           buf should be initialized to NULL and n to 0.
 *           They're a linked pair, so don't muck with the
 *           allocation of buf or the value of n while
 *           you're still doing sre_fgets() calls with them.
 *
 * Returns:  ptr to the buffer on success; the line keeps its
 *           trailing '\n' if the input had one.
 *           NULL when fgets() reports EOF or an error before
 *           reading anything (buf isn't to be used in this case).
 *           sre_fgets() *always* results in an allocation
 *           in buf, which the caller eventually free()'s.
 *
 *           The reason to have it return a ptr to buf
 *           is that it makes wrapper macros easy; see
 *           MSAFileGetLine() for an example.
 *
 * Example:  char *buf;
 *           int   n;
 *           FILE *fp;
 *
 *           fp  = fopen("my_file", "r");
 *           buf = NULL;
 *           n   = 0;
 *           while (sre_fgets(&buf, &n, fp) != NULL)
 *           {
 *             do stuff with buf;
 *           }
 */
char *
sre_fgets(char **buf, int *n, FILE *fp)
{
  char *s;
  int   len;
  int   pos;

  if (*n == 0)
    {
      *buf = MallocOrDie(sizeof(char) * 128);
      *n   = 128;
    }

  /* Simple case 1. We're sitting at EOF, or there's an error.
   *                fgets() returns NULL, so we return NULL.
   */
  if (fgets(*buf, *n, fp) == NULL) return NULL;

  /* Simple case 2. fgets() got a string, and it reached EOF.
   *                return success status, so caller can use
   *                the last line; on the next call we'll
   *                return NULL for the EOF.
   */
  if (feof(fp)) return *buf;

  /* Simple case 3. We got a complete string, with \n,
   *                and don't need to extend the buffer.
   *                The len > 0 test keeps us from peeking at
   *                (*buf)[-1] if the input began with a NUL
   *                byte and fgets() handed back "".
   */
  len = strlen(*buf);
  if (len > 0 && (*buf)[len-1] == '\n') return *buf;

  /* The case we're waiting for. We have an incomplete string,
   * and we have to extend the buffer one or more times. Make
   * sure we overwrite the previous fgets's \0 (hence +(n-1)
   * in first step, rather than 128, and reads of 129, not 128).
   */
  pos = (*n)-1;
  while (1) {
    *n  += 128;
    *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
    s = *buf + pos;
    /* EOF right at a chunk boundary: what we have is the last line */
    if (fgets(s, 129, fp) == NULL) return *buf;
    len = strlen(s);
    if (len > 0 && s[len-1] == '\n') return *buf;
    pos += 128;
  }
  /*NOTREACHED*/
}
288 | |
/* Function: sre_strcat()
 * Date:     SRE, Thu May 13 09:36:32 1999 [St. Louis]
 *
 * Purpose:  Dynamic memory version of strcat().
 *           appends src to the string that dest points to,
 *           extending allocation for dest if necessary.
 *
 *           One timing experiment (100 successive appends of
 *           1-255 char) shows sre_strcat() has about a 20%
 *           overhead relative to strcat(). However, if optional
 *           length info is passed, sre_strcat() is about 30%
 *           faster than strcat().
 *
 * Args:     dest  - ptr to string (char **), '\0' terminated
 *           ldest - length of dest, if known; or -1 if length unknown.
 *           src   - string to append to dest, '\0' terminated
 *           lsrc  - length of src, if known; or -1 if length unknown.
 *
 *           dest may be NULL, in which case this is
 *           the equivalent of dest = Strdup(src); ldest is
 *           ignored and the old length is taken to be 0.
 *
 *           src may also be NULL, in which case
 *           dest is unmodified whatever lsrc says.
 *
 *           if both dest and src are NULL, dest is
 *           unmodified; it stays NULL.
 *
 *           the length parameters are optional. If a -1
 *           is passed, sre_strcat() will call strlen() to
 *           determine the length itself. Passing length
 *           info saves the strlen() calls and can speed things
 *           up if lots of successive appends need to be done.
 *           An lsrc shorter than src appends just that prefix.
 *
 * Returns:  new length of dest (>=0 on success);
 *           dest is (probably) reallocated, and modified
 *           to a longer string, '\0' terminated.
 */
int
sre_strcat(char **dest, int ldest, char *src, int lsrc)
{
  int   len1, len2;

  /* A NULL string has length 0 no matter what the caller passed;
   * trusting ldest/lsrc here would write past the fresh allocation
   * or read through a NULL src.
   */
  if      (*dest == NULL) len1 = 0;
  else if (ldest < 0)     len1 = strlen(*dest);
  else                    len1 = ldest;

  if      (src == NULL)   len2 = 0;
  else if (lsrc < 0)      len2 = strlen(src);
  else                    len2 = lsrc;

  if (len2 == 0) return len1;

  if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1));
  else               *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1));

  /* Terminate explicitly instead of copying src[len2], which is
   * only '\0' when lsrc really is the full length of src.
   */
  memcpy((*dest)+len1, src, len2);
  (*dest)[len1+len2] = '\0';
  return len1+len2;
}
346 | |
/* Function: sre_strtok()
 * Date:     SRE, Wed May 19 16:30:20 1999 [St. Louis]
 *
 * Purpose:  strtok() replacement that keeps its position in the
 *           caller's pointer instead of in static state.
 *
 *           Starting at *s, skips any characters that are in
 *           delim; the first character not in delim begins the
 *           token. The token runs up to the next delim character
 *           (or the end of the string). A terminating delim
 *           character is overwritten with '\0' and *s is set to
 *           the character after it, so successive calls extract
 *           tokens in succession. If the token ran to the end of
 *           the string, *s is set to point at the terminal '\0'.
 *
 *           If no token is found, returns NULL and leaves *s
 *           as it was.
 *
 *           Also returns the length of the token, which
 *           may save us a strlen() call in some applications.
 *
 * Limitations:
 *           *s can't be a constant string, since we write to it.
 *
 * Example:
 *           char *tok;
 *           int   len;
 *           char *s;
 *           char  buf[50] = "This is  a sentence.";
 *
 *           s = buf;
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "This"; s is "is  a sentence."; len is 4.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "is"; s is " a sentence."; len is 2.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "a"; s is "sentence."; len is 1.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "sentence."; s is "\0"; len is 9.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is NULL; s is "\0", len is undefined.
 *
 * Args:     s     - a tmp, modifiable ptr to string
 *           delim - characters that delimits tokens
 *           len   - RETURN: length of token; pass NULL if not wanted
 *
 * Returns:  ptr to next token, or NULL if there aren't any.
 */
char *
sre_strtok(char **s, char *delim, int *len)
{
  char *tok;
  char *stop;
  int   toklen;

  tok = *s + strspn(*s, delim);         /* hop over leading delimiters */
  if (*tok == '\0') return NULL;

  toklen = strcspn(tok, delim);
  stop   = tok + toklen;                /* first char past the token */
  if (*stop != '\0')
    {
      *stop = '\0';
      *s    = stop + 1;
    }
  else
    *s = stop;

  if (len != NULL) *len = toklen;
  return tok;
}
415 | |
416 | |
417 | |
/* Function: sre_strdup()
 * Date:     SRE, Wed May 19 17:57:28 1999 [St. Louis]
 *
 * Purpose:  A version of the common but non-ANSI strdup()
 *           function. Can pass len, if known, to save a
 *           strlen() call.
 *
 * Args:     s  - string to duplicate
 *           n  - length of string, if known; -1 if unknown.
 *                If n is shorter than s, only the first n
 *                chars are copied.
 *
 * Returns:  allocated copy of string ('\0' terminated, n+1
 *           bytes from MallocOrDie(); caller free()'s it).
 *           NULL if s is NULL.
 */
char *
sre_strdup(char *s, int n)
{
  char *copy;

  if (s == NULL) return NULL;
  if (n < 0) n = strlen(s);
  copy = MallocOrDie (sizeof(char) * (n+1));

  /* The buffer holds exactly n+1 bytes, so bound the copy by n rather
   * than trusting that the caller's n matches strlen(s); strncpy()
   * also stops reading at s's '\0' if n was too large.
   */
  strncpy(copy, s, n);
  copy[n] = '\0';
  return copy;
}
442 | |
443 | |
/* Function: sre_strncpy()
 * Date:     SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
 *
 * Purpose:  strncpy() followed by an unconditional write of
 *           '\0' at s1[n], so the result is always terminated.
 *
 * Args:     s1  - string to copy to (allocated n+1 or larger)
 *           s2  - string to copy from
 *           n   - number of chars to copy
 *
 * Returns:  s1.
 *           Done only for consistency with strncpy(). Not clear
 *           why it's useful for a strncpy() to return s1.
 */
char *
sre_strncpy(char *s1, char *s2, int n)
{
  char *result;

  result    = strncpy(s1, s2, n);       /* strncpy() hands back s1 */
  result[n] = '\0';
  return result;
}
464 | |
465 /* Function: IsBlankline() | |
466 * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis] | |
467 * | |
468 * Purpose: Returns TRUE if string consists solely of whitespace. | |
469 * | |
470 * Args: s - string to check | |
471 */ | |
472 int | |
473 IsBlankline(char *s) | |
474 { | |
475 for (; *s != '\0'; s++) | |
476 if (! isspace(*s)) return FALSE; | |
477 return TRUE; | |
478 } | |
479 | |
480 | |
481 | |
#ifdef CUBS_WIN
/* A timing test for sre_strcat()
 * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm
 * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
 * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
 * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
 *
 * Builds 1000 strings, each from 100 appended random "ACGT" pieces of
 * length CHOOSE(255)+1. For the strcat() baseline, allocate s1 up front
 * with 255*nappends+1 bytes, set s1[0] = '\0', and append with
 * strcat(s1,s2) in place of the sre_strcat() call.
 */
int main(void)
{
  float  p[4]     = {0.25, 0.25, 0.25, 0.25};
  int    nappends = 100;
  int    nstrings = 1000;
  int    buflen;
  int    len;
  int    round;
  int    i;
  char  *s1 = NULL;
  char  *s2;

  for (round = 0; round < nstrings; round++)
    {
      s1  = NULL;
      len = 0;
      for (i = 0; i < nappends; i++)
        {
          buflen = CHOOSE(255) + 1;
          s2     = RandomSequence("ACGT", p, 4, buflen);

          len = sre_strcat(&s1, len, s2, buflen);
          if (len < 0) exit(1);
          free(s2);
        }
      free(s1);
    }
  exit(0);
}
#endif /*CUBS_WIN*/