/*****************************************************************
 * SQUID - a library of functions for biological sequence analysis
 * Copyright (C) 1992-2002 Washington University School of Medicine
 *
 * This source code is freely distributed under the terms of the
 * GNU General Public License. See the files COPYRIGHT and LICENSE
 * for details.
 *****************************************************************/

/* sre_string.c
 *
 * My library of extra string functions, some of them provided for
 * portability across UNIXes.
 *
 * RCS $Id: sre_string.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_string.c,v 1.11 2001/06/07 16:59:37 eddy Exp)
 */

18 #include <stdio.h>
|
|
19 #include <stdlib.h>
|
|
20 #include <string.h>
|
|
21 #include <stdarg.h>
|
|
22 #include <ctype.h>
|
|
23 #include "squid.h"
|
|
24
|
|
/* Function: Strdup()
 *
 * Purpose:  Stand-in for the common (but non-ANSI) strdup().
 *           Tolerates being handed a NULL pointer.
 *
 * Args:     s - '\0'-terminated string to copy; may be NULL
 *
 * Returns:  a malloc()'ed copy of s (caller free()'s it);
 *           NULL if s is NULL or if malloc() fails.
 */
char *
Strdup(char *s)
{
  size_t  nbytes;
  char   *copy;

  if (s == NULL) return NULL;

  nbytes = strlen(s) + 1;       /* +1: room for the trailing '\0' */
  copy   = (char *) malloc(nbytes);
  if (copy != NULL)
    memcpy(copy, s, nbytes);    /* the terminator is copied along with the text */
  return copy;
}

/* Function: StringChop()
 * Date:     SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
 *
 * Purpose:  Chop trailing whitespace off of a string, in place.
 *           Whitespace is whatever isspace() accepts in the
 *           current locale.
 *
 * Args:     s - '\0'-terminated string to trim; modified in place.
 *               A NULL pointer is accepted and ignored.
 *
 * Returns:  (void)
 */
void
StringChop(char *s)
{
  size_t len;

  if (s == NULL) return;

  len = strlen(s);
  /* <ctype.h> functions are only defined for EOF and values representable
   * as unsigned char, so convert before testing: a plain char with the
   * high bit set may otherwise be passed as a negative int.
   */
  while (len > 0 && isspace((unsigned char) s[len-1])) len--;
  s[len] = '\0';                /* len now counts the chars that are kept */
}

/* Function: Strinsert()
 *
 * Purpose:  Insert a single character into a string, in place,
 *           shifting the tail of the string one position right.
 *
 * Args:     s1  - string to insert a char into. The caller must have
 *                 room for one more character than s1 currently holds.
 *           c   - char to insert. Inserting '\0' simply truncates
 *                 s1 at pos.
 *           pos - position in s1 to insert c at, 0..strlen(s1)
 *                 (pos == strlen(s1) appends).
 *
 * Returns:  1 on success;
 *           0 if s1 is NULL or pos lies outside 0..strlen(s1),
 *           in which case s1 is left untouched.
 */
int
Strinsert(char  *s1,            /* string to insert a char into  */
          char   c,             /* char to insert                */
          int    pos)           /* position in s1 to insert c at */
{
  size_t len;
  size_t at;

  if (s1 == NULL || pos < 0) return 0;

  len = strlen(s1);
  at  = (size_t) pos;
  if (at > len) return 0;       /* would start writing beyond the terminator */

  if (c == '\0')                /* nothing to shift: the string just ends here */
    {
      s1[at] = '\0';
      return 1;
    }

  /* Open a one-char gap at pos; the +1 carries the trailing '\0' along. */
  memmove(s1 + at + 1, s1 + at, len - at + 1);
  s1[at] = c;
  return 1;
}

/* Function: Strdelete()
 *
 * Purpose:  Delete a single character from a string, in place,
 *           shifting the tail of the string one position left.
 *
 * Args:     s1  - string to delete a char from
 *           pos - position of char to delete, 0..n-1.
 *                 pos == n (the terminator) is accepted as a no-op.
 *
 * Returns:  1 on success;
 *           0 if s1 is NULL or pos lies outside 0..strlen(s1),
 *           in which case s1 is left untouched.
 */
int
Strdelete(char *s1,             /* string to delete a char from       */
          int   pos)            /* position of char to delete 0..n-1  */
{
  size_t len;
  size_t at;

  if (s1 == NULL || pos < 0) return 0;

  len = strlen(s1);
  at  = (size_t) pos;
  if (at > len) return 0;       /* would start reading beyond the terminator */

  /* Close the gap; (len - at) bytes covers the tail plus its '\0'. */
  if (at < len)
    memmove(s1 + at, s1 + at + 1, len - at);
  return 1;
}

/* Function: s2lower()
 *
 * Purpose:  Convert a string in place by passing every character
 *           through sre_tolower().
 *           NOTE(review): sre_tolower() is declared outside this file;
 *           presumably it is squid's lowercasing wrapper -- confirm.
 *
 * Args:     s - '\0'-terminated string; modified in place
 *
 * Returns:  (void)
 */
void
s2lower(char *s)
{
  char *cp = s;

  while (*cp != '\0')
    {
      *cp = sre_tolower((int) *cp);
      cp++;
    }
}

/* Function: s2upper()
 *
 * Purpose:  Convert a string in place by passing every character
 *           through sre_toupper().
 *           NOTE(review): sre_toupper() is declared outside this file;
 *           presumably it is squid's uppercasing wrapper -- confirm.
 *
 * Args:     s - '\0'-terminated string; modified in place
 *
 * Returns:  (void)
 */
void
s2upper(char *s)
{
  char *cp = s;

  while (*cp != '\0')
    {
      *cp = sre_toupper((int) *cp);
      cp++;
    }
}

/* Function: sre_malloc()
 *
 * Purpose:  malloc() wrapper that reports the requesting source
 *           location and calls Die() if the allocation fails.
 *           NOTE(review): Die() and SQD_DPRINTF3 are declared outside
 *           this file; Die() is presumably a printf-style fatal error
 *           routine that does not return -- confirm.
 *
 * Args:     file - name of the source file making the request
 *           line - line number in that file
 *           size - number of bytes wanted
 *
 * Returns:  ptr to the allocated memory.
 */
void *
sre_malloc(char *file, int line, size_t size)
{
  void *ptr;

  /* size_t has no portable pre-C99 conversion specifier, so print it
   * as unsigned long; handing a size_t to %d or %ld is a format/argument
   * mismatch wherever size_t is not that exact type.
   */
  SQD_DPRINTF3(("MALLOC: %lu bytes (file %s line %d)\n", (unsigned long) size, file, line));
  if ((ptr = malloc (size)) == NULL)
    Die("malloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}

/* Function: sre_realloc()
 *
 * Purpose:  realloc() wrapper that reports the requesting source
 *           location and calls Die() if the reallocation fails.
 *           NOTE(review): Die() is declared outside this file;
 *           presumably a printf-style fatal error routine that does
 *           not return -- confirm.
 *
 * Args:     file - name of the source file making the request
 *           line - line number in that file
 *           p    - block to resize (passed straight to realloc())
 *           size - new size in bytes
 *
 * Returns:  ptr to the resized memory.
 */
void *
sre_realloc(char *file, int line, void *p, size_t size)
{
  void *ptr;

  /* Print size as unsigned long: a size_t argument does not portably
   * match %ld.
   */
  if ((ptr = realloc(p, size)) == NULL)
    Die("realloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}

/* Function: Free2DArray(), Free3DArray()
 * Date:     SRE, Tue Jun  1 14:47:14 1999 [St. Louis]
 *
 * Purpose:  Convenience functions for free'ing 2D
 *           and 3D pointer arrays. Any of the pointers
 *           may be NULL, which allows "sparse" arrays.
 *
 * Args:     p     - array to be freed
 *           dim1  - n for first dimension
 *           dim2  - n for second dimension (Free3DArray() only)
 *
 *           e.g. a 2D array is indexed p[0..dim1-1][]
 *                a 3D array is indexed p[0..dim1-1][0..dim2-1][]
 *
 * Returns:  void
 *
 * Diagnostics: (void)
 *              "never fails"
 */
void
Free2DArray(void **p, int dim1)
{
  int row;

  if (p == NULL) return;

  for (row = 0; row < dim1; row++)
    free(p[row]);               /* free(NULL) is a no-op, so empty rows are fine */
  free(p);
}

/* Function: Free3DArray()
 *
 * Purpose:  Free a 3D pointer array indexed p[0..dim1-1][0..dim2-1][].
 *           p itself, any p[i], and any p[i][j] may be NULL.
 *
 * Args:     p     - array to be freed
 *           dim1  - n for first dimension
 *           dim2  - n for second dimension
 *
 * Returns:  void
 */
void
Free3DArray(void ***p, int dim1, int dim2)
{
  int     i, j;
  void  **plane;

  if (p == NULL) return;

  for (i = 0; i < dim1; i++)
    {
      plane = p[i];
      if (plane == NULL) continue;      /* sparse: nothing hangs off this slot */

      for (j = 0; j < dim2; j++)
        free(plane[j]);                 /* free(NULL) is a no-op */
      free(plane);
    }
  free(p);
}

/* Function: RandomSequence()
 *
 * Purpose:  Generate an iid symbol sequence according
 *           to some alphabet, alphabet_size, probability
 *           distribution, and length. Return the
 *           sequence.
 *
 *           Each symbol is alphabet[FChoose(p,n)].
 *           NOTE(review): FChoose() and MallocOrDie() are declared
 *           outside this file; presumably FChoose() samples an index
 *           0..n-1 according to p, and MallocOrDie() aborts instead
 *           of returning NULL -- confirm.
 *
 * Args:     alphabet  - e.g. "ACGT"
 *           p         - probability distribution [0..n-1]
 *           n         - number of symbols in alphabet
 *           len       - length of generated sequence
 *
 * Return:   ptr to the '\0'-terminated random sequence, allocated
 *           here (len+1 chars); caller free()'s it.
 */
char *
RandomSequence(char *alphabet, float *p, int n, int len)
{
  char *seq;
  int   i;

  seq = (char *) MallocOrDie (sizeof(char) * (len+1));

  i = 0;
  while (i < len)
    {
      seq[i] = alphabet[FChoose(p,n)];
      i++;
    }
  seq[i] = '\0';
  return seq;
}

/* Function: sre_fgets()
 * Date:     SRE, Thu May 13 10:56:28 1999 [St. Louis]
 *
 * Purpose:  Dynamic allocation version of fgets(),
 *           capable of reading unlimited line lengths.
 *
 * Args:     buf - ptr to a string (may be reallocated)
 *           n   - ptr to current allocated length of buf,
 *                 (may be changed)
 *           fp  - open file ptr for reading
 *
 *           Before the first call to sre_fgets(),
 *           buf should be initialized to NULL and n to 0.
 *           They're a linked pair, so don't muck with the
 *           allocation of buf or the value of n while
 *           you're still doing sre_fgets() calls with them.
 *
 * Returns:  ptr to the buffer on success.
 *           NULL on EOF (buf isn't to be used in this case)
 *           Starting from n == 0, sre_fgets() *always* results
 *           in an allocation in buf, even when it returns NULL,
 *           so the caller free()'s buf when done.
 *
 *           The reason to have it return a ptr to buf
 *           is that it makes wrapper macros easy; see
 *           MSAFileGetLine() for an example.
 *
 * Example:  char *buf;
 *           int   n;
 *           FILE *fp;
 *
 *           fp  = fopen("my_file", "r");
 *           buf = NULL;
 *           n   = 0;
 *           while (sre_fgets(&buf, &n, fp) != NULL)
 *           {
 *             do stuff with buf;
 *           }
 */
char *
sre_fgets(char **buf, int *n, FILE *fp)
{
  char *s;
  int   len;
  int   pos;

  if (*n == 0)
    {
      *buf = MallocOrDie(sizeof(char) * 128);
      *n   = 128;
    }

  /* Simple case 1. We're sitting at EOF, or there's an error.
   *                fgets() returns NULL, so we return NULL.
   */
  if (fgets(*buf, *n, fp) == NULL) return NULL;

  /* Simple case 2. fgets() got a string, and it reached EOF.
   *                return success status, so caller can use
   *                the last line; on the next call we'll
   *                return the NULL for the EOF.
   */
  if (feof(fp)) return *buf;

  /* Simple case 3. We got a complete string, with \n,
   *                and don't need to extend the buffer.
   *                A line that starts with a '\0' byte has
   *                strlen() 0; the len > 0 test keeps us from
   *                looking at the char before the buffer.
   */
  len = strlen(*buf);
  if (len > 0 && (*buf)[len-1] == '\n') return *buf;

  /* The case we're waiting for. We have an incomplete string,
   * and we have to extend the buffer one or more times. Make
   * sure we overwrite the previous fgets's \0 (hence +(n-1)
   * in first step, rather than 128, and reads of 129, not 128).
   */
  pos = (*n)-1;
  while (1) {
    *n  += 128;
    *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
    s = *buf + pos;
    if (fgets(s, 129, fp) == NULL) return *buf;  /* EOF: what we have is the last line */
    len = strlen(s);
    if (len > 0 && s[len-1] == '\n') return *buf;
    pos += 128;
  }
  /*NOTREACHED*/
}

/* Function: sre_strcat()
 * Date:     SRE, Thu May 13 09:36:32 1999 [St. Louis]
 *
 * Purpose:  Dynamic memory version of strcat().
 *           Appends src to the string that dest points to,
 *           extending the allocation for dest as needed.
 *
 *           One timing experiment (100 successive appends of
 *           1-255 char) shows sre_strcat() has about a 20%
 *           overhead relative to strcat(). However, if optional
 *           length info is passed, sre_strcat() is about 30%
 *           faster than strcat().
 *
 * Args:     dest  - ptr to string (char **), '\0' terminated
 *           ldest - length of dest, if known; or -1 if length unknown.
 *           src   - string to append to dest, '\0' terminated
 *           lsrc  - length of src, if known; or -1 if length unknown.
 *
 *           *dest may be NULL, in which case this is
 *           the equivalent of *dest = Strdup(src).
 *
 *           src may also be NULL, in which case
 *           dest is unmodified.
 *
 *           If both *dest and src are NULL, dest is
 *           unmodified; it stays NULL.
 *
 *           The length parameters are optional. If a -1
 *           is passed, sre_strcat() calls strlen() to
 *           determine the length itself. Passing length
 *           info saves the strlen() calls and can speed things
 *           up if lots of successive appends need to be done.
 *           Lengths that are passed are trusted as given.
 *
 * Returns:  new length of dest (>=0 on success);
 *           dest is (probably) reallocated, and modified
 *           to a longer string, '\0' terminated.
 */
int
sre_strcat(char **dest, int ldest, char *src, int lsrc)
{
  int dlen;                     /* chars already in *dest */
  int slen;                     /* chars to append        */

  if      (ldest >= 0)    dlen = ldest;
  else if (*dest != NULL) dlen = strlen(*dest);
  else                    dlen = 0;

  if      (lsrc >= 0)     slen = lsrc;
  else if (src != NULL)   slen = strlen(src);
  else                    slen = 0;

  if (slen == 0) return dlen;   /* nothing to append; don't touch the allocation */

  if (*dest != NULL) *dest = ReallocOrDie(*dest, sizeof(char) * (dlen+slen+1));
  else               *dest = MallocOrDie(sizeof(char) * (slen+1));

  memcpy(*dest + dlen, src, slen+1);    /* +1 brings src's '\0' along */
  return dlen + slen;
}

/* Function: sre_strtok()
 * Date:     SRE, Wed May 19 16:30:20 1999 [St. Louis]
 *
 * Purpose:  Thread-safe version of strtok().
 *
 *           Returns ptr to next token in a string: skips
 *           until it reaches a character that is not in the delim
 *           string, and sets beginning of token. Skips to
 *           next delim character (or '\0') to set the end; replaces that
 *           character with '\0'.
 *           If there's still more string left, sets s to point to next
 *           character after the '\0' that was written, so successive
 *           calls extract tokens in succession. If there was no string
 *           left, s points at the terminal '\0'.
 *
 *           If no token is found, returns NULL, and neither *s
 *           nor *len is changed.
 *
 *           Also returns the length of the token, which
 *           may save us a strlen() call in some applications.
 *
 * Limitations:
 *           *s can't be a constant string, since we write to it.
 *
 * Example:
 *           char *tok;
 *           int   len;
 *           char *s;
 *           char  buf[50] = "This is a sentence.";
 *
 *           s = buf;
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "This"; s is "is a sentence."; len is 4.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "is"; s is "a sentence."; len is 2.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "a"; s is "sentence."; len is 1.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "sentence."; s is "\0"; len is 9.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is NULL; s is "\0", len is unchanged.
 *
 * Args:     s     - a tmp, modifiable ptr to string
 *           delim - characters that delimits tokens
 *           len   - RETURN: length of token; pass NULL if not wanted
 *
 * Returns:  ptr to next token, or NULL if there aren't any.
 */
char *
sre_strtok(char **s, char *delim, int *len)
{
  char *tok;
  char *stop;
  int   toklen;

  tok = *s + strspn(*s, delim);         /* step over any leading delimiters */
  if (*tok == '\0') return NULL;

  toklen = strcspn(tok, delim);
  stop   = tok + toklen;                /* first delimiter after the token, or the '\0' */
  if (*stop != '\0') *stop++ = '\0';    /* terminate the token; resume just past it     */
  *s = stop;

  if (len != NULL) *len = toklen;
  return tok;
}

/* Function: sre_strdup()
 * Date:     SRE, Wed May 19 17:57:28 1999 [St. Louis]
 *
 * Purpose:  A version of the common but non-ANSI strdup()
 *           function. Can pass len, if known, to save a
 *           strlen() call.
 *
 * Args:     s  - string to duplicate
 *           n  - length of string, if known; -1 if unknown.
 *
 *           The copy is always n+1 bytes. At most n chars are
 *           taken from s, so an n shorter than strlen(s) yields
 *           the n-char prefix of s rather than writing past the
 *           allocation.
 *
 * Returns:  allocated copy of string; caller free()'s it.
 *           NULL if s is NULL.
 *           NOTE(review): allocation goes through MallocOrDie(),
 *           declared outside this file; presumably it aborts
 *           rather than returning NULL -- confirm.
 */
char *
sre_strdup(char *s, int n)
{
  char *new;

  if (s == NULL) return NULL;
  if (n < 0) n = strlen(s);
  new = MallocOrDie (sizeof(char) * (n+1));
  strncpy(new, s, n);           /* bounded by the allocation, unlike strcpy() */
  new[n] = '\0';                /* strncpy() doesn't terminate when it copies n chars */
  return new;
}

/* Function: sre_strncpy()
 * Date:     SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
 *
 * Purpose:  a strncpy() that makes sure it adds a trailing \0.
 *           As with strncpy(), if s2 is shorter than n the rest
 *           of s1[0..n-1] is filled with '\0'; s1[n] is then
 *           always set to '\0' as well.
 *
 * Args:     s1  - string to copy to (allocated n+1 or larger)
 *           s2  - string to copy from
 *           n   - number of chars to copy
 *
 * Returns:  s1.
 *           Done only for consistency with strncpy(). Not clear
 *           why it's useful for a strncpy() to return s1.
 */
char *
sre_strncpy(char *s1, char *s2, int n)
{
  size_t limit = (size_t) n;
  size_t ncopy = 0;

  /* Count the chars to take from s2: all of it, or the first n. */
  while (ncopy < limit && s2[ncopy] != '\0') ncopy++;

  memcpy(s1, s2, ncopy);
  /* Zero s1[ncopy..n]: strncpy()'s padding plus the guaranteed terminator. */
  memset(s1 + ncopy, '\0', limit - ncopy + 1);
  return s1;
}

465 /* Function: IsBlankline()
|
|
466 * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis]
|
|
467 *
|
|
468 * Purpose: Returns TRUE if string consists solely of whitespace.
|
|
469 *
|
|
470 * Args: s - string to check
|
|
471 */
|
|
472 int
|
|
473 IsBlankline(char *s)
|
|
474 {
|
|
475 for (; *s != '\0'; s++)
|
|
476 if (! isspace(*s)) return FALSE;
|
|
477 return TRUE;
|
|
478 }
|
|
479
|
|
480
|
|
481
|
|
#ifdef CUBS_WIN
/* A timing test for sre_strcat()
 * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm
 * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
 * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
 * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
 *
 * Builds 1000 strings, each by 100 successive appends of a random
 * "ACGT" chunk of CHOOSE(255)+1 chars, passing length info to
 * sre_strcat(). For the plain strcat() baseline, allocate
 * 255*nappends+1 chars up front, set the first char to '\0', and
 * call strcat() in place of sre_strcat().
 */
int main(void)
{
  float  p[4]     = {0.25, 0.25, 0.25, 0.25};
  int    nappends = 100;
  int    nstrings = 1000;
  int    round;
  int    i;
  int    chunklen;
  int    total;                 /* running length of the growing string */
  char  *acc;                   /* the growing string                   */
  char  *chunk;

  for (round = 0; round < nstrings; round++)
    {
      acc   = NULL;
      total = 0;
      for (i = 0; i < nappends; i++)
        {
          chunklen = CHOOSE(255) + 1;
          chunk    = RandomSequence("ACGT", p, 4, chunklen);

          total = sre_strcat(&acc, total, chunk, chunklen);
          if (total < 0) exit(1);
          free(chunk);
        }
      free(acc);
    }
  exit(0);
}
#endif /*CUBS_WIN*/