diff clustalomega/clustal-omega-1.0.2/src/squid/sre_string.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/clustalomega/clustal-omega-1.0.2/src/squid/sre_string.c	Thu Jul 21 13:35:08 2011 -0400
@@ -0,0 +1,523 @@
+/*****************************************************************
+ * SQUID - a library of functions for biological sequence analysis
+ * Copyright (C) 1992-2002 Washington University School of Medicine
+ * 
+ *     This source code is freely distributed under the terms of the
+ *     GNU General Public License. See the files COPYRIGHT and LICENSE
+ *     for details.
+ *****************************************************************/
+
+/* sre_string.c
+ * 
+ * my library of extra string functions. Some for portability
+ * across UNIXes
+ *
+ * RCS $Id: sre_string.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_string.c,v 1.11 2001/06/07 16:59:37 eddy Exp)
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include "squid.h"
+
+/* Function: Strdup()
+ * 
+ * Purpose:  Implementation of the common (but non-ANSI) function
+ *           strdup(). Robust against being passed a NULL pointer.
+ *           
+ */
+char *
+Strdup(char *s)
+{
+  char *new;
+  if (s == NULL) return NULL;
+  if ((new = (char *) malloc (strlen(s) +1)) == NULL) return NULL;
+  strcpy(new, s);
+  return new;
+}
+
+/* Function: StringChop()
+ * Date:     SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
+ * 
+ * Purpose:  Chop trailing whitespace off of a string.
+ */
+void
+StringChop(char *s)
+{
+  int   i;
+
+  i = strlen(s) - 1;		         /* set i at last char in string     */
+  while (i >= 0 && isspace((int) s[i])) i--;   /* i now at last non-whitespace char, or -1 */
+  s[i+1] = '\0';
+}
+
+int
+Strinsert(char  *s1,            /* string to insert a char into  */
+	  char   c,		/* char to insert                */
+	  int    pos)		/* position in s1 to insert c at */
+{
+  char    oldc;
+  char   *s;
+
+  for (s = s1 + pos; c; s++)
+    {
+				/* swap current char for inserted one */
+      oldc = *s;		/* pick up current */
+      *s   = c;   		/* put down inserted one    */
+      c    = oldc;		/* old becomes next to insert */
+    }
+  *s = '\0';
+
+  return 1;
+}
+
+
+int
+Strdelete(char *s1,             /* string to delete a char from       */
+	  int   pos)		/* position of char to delete 0..n-1  */
+{
+  char *s;                      
+
+  for (s = s1 + pos; *s; s++)
+    *s = *(s + 1);
+
+  return 1;
+}
+
+void
+s2lower(char *s)
+{
+  for (; *s != '\0'; s++)
+    *s = sre_tolower((int) *s);
+}
+
+void
+s2upper(char *s)
+{
+  for (; *s != '\0'; s++)
+    *s = sre_toupper((int) *s);
+}
+
+
+void *
+sre_malloc(char *file, int line, size_t size)
+{
+  void *ptr;
+
+  SQD_DPRINTF3(("MALLOC: %d bytes (file %s line %d)\n", size, file, line));
+  if ((ptr = malloc (size)) == NULL)
+    Die("malloc of %ld bytes failed: file %s line %d", size, file, line);
+  return ptr;
+}
+
+void *
+sre_realloc(char *file, int line, void *p, size_t size)
+{
+  void *ptr;
+
+  if ((ptr = realloc(p, size)) == NULL)
+    Die("realloc of %ld bytes failed: file %s line %d", size, file, line);
+  return ptr;
+}
+
+
+
+/* Function: Free2DArray(), Free3DArray()
+ * Date:     SRE, Tue Jun  1 14:47:14 1999 [St. Louis]
+ *
+ * Purpose:  Convenience functions for free'ing 2D
+ *           and 3D pointer arrays. Tolerates any of the
+ *           pointers being NULL, to allow "sparse" 
+ *           arrays.
+ *
+ * Args:     p     - array to be freed
+ *           dim1  - n for first dimension
+ *           dim2  - n for second dimension
+ *
+ *           e.g. a 2d array is indexed p[0..dim1-1][]
+ *                a 3D array is indexed p[0..dim1-1][0..dim2-1][]
+ *           
+ * Returns:  void
+ * 
+ * Diagnostics: (void)
+ *              "never fails"
+ */
+void
+Free2DArray(void **p, int dim1)
+{
+  int i;
+  
+  if (p != NULL) {
+    for (i = 0; i < dim1; i++)
+      if (p[i] != NULL) free(p[i]);
+    free(p);
+  }
+}
+void
+Free3DArray(void ***p, int dim1, int dim2)
+{
+  int i, j;
+
+  if (p != NULL) {
+    for (i = 0; i < dim1; i++)
+      if (p[i] != NULL) {
+	for (j = 0; j < dim2; j++)
+	  if (p[i][j] != NULL) free(p[i][j]);
+	free(p[i]);
+      }
+    free(p);
+  }
+}
+
+
+/* Function: RandomSequence()
+ * 
+ * Purpose:  Generate an iid symbol sequence according
+ *           to some alphabet, alphabet_size, probability
+ *           distribution, and length. Return the
+ *           sequence.
+ *           
+ * Args:     alphabet  - e.g. "ACGT"
+ *           p         - probability distribution [0..n-1]
+ *           n         - number of symbols in alphabet
+ *           len       - length of generated sequence 
+ *           
+ * Return:   ptr to random sequence, or NULL on failure.
+ */
+char *
+RandomSequence(char *alphabet, float *p, int n, int len)
+{
+  char *s;
+  int   x;
+
+  s = (char *) MallocOrDie (sizeof(char) * (len+1));
+  for (x = 0; x < len; x++)
+    s[x] = alphabet[FChoose(p,n)];
+  s[x] = '\0';
+  return s;
+}
+
+/* Function: sre_fgets()
+ * Date:     SRE, Thu May 13 10:56:28 1999 [St. Louis]
+ *
+ * Purpose:  Dynamic allocation version of fgets(),
+ *           capable of reading unlimited line lengths.
+ *
+ * Args:     buf - ptr to a string (may be reallocated)
+ *           n   - ptr to current allocated length of buf,
+ *                 (may be changed)
+ *           fp  - open file ptr for reading
+ *           
+ *           Before the first call to sre_fgets(), 
+ *           buf should be initialized to NULL and n to 0.
+ *           They're a linked pair, so don't muck with the
+ *           allocation of buf or the value of n while
+ *           you're still doing sre_fgets() calls with them.
+ *
+ * Returns:  ptr to the buffer on success. 
+ *           NULL on EOF (buf isn't to be used in this case)
+ *           sre_fgets() *always* results in an allocation
+ *           in buf.
+ *
+ *           The reason to have it return a ptr to buf
+ *           is that it makes wrapper macros easy; see
+ *           MSAFileGetLine() for an example.
+ *
+ * Example:  char *buf;
+ *           int   n;
+ *           FILE *fp;
+ *           
+ *           fp  = fopen("my_file", "r");
+ *           buf = NULL;
+ *           n   = 0;
+ *           while (sre_fgets(&buf, &n, fp) != NULL) 
+ *           {
+ *             do stuff with buf;
+ *           }
+ */
+char *
+sre_fgets(char **buf, int *n, FILE *fp)
+{
+  char *s;
+  int   len;
+  int   pos;
+
+  if (*n == 0) 
+    {
+      *buf = MallocOrDie(sizeof(char) * 128);
+      *n   = 128;
+    }
+
+  /* Simple case 1. We're sitting at EOF, or there's an error.
+   *                fgets() returns NULL, so we return NULL.
+   */
+  if (fgets(*buf, *n, fp) == NULL) return NULL;
+
+  /* Simple case 2. fgets() got a string, and it reached EOF.
+   *                return success status, so caller can use
+   *                the last line; on the next call we'll
+   *                return the 0 for the EOF.
+   */
+  if (feof(fp)) return *buf;
+
+  /* Simple case 3. We got a complete string, with \n,
+   *                and don't need to extend the buffer.
+   */
+  len = strlen(*buf);
+  if ((*buf)[len-1] == '\n') return *buf;
+
+  /* The case we're waiting for. We have an incomplete string,
+   * and we have to extend the buffer one or more times. Make
+   * sure we overwrite the previous fgets's \0 (hence +(n-1)
+   * in first step, rather than 128, and reads of 129, not 128).
+   */
+  pos = (*n)-1;
+  while (1) {
+    *n  += 128;
+    *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
+    s = *buf + pos;
+    if (fgets(s, 129, fp) == NULL) return *buf;
+    len = strlen(s);
+    if (s[len-1] == '\n') return *buf;
+    pos += 128;
+  } 
+  /*NOTREACHED*/
+}
+
+/* Function: sre_strcat()
+ * Date:     SRE, Thu May 13 09:36:32 1999 [St. Louis]
+ *
+ * Purpose:  Dynamic memory version of strcat().
+ *           appends src to the string that dest points to,
+ *           extending allocation for dest if necessary.
+ *           
+ *           One timing experiment (100 successive appends of 
+ *           1-255 char) shows sre_strcat() has about a 20%
+ *           overhead relative to strcat(). However, if optional
+ *           length info is passed, sre_strcat() is about 30%
+ *           faster than strcat().
+ *           
+ * Args:     dest  - ptr to string (char **), '\0' terminated
+ *           ldest - length of dest, if known; or -1 if length unknown.
+ *           src   - string to append to dest, '\0' terminated       
+ *           lsrc  - length of src, if known; or -1 if length unknown.
+ *
+ *           dest may be NULL, in which case this is
+ *           the equivalent of dest = Strdup(src).
+ *           
+ *           src may also be NULL, in which case
+ *           dest is unmodified (but why would you want to pass
+ *           a NULL src?)
+ *           
+ *           if both dest and src are NULL, dest is
+ *           unmodified; it stays NULL.
+ * 
+ *           the length parameters are optional. If a -1
+ *           is passed, sre_strcat() will call strlen() to
+ *           determine the length itself. Passing length
+ *           info saves the strlen() calls and can speed things
+ *           up if lots of successive appends need to be done.
+ *           
+ * Returns:  new length of dest (>=0 on success);
+ *           dest is (probably) reallocated, and modified
+ *           to a longer string, '\0' terminated.
+ */
+int
+sre_strcat(char **dest, int ldest, char *src, int lsrc)
+{
+  int   len1, len2;
+
+  if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest));
+  else           len1 = ldest;
+
+  if (lsrc < 0)  len2 = ((  src == NULL) ? 0 : strlen(src)); 
+  else           len2 = lsrc;
+
+  if (len2 == 0) return len1;
+
+  if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1));
+  else               *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1));
+
+  memcpy((*dest)+len1, src, len2+1);
+  return len1+len2;
+}
+
+/* Function: sre_strtok()
+ * Date:     SRE, Wed May 19 16:30:20 1999 [St. Louis]
+ *
+ * Purpose:  Thread-safe version of strtok().
+ *
+ *           Returns ptr to next token in a string: skips
+ *            until it reaches a character that is not in the delim
+ *            string, and sets beginning of token. Skips to
+ *            next delim character (or '\0') to set the end; replaces that
+ *            character with '\0'.
+ *           If there's still more string left, sets s to point to next 
+ *            character after the '\0' that was written, so successive 
+ *            calls extract tokens in succession. If there was no string
+ *            left, s points at the terminal '\0'. 
+ *            
+ *           If no token is found, returns NULL.
+ *            
+ *           Also returns the length of the token, which
+ *           may save us a strlen() call in some applications.
+ *           
+ * Limitations:
+ *           *s can't be a constant string, since we write to it.
+ *                      
+ * Example:  
+ *           char *tok;
+ *           int   len;
+ *           char *s;             
+ *           char  buf[50] = "This is  a sentence.";
+ *           
+ *           s = buf;  
+ *           tok = sre_strtok(&s, " ", &len);
+ *                tok is "This"; s is "is  a sentence."; len is 4.
+ *           tok = sre_strtok(&s, " ", &len);
+ *                tok is "is"; s is " a sentence."; len is 2.
+ *           tok = sre_strtok(&s, " ", &len);
+ *                tok is "a"; s is "sentence."; len is 1.
+ *           tok = sre_strtok(&s, " ", &len);
+ *                tok is "sentence."; s is "\0"; len is 9.
+ *           tok = sre_strtok(&s, " ", &len);
+ *                tok is NULL; s is "\0", len is undefined.
+ *       
+ * Args:     s     - a tmp, modifiable ptr to string
+ *           delim - characters that delimits tokens
+ *           len   - RETURN: length of token; pass NULL if not wanted
+ *
+ * Returns:  ptr to next token, or NULL if there aren't any.
+ */
+char *
+sre_strtok(char **s, char *delim, int *len)
+{
+  char *begin, *end;
+  int   n;
+
+  begin = *s;
+  begin += strspn(begin, delim);
+  if (! *begin) return NULL;
+
+  n = strcspn(begin, delim);
+  end  = begin + n;
+  if (*end == '\0') { *s = end;}
+  else {
+    *end = '\0';
+    *s   = end+1;
+  }
+
+  if (len != NULL) *len = n;
+  return begin;
+}
+
+
+
+/* Function: sre_strdup()
+ * Date:     SRE, Wed May 19 17:57:28 1999 [St. Louis]
+ *
+ * Purpose:  A version of the common but non-ANSI strdup()
+ *           function. Can pass len, if known, to save a
+ *           strlen() call.
+ *
+ * Args:     s  - string to duplicate
+ *           n  - length of string, if known; -1 if unknown.
+ *                
+ * Returns:  allocated copy of string.
+ *           NULL on failure.
+ */
+char *
+sre_strdup(char *s, int n)
+{
+  char *new;
+
+  if (s == NULL) return NULL;
+  if (n < 0) n = strlen(s);
+  new = MallocOrDie (sizeof(char) * (n+1));
+  strcpy(new, s);
+  return new;
+}
+
+
+/* Function: sre_strncpy()
+ * Date:     SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
+ *
+ * Purpose:  a strncpy() that makes sure it adds a trailing \0.
+ *
+ * Args:     s1   - string to copy to (allocated n+1 or larger)
+ *           s2   - string to copy from
+ *           n    - number of chars to copy 
+ *
+ * Returns:  s1. 
+ *           Done only for consistency with strncpy(). Not clear
+ *           why it's useful for a strncpy() to return s1.
+ */
+char *
+sre_strncpy(char *s1, char *s2, int n)
+{
+  strncpy(s1,s2,n);
+  s1[n] = '\0';
+  return s1;
+}
+
+/* Function: IsBlankline()
+ * Date:     SRE, Fri Jun 18 14:36:08 1999 [St. Louis]
+ *
+ * Purpose:  Returns TRUE if string consists solely of whitespace.
+ *
+ * Args:     s   - string to check
+ */
+int
+IsBlankline(char *s)
+{
+  for (; *s != '\0'; s++)
+    if (! isspace(*s)) return FALSE;
+  return TRUE;
+}
+
+
+
+#ifdef CUBS_WIN
+/* A timing test for sre_strcat()
+ * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm 
+ * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
+ * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
+ * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
+ */
+int main(void)
+{
+  float p[4] = {0.25, 0.25, 0.25, 0.25};
+  int   buflen;
+  int   len;
+  int   nappends;
+  int   nstrings;
+  char *s1 = NULL;
+  char *s2;
+  int   i;
+
+  nappends = 100;
+  nstrings = 1000;
+  while (nstrings--)
+    {
+      /* s1 = malloc(sizeof(char) * (255*nappends+1));
+	 s1[0] = '\0';
+      */
+
+      s1 = NULL;
+      len = 0;
+      for (i = 0; i < nappends; i++)
+	{
+	  buflen = CHOOSE(255) + 1;
+	  s2 = RandomSequence("ACGT", p, 4, buflen);
+      
+	  /* strcat(s1,s2); */
+	  if ((len = sre_strcat(&s1, len, s2, buflen)) < 0) exit(1); 
+	  free(s2);
+	}
+      free(s1); 
+    }
+  exit(0);
+}
+#endif /*CUBS_WIN*/