annotate clustalomega/clustal-omega-1.0.2/src/squid/phylip.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2 * SQUID - a library of functions for biological sequence analysis
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 * Copyright (C) 1992-2002 Washington University School of Medicine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 * This source code is freely distributed under the terms of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 * for details.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 /* phylip.c
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * SRE, Mon Jun 14 14:08:33 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * Import/export of PHYLIP interleaved multiple sequence alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 * format files.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16 * RCS $Id: phylip.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: phylip.c,v 1.1 1999/07/15 22:29:20 eddy Exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 #include <stdio.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20 #include <stdlib.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 #include <string.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 #include <ctype.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 #include "squid.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 #include "msa.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
#ifdef TESTDRIVE_PHYLIP
/*****************************************************************
 * phylip.c test driver:
 *
 * Usage: <program> <alignment file>
 *
 * Opens the alignment file named by the first command line
 * argument, prints the format code stored in the open file
 * handle, then repeatedly calls ReadPhylip() and echoes each
 * alignment it returns to stdout with WritePhylip().
 */
int
main(int argc, char **argv)
{
  MSAFILE *afp;
  MSA     *msa;
  char    *file;

  /* Without this check a missing argument hands a NULL filename
   * to MSAFileOpen() and to the "%s" in the error message below.
   */
  if (argc < 2)
    Die("Usage: %s <alignment file>\n", argc > 0 ? argv[0] : "phylip");

  file = argv[1];

  if ((afp = MSAFileOpen(file, MSAFILE_UNKNOWN, NULL)) == NULL)
    Die("Couldn't open %s\n", file);

  printf("format %d\n", afp->format);

  /* ReadPhylip() returns NULL once there is nothing left to read */
  while ((msa = ReadPhylip(afp)) != NULL)
    {
      WritePhylip(stdout, msa);
      MSAFree(msa);
    }

  MSAFileClose(afp);
  exit(0);
}
/******************************************************************/
#endif /* testdrive_phylip */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 /* Function: ReadPhylip()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 * Date: SRE, Fri Jun 18 12:59:37 1999 [Sanger Centre]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 * Purpose: Parse an alignment from an open Phylip format
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 * alignment file. Phylip is a single-alignment format.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 * Return the alignment, or NULL if we have no data.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 * Args: afp - open alignment file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68 * Returns: MSA * - an alignment object
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 * Caller responsible for an MSAFree()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 * NULL if no more alignments
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 MSA *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 ReadPhylip(MSAFILE *afp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 MSA *msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 char *s, *s1, *s2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 char name[11]; /* seq name max len = 10 char */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 int nseq, alen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 int idx; /* index of current sequence */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 int slen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 int nblock;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 if (feof(afp->f)) return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 /* Skip until we see a nonblank line; it's the header,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 * containing nseq/alen
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 nseq = 0; alen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 while ((s = MSAFileGetLine(afp)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 Die("Failed to parse nseq/alen from first line of PHYLIP file %s\n", afp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 if (! IsInt(s1) || ! IsInt(s2))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 Die("nseq and/or alen not an integer in first line of PHYLIP file %s\n", afp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 nseq = atoi(s1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97 alen = atoi(s2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 msa = MSAAlloc(nseq, 0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 idx = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 nblock = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 while ((s = MSAFileGetLine(afp)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106 /* ignore blank lines. nonblank lines start w/ nonblank char */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 if (isspace(*s)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 /* First block has seq names */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 if (nblock == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 strncpy(name, s, 10);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 name[10] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 GKIStoreKey(msa->index, name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 msa->sqname[idx] = sre_strdup(name, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114 s += 10;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 /* be careful of trailing whitespace on lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 if ((s1 = sre_strtok(&s, WHITESPACE, &slen)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 Die("Failed to parse sequence at line %d of PHYLIP file %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 afp->linenumber, afp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], s1, slen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 idx++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 if (idx == nseq) { idx = 0; nblock++; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125 msa->nseq = nseq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 MSAVerifyParse(msa); /* verifies; sets alen, wgt; frees sqlen[] */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 return msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 /* Function: WritePhylip()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 * Date: SRE, Fri Jun 18 12:07:41 1999 [Sanger Centre]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 * Purpose: Write an alignment in Phylip format to an open file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137 * Args: fp - file that's open for writing.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 * msa - alignment to write.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 * Returns: (void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 WritePhylip(FILE *fp, MSA *msa)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 int idx; /* counter for sequences */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 int cpl = 50; /* 50 seq char per line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 char buf[51]; /* buffer for writing seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 int pos;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 /* First line has nseq, alen
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152 fprintf(fp, " %d %d\n", msa->nseq, msa->alen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 /* Alignment section.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 * PHYLIP is a multiblock format, blocks (optionally) separated
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 * by blanks; names only attached to first block. Names are
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 * restricted to ten char; we achieve this by simple truncation (!).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 * (Do we need to convert gap characters from our ./- convention?)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160 for (pos = 0; pos < msa->alen; pos += cpl)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162 if (pos > 0) fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 for (idx = 0; idx < msa->nseq; idx++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 strncpy(buf, msa->aseq[idx] + pos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 if (pos > 0) fprintf(fp, "%s\n", buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 else fprintf(fp, "%-10.10s%s\n", msa->sqname[idx], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 }