annotate clustalomega/clustal-omega-1.0.2/src/squid/stockholm.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2 * SQUID - a library of functions for biological sequence analysis
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 * Copyright (C) 1992-2002 Washington University School of Medicine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 * This source code is freely distributed under the terms of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 * for details.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 /* stockholm.c
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * SRE, Fri May 28 15:46:41 1999
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * Reading/writing of Stockholm format multiple sequence alignments.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 * example of API:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 * MSA *msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 * FILE *fp; -- opened for write with fopen()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 * MSAFILE *afp; -- opened for read with MSAFileOpen()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 * while ((msa = ReadStockholm(afp)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 * {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 * WriteStockholm(fp, msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 * MSAFree(msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 * }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27 * RCS $Id: stockholm.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: stockholm.c,v 1.7 2002/10/12 04:40:36 eddy Exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29 #include <stdio.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 #include <string.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 #include "squid.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 #include "msa.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34 static int parse_gf(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 static int parse_gs(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 static int parse_gc(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37 static int parse_gr(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38 static int parse_comment(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39 static int parse_sequence(MSA *msa, char *buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 static void actually_write_stockholm(FILE *fp, MSA *msa, int cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42 #ifdef TESTDRIVE_STOCKHOLM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 * stockholm.c test driver:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 * cc -DTESTDRIVE_STOCKHOLM -g -O2 -Wall -o test stockholm.c msa.c gki.c sqerror.c sre_string.c file.c hsregex.c sre_math.c sre_ctype.c -lm
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 main(int argc, char **argv)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 MSAFILE *afp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52 MSA *msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53 char *file;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 file = argv[1];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57 if ((afp = MSAFileOpen(file, MSAFILE_STOCKHOLM, NULL)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58 Die("Couldn't open %s\n", file);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60 while ((msa = ReadStockholm(afp)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62 WriteStockholm(stdout, msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 MSAFree(msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 MSAFileClose(afp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 exit(0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 /******************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70 #endif /* testdriver */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 /* Function: ReadStockholm()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 * Date: SRE, Fri May 21 17:33:10 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 * Purpose: Parse the next alignment from an open Stockholm
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 * format alignment file. Return the alignment, or
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 * NULL if there are no more alignments in the file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 * Args: afp - open alignment file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 * Returns: MSA * - an alignment object.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 * caller responsible for an MSAFree()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 * NULL if no more alignments
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 * Diagnostics:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 * Will Die() here with a (potentially) useful message
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 * if a parsing error occurs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 MSA *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 ReadStockholm(MSAFILE *afp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 MSA *msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 int status;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97 if (feof(afp->f)) return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99 /* Initialize allocation of the MSA.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 msa = MSAAlloc(10, 0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 /* Check the magic Stockholm header line.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 * We have to skip blank lines here, else we perceive
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 * trailing blank lines in a file as a format error when
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106 * reading in multi-record mode.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 if ((s = MSAFileGetLine(afp)) == NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 MSAFree(msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 } while (IsBlankline(s));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 if (strncmp(s, "# STOCKHOLM 1.", 14) != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 Die("\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 File %s doesn't appear to be in Stockholm format.\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 Assuming there isn't some other problem with your file (it is an\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 alignment file, right?), please either:\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 a) use the Babelfish format autotranslator option (-B, usually);\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121 b) specify the file's format with the --informat option; or\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 a) reformat the alignment to Stockholm format.\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 afp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125 /* Read the alignment file one line at a time.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 while ((s = MSAFileGetLine(afp)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129 while (*s == ' ' || *s == '\t') s++; /* skip leading whitespace */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131 if (*s == '#') {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 if (strncmp(s, "#=GF", 4) == 0) status = parse_gf(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 else if (strncmp(s, "#=GS", 4) == 0) status = parse_gs(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134 else if (strncmp(s, "#=GC", 4) == 0) status = parse_gc(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 else if (strncmp(s, "#=GR", 4) == 0) status = parse_gr(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136 else status = parse_comment(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 else if (strncmp(s, "//", 2) == 0) break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 else if (*s == '\n') continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 else status = parse_sequence(msa, s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 if (status == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143 Die("Stockholm format parse error: line %d of file %s while reading alignment %s",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 afp->linenumber, afp->fname, msa->name == NULL? "" : msa->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 if (s == NULL && msa->nseq != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 Die ("Didn't find // at end of alignment %s", msa->name == NULL ? "" : msa->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 if (s == NULL && msa->nseq == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 /* probably just some junk at end of file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152 MSAFree(msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 MSAVerifyParse(msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 return msa;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 /* Function: WriteStockholm()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162 * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 * Purpose: Write an alignment in standard multi-block
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 * Stockholm format to an open file. A wrapper
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 * for actually_write_stockholm().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 * Args: fp - file that's open for writing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 * msa - alignment to write
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 * Returns: (void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174 WriteStockholm(FILE *fp, MSA *msa)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 actually_write_stockholm(fp, msa, 50); /* 50 char per block */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 /* Function: WriteStockholmOneBlock()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 * Date: SRE, Mon May 31 19:15:22 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 * Purpose: Write an alignment in Pfam's single-block
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183 * Stockholm format to an open file. A wrapper
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184 * for actually_write_stockholm().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 * Args: fp - file that's open for writing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 * msa - alignment to write
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 * Returns: (void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 WriteStockholmOneBlock(FILE *fp, MSA *msa)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 actually_write_stockholm(fp, msa, msa->alen); /* one big block */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 /* Function: actually_write_stockholm()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199 * Date: SRE, Fri May 21 17:39:22 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 * Purpose: Write an alignment in Stockholm format to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202 * an open file. This is the function that actually
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203 * does the work. The API's WriteStockholm()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204 * and WriteStockholmOneBlock() are wrappers.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206 * Args: fp - file that's open for writing
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 * msa - alignment to write
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208 * cpl - characters to write per line in alignment block
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 * Returns: (void)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 actually_write_stockholm(FILE *fp, MSA *msa, int cpl)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 int i, j;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216 int len = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217 int namewidth;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218 int typewidth = 0; /* markup tags are up to 5 chars long */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219 int markupwidth = 0; /* #=GR, #=GC are four char wide + 1 space */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220 char *buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 int currpos;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222 char *s, *tok;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 /* Figure out how much space we need for name + markup
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 * to keep the alignment in register. Required by Stockholm
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226 * spec, even though our Stockholm parser doesn't care (Erik's does).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228 namewidth = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 for (i = 0; i < msa->nseq; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 if ((len = strlen(msa->sqname[i])) > namewidth)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 namewidth = len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 /* Figure out how much space we need for markup tags
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 * markupwidth = always 4 if we're doing markup: strlen("#=GR")
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 * typewidth = longest markup tag
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237 if (msa->ss != NULL) { markupwidth = 4; typewidth = 2; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 if (msa->sa != NULL) { markupwidth = 4; typewidth = 2; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 for (i = 0; i < msa->ngr; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 if ((len = strlen(msa->gr_tag[i])) > typewidth) typewidth = len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242 if (msa->rf != NULL) { markupwidth = 4; if (typewidth < 2) typewidth = 2; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243 if (msa->ss_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 if (msa->sa_cons != NULL) { markupwidth = 4; if (typewidth < 7) typewidth = 7; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245 for (i = 0; i < msa->ngc; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 if ((len = strlen(msa->gc_tag[i])) > typewidth) typewidth = len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 buf = MallocOrDie(sizeof(char) * (cpl+namewidth+typewidth+markupwidth+61));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 /* Magic Stockholm header
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 fprintf(fp, "# STOCKHOLM 1.0\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 /* Free text comments
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256 for (i = 0; i < msa->ncomment; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 fprintf(fp, "# %s\n", msa->comment[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 if (msa->ncomment > 0) fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260 /* GF section: per-file annotation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262 if (msa->name != NULL) fprintf(fp, "#=GF ID %s\n", msa->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263 if (msa->acc != NULL) fprintf(fp, "#=GF AC %s\n", msa->acc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264 if (msa->desc != NULL) fprintf(fp, "#=GF DE %s\n", msa->desc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265 if (msa->au != NULL) fprintf(fp, "#=GF AU %s\n", msa->au);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267 /* Thresholds are hacky. Pfam has two. Rfam has one.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 if (msa->cutoff_is_set[MSA_CUTOFF_GA1] && msa->cutoff_is_set[MSA_CUTOFF_GA2])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 fprintf(fp, "#=GF GA %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_GA1], msa->cutoff[MSA_CUTOFF_GA2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271 else if (msa->cutoff_is_set[MSA_CUTOFF_GA1])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272 fprintf(fp, "#=GF GA %.1f\n", msa->cutoff[MSA_CUTOFF_GA1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273 if (msa->cutoff_is_set[MSA_CUTOFF_NC1] && msa->cutoff_is_set[MSA_CUTOFF_NC2])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 fprintf(fp, "#=GF NC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_NC1], msa->cutoff[MSA_CUTOFF_NC2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275 else if (msa->cutoff_is_set[MSA_CUTOFF_NC1])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276 fprintf(fp, "#=GF NC %.1f\n", msa->cutoff[MSA_CUTOFF_NC1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 if (msa->cutoff_is_set[MSA_CUTOFF_TC1] && msa->cutoff_is_set[MSA_CUTOFF_TC2])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278 fprintf(fp, "#=GF TC %.1f %.1f\n", msa->cutoff[MSA_CUTOFF_TC1], msa->cutoff[MSA_CUTOFF_TC2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279 else if (msa->cutoff_is_set[MSA_CUTOFF_TC1])
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280 fprintf(fp, "#=GF TC %.1f\n", msa->cutoff[MSA_CUTOFF_TC1]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282 for (i = 0; i < msa->ngf; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283 fprintf(fp, "#=GF %-5s %s\n", msa->gf_tag[i], msa->gf[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284 fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287 /* GS section: per-sequence annotation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 if (msa->flags & MSA_SET_WGT)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 for (i = 0; i < msa->nseq; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 fprintf(fp, "#=GS %-*.*s WT %.2f\n", namewidth, namewidth, msa->sqname[i], msa->wgt[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 if (msa->sqacc != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 for (i = 0; i < msa->nseq; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 if (msa->sqacc[i] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 fprintf(fp, "#=GS %-*.*s AC %s\n", namewidth, namewidth, msa->sqname[i], msa->sqacc[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300 fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302 if (msa->sqdesc != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 for (i = 0; i < msa->nseq; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 if (msa->sqdesc[i] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 fprintf(fp, "#=GS %*.*s DE %s\n", namewidth, namewidth, msa->sqname[i], msa->sqdesc[i]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307 fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 for (i = 0; i < msa->ngs; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311 /* Multiannotated GS tags are possible; for example,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312 * #=GS foo DR PDB; 1xxx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313 * #=GS foo DR PDB; 2yyy;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314 * These are stored, for example, as:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 * msa->gs[0][0] = "PDB; 1xxx;\nPDB; 2yyy;"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 * and must be decomposed.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318 for (j = 0; j < msa->nseq; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319 if (msa->gs[i][j] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 s = msa->gs[i][j];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 while ((tok = sre_strtok(&s, "\n", NULL)) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323 fprintf(fp, "#=GS %*.*s %5s %s\n", namewidth, namewidth,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324 msa->sqname[j], msa->gs_tag[i], tok);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 /* Alignment section:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 * contains aligned sequence, #=GR annotation, and #=GC annotation
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 for (currpos = 0; currpos < msa->alen; currpos += cpl)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 if (currpos > 0) fprintf(fp, "\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 for (i = 0; i < msa->nseq; i++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337 strncpy(buf, msa->aseq[i] + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 fprintf(fp, "%-*.*s %s\n", namewidth+typewidth+markupwidth, namewidth+typewidth+markupwidth,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340 msa->sqname[i], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 if (msa->ss != NULL && msa->ss[i] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 strncpy(buf, msa->ss[i] + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345 fprintf(fp, "#=GR %-*.*s SS %s\n", namewidth, namewidth, msa->sqname[i], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 if (msa->sa != NULL && msa->sa[i] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 strncpy(buf, msa->sa[i] + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 fprintf(fp, "#=GR %-*.*s SA %s\n", namewidth, namewidth, msa->sqname[i], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352 for (j = 0; j < msa->ngr; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353 if (msa->gr[j][i] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354 strncpy(buf, msa->gr[j][i] + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356 fprintf(fp, "#=GR %-*.*s %5s %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357 namewidth, namewidth, msa->sqname[i], msa->gr_tag[j], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 if (msa->ss_cons != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361 strncpy(buf, msa->ss_cons + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SS_cons", buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366 if (msa->sa_cons != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 strncpy(buf, msa->sa_cons + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369 fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "SA_cons", buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372 if (msa->rf != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373 strncpy(buf, msa->rf + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth, "RF", buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 for (j = 0; j < msa->ngc; j++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378 strncpy(buf, msa->gc[j] + currpos, cpl);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379 buf[cpl] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 fprintf(fp, "#=GC %-*.*s %s\n", namewidth+typewidth, namewidth+typewidth,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381 msa->gc_tag[j], buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384 fprintf(fp, "//\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 free(buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
388
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
389
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
390
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
391
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
392 /* Format of a GF line:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
393 * #=GF <featurename> <text>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
394 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
395 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
396 parse_gf(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
397 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
398 char *gf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
399 char *featurename;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
400 char *text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
401 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
402
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
403 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
404 if ((gf = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
405 if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
406 if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
407 while (*text && (*text == ' ' || *text == '\t')) text++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
408
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
409 if (strcmp(featurename, "ID") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
410 msa->name = sre_strdup(text, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
411 else if (strcmp(featurename, "AC") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
412 msa->acc = sre_strdup(text, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
413 else if (strcmp(featurename, "DE") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
414 msa->desc = sre_strdup(text, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
415 else if (strcmp(featurename, "AU") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
416 msa->au = sre_strdup(text, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
417 else if (strcmp(featurename, "GA") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
418 { /* Pfam has GA1, GA2. Rfam just has GA1. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
419 s = text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
420 if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
421 msa->cutoff[MSA_CUTOFF_GA1] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
422 msa->cutoff_is_set[MSA_CUTOFF_GA1] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
423 if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
424 msa->cutoff[MSA_CUTOFF_GA2] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
425 msa->cutoff_is_set[MSA_CUTOFF_GA2] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
426 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
427 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
428 else if (strcmp(featurename, "NC") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
429 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
430 s = text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
431 if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
432 msa->cutoff[MSA_CUTOFF_NC1] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
433 msa->cutoff_is_set[MSA_CUTOFF_NC1] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
434 if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
435 msa->cutoff[MSA_CUTOFF_NC2] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
436 msa->cutoff_is_set[MSA_CUTOFF_NC2] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
437 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
438 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
439 else if (strcmp(featurename, "TC") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
440 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
441 s = text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
442 if ((text = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
443 msa->cutoff[MSA_CUTOFF_TC1] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
444 msa->cutoff_is_set[MSA_CUTOFF_TC1] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
445 if ((text = sre_strtok(&s, WHITESPACE, NULL)) != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
446 msa->cutoff[MSA_CUTOFF_TC2] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
447 msa->cutoff_is_set[MSA_CUTOFF_TC2] = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
448 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
449 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
450 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
451 MSAAddGF(msa, featurename, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
452
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
453 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
454 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
455
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
456
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
457 /* Format of a GS line:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
458 * #=GS <seqname> <featurename> <text>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
459 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
460 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
461 parse_gs(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
462 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
463 char *gs;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
464 char *seqname;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
465 char *featurename;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
466 char *text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
467 int seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
468 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
469
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
470 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
471 if ((gs = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
472 if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
473 if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
474 if ((text = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
475 while (*text && (*text == ' ' || *text == '\t')) text++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
476
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
477 /* GS usually follows another GS; guess lastidx+1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
478 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
479 seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
480 msa->lastidx = seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
481
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
482 if (strcmp(featurename, "WT") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
483 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
484 msa->wgt[seqidx] = atof(text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
485 msa->flags |= MSA_SET_WGT;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
486 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
487
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
488 else if (strcmp(featurename, "AC") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
489 MSASetSeqAccession(msa, seqidx, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
490
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
491 else if (strcmp(featurename, "DE") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
492 MSASetSeqDescription(msa, seqidx, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
493
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
494 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
495 MSAAddGS(msa, featurename, seqidx, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
496
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
497 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
498 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
499
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
500 /* Format of a GC line:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
501 * #=GC <featurename> <text>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
502 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
503 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
504 parse_gc(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
505 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
506 char *gc;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
507 char *featurename;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
508 char *text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
509 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
510 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
511
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
512 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
513 if ((gc = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
514 if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
515 if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
516
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
517 if (strcmp(featurename, "SS_cons") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
518 sre_strcat(&(msa->ss_cons), -1, text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
519 else if (strcmp(featurename, "SA_cons") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
520 sre_strcat(&(msa->sa_cons), -1, text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
521 else if (strcmp(featurename, "RF") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
522 sre_strcat(&(msa->rf), -1, text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
523 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
524 MSAAppendGC(msa, featurename, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
525
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
526 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
527 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
528
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
529 /* Format of a GR line:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
530 * #=GR <seqname> <featurename> <text>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
531 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
532 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
533 parse_gr(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
534 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
535 char *gr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
536 char *seqname;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
537 char *featurename;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
538 char *text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
539 int seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
540 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
541 int j;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
542 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
543
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
544 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
545 if ((gr = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
546 if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
547 if ((featurename = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
548 if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
549
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
550 /* GR usually follows sequence it refers to; guess msa->lastidx */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
551 seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
552 msa->lastidx = seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
553
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
554 if (strcmp(featurename, "SS") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
555 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
556 if (msa->ss == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
557 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
558 msa->ss = MallocOrDie(sizeof(char *) * msa->nseqalloc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
559 msa->sslen = MallocOrDie(sizeof(int) * msa->nseqalloc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
560 for (j = 0; j < msa->nseqalloc; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
561 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
562 msa->ss[j] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
563 msa->sslen[j] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
564 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
565 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
566 msa->sslen[seqidx] = sre_strcat(&(msa->ss[seqidx]), msa->sslen[seqidx], text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
567 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
568 else if (strcmp(featurename, "SA") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
569 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
570 if (msa->sa == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
571 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
572 msa->sa = MallocOrDie(sizeof(char *) * msa->nseqalloc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
573 msa->salen = MallocOrDie(sizeof(int) * msa->nseqalloc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
574 for (j = 0; j < msa->nseqalloc; j++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
575 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
576 msa->sa[j] = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
577 msa->salen[j] = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
578 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
579 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
580 msa->salen[seqidx] = sre_strcat(&(msa->sa[seqidx]), msa->salen[seqidx], text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
581 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
582 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
583 MSAAppendGR(msa, featurename, seqidx, text);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
584
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
585 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
586 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
587
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
588
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
589 /* comments are simply stored verbatim, not parsed
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
590 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
591 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
592 parse_comment(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
593 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
594 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
595 char *comment;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
596
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
597 s = buf + 1; /* skip leading '#' */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
598 if (*s == '\n') { *s = '\0'; comment = s; } /* deal with blank comment */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
599 else if ((comment = sre_strtok(&s, "\n", NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
600
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
601 MSAAddComment(msa, comment);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
602 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
603 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
604
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
605 static int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
606 parse_sequence(MSA *msa, char *buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
607 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
608 char *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
609 char *seqname;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
610 char *text;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
611 int seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
612 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
613
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
614 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
615 if ((seqname = sre_strtok(&s, WHITESPACE, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
616 if ((text = sre_strtok(&s, WHITESPACE, &len)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
617
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
618 /* seq usually follows another seq; guess msa->lastidx +1 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
619 seqidx = MSAGetSeqidx(msa, seqname, msa->lastidx+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
620 msa->lastidx = seqidx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
621
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
622 msa->sqlen[seqidx] = sre_strcat(&(msa->aseq[seqidx]), msa->sqlen[seqidx], text, len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
623 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
624 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
625
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
626
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
627