annotate clustalomega/clustal-omega-1.0.2/src/squid/sqio.c @ 1:bc707542e5de

Uploaded
author clustalomega
date Thu, 21 Jul 2011 13:35:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1 /*****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
2 * SQUID - a library of functions for biological sequence analysis
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
3 * Copyright (C) 1992-2002 Washington University School of Medicine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
4 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
5 * This source code is freely distributed under the terms of the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
7 * for details.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
8 *****************************************************************/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
9
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
10 /* File: sqio.c
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
11 * From: ureadseq.c in Don Gilbert's sequence i/o package
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
12 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
13 * Reads and writes nucleic/protein sequence in various
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
14 * formats. Data files may have multiple sequences.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
15 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
16 * Heavily modified from READSEQ package
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
17 * Copyright (C) 1990 by D.G. Gilbert
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
18 * Biology Dept., Indiana University, Bloomington, IN 47405
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
19 * email: gilbertd@bio.indiana.edu
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
20 * Thanks Don!
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
21 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
22 * SRE: Modifications as noted. Fri Jul 3 09:44:54 1992
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
23 * Packaged for squid, Thu Oct 1 10:07:11 1992
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
24 * ANSI conversion in full swing, Mon Jul 12 12:22:21 1993
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
25 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
26 * CVS $Id: sqio.c,v 1.29 2002/08/26 23:10:52 eddy Exp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
27 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
28 *****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
29 * Basic API for single sequence reading:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
30 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
31 * SQFILE *sqfp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
32 * char *seqfile;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
33 * int format; - see squid.h for formats; example: SQFILE_FASTA
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
34 * char *seq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
35 * SQINFO sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
36 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
37 * if ((sqfp = SeqfileOpen(seqfile, format, "BLASTDB")) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
38 * Die("Failed to open sequence database file %s\n%s\n", seqfile, usage);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
39 * while (ReadSeq(sqfp, sqfp->format, &seq, &sqinfo)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
40 * do_stuff;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
41 * FreeSequence(seq, &sqinfo);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
42 * }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
43 * SeqfileClose(sqfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
44 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
45 *****************************************************************
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
46 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
47
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
48 #include <stdio.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
49 #include <stdlib.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
50 #include <string.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
51 #include <ctype.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
52
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
53 #ifndef SEEK_SET
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
54 #include <unistd.h>
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
55 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
56
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
57 #include "squid.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
58 #include "msa.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
59 #include "ssi.h"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
60
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
61 static void SeqfileGetLine(SQFILE *V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
62
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
63 #define kStartLength 500
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
64
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
65 static char *aminos = "ABCDEFGHIKLMNPQRSTVWXYZ*";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
66 static char *primenuc = "ACGTUN";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
67 static char *protonly = "EFIPQZ";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
68
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
69 static SQFILE *seqfile_open(char *filename, int format, char *env, int ssimode);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
70
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
71 /* Function: SeqfileOpen()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
72 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
73 * Purpose : Open a sequence database file and prepare for reading
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
74 * sequentially.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
75 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
76 * Args: filename - name of file to open
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
77 * format - format of file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
78 * env - environment variable for path (e.g. BLASTDB)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
79 * ssimode - -1, SSI_OFFSET_I32, or SSI_OFFSET_I64
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
80 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
81 * Returns opened SQFILE ptr, or NULL on failure.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
82 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
83 SQFILE *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
84 SeqfileOpen(char *filename, int format, char *env)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
85 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
86 return seqfile_open(filename, format, env, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
87 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
88 SQFILE *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
89 SeqfileOpenForIndexing(char *filename, int format, char *env, int ssimode)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
90 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
91 return seqfile_open(filename, format, env, ssimode);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
92 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
93 static SQFILE *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
94 seqfile_open(char *filename, int format, char *env, int ssimode)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
95 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
96 SQFILE *dbfp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
97
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
98 dbfp = (SQFILE *) MallocOrDie (sizeof(SQFILE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
99
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
100 dbfp->ssimode = ssimode;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
101 dbfp->rpl = -1; /* flag meaning "unset" */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
102 dbfp->lastrpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
103 dbfp->maxrpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
104 dbfp->bpl = -1; /* flag meaning "unset" */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
105 dbfp->lastbpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
106 dbfp->maxbpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
107
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
108 /* Open our file handle.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
109 * Three possibilities:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
110 * 1. normal file open
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
111 * 2. filename = "-"; read from stdin
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
112 * 3. filename = "*.gz"; read thru pipe from gzip
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
113 * If we're reading from stdin or a pipe, we can't reliably
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
114 * back up, so we can't do two-pass parsers like the interleaved alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
115 * formats.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
116 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
117 if (strcmp(filename, "-") == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
118 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
119 dbfp->f = stdin;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
120 dbfp->do_stdin = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
121 dbfp->do_gzip = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
122 dbfp->fname = sre_strdup("[STDIN]", -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
123 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
124 #ifndef SRE_STRICT_ANSI
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
125 /* popen(), pclose() aren't portable to non-POSIX systems; disable */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
126 else if (Strparse("^.*\\.gz$", filename, 0))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
127 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
128 char cmd[256];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
129
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
130 /* Note that popen() will return "successfully"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
131 * if file doesn't exist, because gzip works fine
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
132 * and prints an error! So we have to check for
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
133 * existence of file ourself.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
134 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
135 if (! FileExists(filename))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
136 Die("%s: file does not exist", filename);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
137
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
138 if (strlen(filename) + strlen("gzip -dc ") >= 256)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
139 Die("filename > 255 char in SeqfileOpen()");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
140 sprintf(cmd, "gzip -dc %s", filename);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
141 if ((dbfp->f = popen(cmd, "r")) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
142 return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
143
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
144 dbfp->do_stdin = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
145 dbfp->do_gzip = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
146 dbfp->fname = sre_strdup(filename, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
147 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
148 #endif /*SRE_STRICT_ANSI*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
149 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
150 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
151 if ((dbfp->f = fopen(filename, "r")) == NULL &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
152 (dbfp->f = EnvFileOpen(filename, env, NULL)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
153 return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
154
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
155 dbfp->do_stdin = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
156 dbfp->do_gzip = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
157 dbfp->fname = sre_strdup(filename, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
158 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
159
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
160
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
161 /* Invoke autodetection if we haven't already been told what
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
162 * to expect.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
163 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
164 if (format == SQFILE_UNKNOWN)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
165 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
166 if (dbfp->do_stdin == TRUE || dbfp->do_gzip)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
167 Die("Can't autodetect sequence file format from a stdin or gzip pipe");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
168 format = SeqfileFormat(dbfp->f);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
169 if (format == SQFILE_UNKNOWN)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
170 Die("Can't determine format of sequence file %s", dbfp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
171 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
172
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
173 /* The hack for sequential access of an interleaved alignment file:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
174 * read the alignment in, we'll copy sequences out one at a time.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
175 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
176 dbfp->msa = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
177 dbfp->afp = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
178 dbfp->format = format;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
179 dbfp->linenumber = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
180 dbfp->buf = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
181 dbfp->buflen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
182 if (IsAlignmentFormat(format))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
183 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
184 /* We'll be reading from the MSA interface. Copy our data
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
185 * to the MSA afp's structure.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
186 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
187 dbfp->afp = MallocOrDie(sizeof(MSAFILE));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
188 dbfp->afp->f = dbfp->f; /* just a ptr, don't close */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
189 dbfp->afp->do_stdin = dbfp->do_stdin;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
190 dbfp->afp->do_gzip = dbfp->do_gzip;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
191 dbfp->afp->fname = dbfp->fname; /* just a ptr, don't free */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
192 dbfp->afp->format = dbfp->format; /* e.g. format */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
193 dbfp->afp->linenumber = dbfp->linenumber; /* e.g. 0 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
194 dbfp->afp->buf = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
195 dbfp->afp->buflen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
196
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
197 if ((dbfp->msa = MSAFileRead(dbfp->afp)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
198 Die("Failed to read any alignment data from file %s", dbfp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
199 /* hack: overload/reuse msa->lastidx; indicates
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
200 next seq to return upon a ReadSeq() call */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
201 dbfp->msa->lastidx = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
202
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
203 return dbfp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
204 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
205
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
206 /* Load the first line.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
207 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
208 SeqfileGetLine(dbfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
209 return dbfp;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
210 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
211
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
212 /* Function: SeqfilePosition()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
213 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
214 * Purpose: Move to a particular offset in a seqfile.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
215 * Will not work on alignment files.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
216 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
217 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
218 SeqfilePosition(SQFILE *sqfp, SSIOFFSET *offset)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
219 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
220 if (sqfp->do_stdin || sqfp->do_gzip || IsAlignmentFormat(sqfp->format))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
221 Die("SeqfilePosition() failed: in a nonrewindable data file or stream");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
222
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
223 if (SSISetFilePosition(sqfp->f, offset) != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
224 Die("SSISetFilePosition failed, but that shouldn't happen.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
225 SeqfileGetLine(sqfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
226 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
227
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
228
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
229 /* Function: SeqfileRewind()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
230 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
231 * Purpose: Set a sequence file back to the first sequence.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
232 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
233 * Won't work on alignment files. Although it would
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
234 * seem that it could (just set msa->lastidx back to 0),
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
235 * that'll fail on "multiple multiple" alignment file formats
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
236 * (e.g. Stockholm).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
237 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
238 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
239 SeqfileRewind(SQFILE *sqfp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
240 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
241 if (sqfp->do_stdin || sqfp->do_gzip)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
242 Die("SeqfileRewind() failed: in a nonrewindable data file or stream");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
243
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
244 rewind(sqfp->f);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
245 SeqfileGetLine(sqfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
246 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
247
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
248 /* Function: SeqfileLineParameters()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
249 * Date: SRE, Thu Feb 15 17:00:41 2001 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
250 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
251 * Purpose: After all the sequences have been read from the file,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
252 * but before closing it, retrieve overall bytes-per-line and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
253 * residues-per-line info. If non-zero, these mean that
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
254 * the file contains homogeneous sequence line lengths (except
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
255 * the last line in each record).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
256 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
257 * If either of bpl or rpl is determined to be inhomogeneous,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
258 * both are returned as 0.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
259 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
260 * Args: *sqfp - an open but fully read sequence file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
261 * ret_bpl - RETURN: bytes per line, or 0 if inhomogeneous
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
262 * ret_rpl - RETURN: residues per line, or 0 if inhomogenous.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
263 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
264 * Returns: void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
265 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
266 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
267 SeqfileLineParameters(SQFILE *V, int *ret_bpl, int *ret_rpl)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
268 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
269 if (V->rpl > 0 && V->maxrpl == V->rpl &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
270 V->bpl > 0 && V->maxbpl == V->bpl) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
271 *ret_bpl = V->bpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
272 *ret_rpl = V->rpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
273 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
274 *ret_bpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
275 *ret_rpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
276 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
277 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
278
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
279
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
280 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
281 SeqfileClose(SQFILE *sqfp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
282 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
283 /* note: don't test for sqfp->msa being NULL. Now that
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
284 * we're holding afp open and allowing access to multi-MSA
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
285 * databases (e.g. Stockholm format, Pfam), msa ends
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
286 * up being NULL when we run out of alignments.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
287 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
288 if (sqfp->afp != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
289 if (sqfp->msa != NULL) MSAFree(sqfp->msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
290 if (sqfp->afp->buf != NULL) free(sqfp->afp->buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
291 free(sqfp->afp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
292 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
293 #ifndef SRE_STRICT_ANSI /* gunzip functionality only on POSIX systems */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
294 if (sqfp->do_gzip) pclose(sqfp->f);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
295 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
296 else if (! sqfp->do_stdin) fclose(sqfp->f);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
297 if (sqfp->buf != NULL) free(sqfp->buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
298 if (sqfp->fname != NULL) free(sqfp->fname);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
299 free(sqfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
300 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
301
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
302
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
303 /* Function: SeqfileGetLine()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
304 * Date: SRE, Tue Jun 22 09:15:49 1999 [Sanger Centre]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
305 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
306 * Purpose: read a line from a sequence file into V->buf
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
307 * If the fgets() is NULL, sets V->buf[0] to '\0'.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
308 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
309 * Args: V
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
310 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
311 * Returns: void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
312 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
313 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
314 SeqfileGetLine(SQFILE *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
315 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
316 if (V->ssimode >= 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
317 if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->ssioffset)))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
318 Die("SSIGetFilePosition() failed");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
319 if (sre_fgets(&(V->buf), &(V->buflen), V->f) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
320 *(V->buf) = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
321 V->linenumber++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
322 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
323
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
324
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
325 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
326 FreeSequence(char *seq, SQINFO *sqinfo)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
327 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
328 if (seq != NULL) free(seq); /* FS, r244, here is potential problem in profile/profile */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
329 if (sqinfo->flags & SQINFO_SS){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
330 if (NULL != sqinfo->ss){ /* FS, r244 -> r245 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
331 free(sqinfo->ss);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
332 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
333 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
334 if (sqinfo->flags & SQINFO_SA){
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
335 if (NULL != sqinfo->sa){ /* FS, r244 -> r245 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
336 free(sqinfo->sa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
337 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
338 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
339 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
340
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
341 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
342 SetSeqinfoString(SQINFO *sqinfo, char *sptr, int flag)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
343 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
344 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
345 int pos;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
346
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
347 /* silently ignore NULL. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
348 if (sptr == NULL) return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
349
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
350 while (*sptr == ' ') sptr++; /* ignore leading whitespace */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
351 for (pos = strlen(sptr)-1; pos >= 0; pos--)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
352 if (! isspace((int) sptr[pos])) break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
353 sptr[pos+1] = '\0'; /* ignore trailing whitespace */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
354
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
355 switch (flag) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
356 case SQINFO_NAME:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
357 if (*sptr != '-')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
358 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
359 strncpy(sqinfo->name, sptr, SQINFO_NAMELEN-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
360 sqinfo->name[SQINFO_NAMELEN-1] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
361 sqinfo->flags |= SQINFO_NAME;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
362 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
363 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
364
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
365 case SQINFO_ID:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
366 if (*sptr != '-')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
367 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
368 strncpy(sqinfo->id, sptr, SQINFO_NAMELEN-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
369 sqinfo->id[SQINFO_NAMELEN-1] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
370 sqinfo->flags |= SQINFO_ID;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
371 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
372 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
373
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
374 case SQINFO_ACC:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
375 if (*sptr != '-')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
376 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
377 strncpy(sqinfo->acc, sptr, SQINFO_NAMELEN-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
378 sqinfo->acc[SQINFO_NAMELEN-1] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
379 sqinfo->flags |= SQINFO_ACC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
380 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
381 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
382
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
383 case SQINFO_DESC:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
384 if (*sptr != '-')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
385 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
386 if (sqinfo->flags & SQINFO_DESC) /* append? */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
387 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
388 len = strlen(sqinfo->desc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
389 if (len < SQINFO_DESCLEN-2) /* is there room? */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
390 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
391 strncat(sqinfo->desc, " ", SQINFO_DESCLEN-1-len); len++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
392 strncat(sqinfo->desc, sptr, SQINFO_DESCLEN-1-len);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
393 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
394 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
395 else /* else copy */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
396 strncpy(sqinfo->desc, sptr, SQINFO_DESCLEN-1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
397 sqinfo->desc[SQINFO_DESCLEN-1] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
398 sqinfo->flags |= SQINFO_DESC;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
399 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
400 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
401
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
402 case SQINFO_START:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
403 if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
404 sqinfo->start = atoi(sptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
405 if (sqinfo->start != 0) sqinfo->flags |= SQINFO_START;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
406 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
407
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
408 case SQINFO_STOP:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
409 if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
410 sqinfo->stop = atoi(sptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
411 if (sqinfo->stop != 0) sqinfo->flags |= SQINFO_STOP;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
412 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
413
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
414 case SQINFO_OLEN:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
415 if (!IsInt(sptr)) { squid_errno = SQERR_FORMAT; return 0; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
416 sqinfo->olen = atoi(sptr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
417 if (sqinfo->olen != 0) sqinfo->flags |= SQINFO_OLEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
418 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
419
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
420 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
421 Die("Invalid flag %d to SetSeqinfoString()", flag);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
422 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
423 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
424 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
425
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
426 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
427 SeqinfoCopy(SQINFO *sq1, SQINFO *sq2)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
428 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
429 sq1->flags = sq2->flags;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
430 if (sq2->flags & SQINFO_NAME) strcpy(sq1->name, sq2->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
431 if (sq2->flags & SQINFO_ID) strcpy(sq1->id, sq2->id);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
432 if (sq2->flags & SQINFO_ACC) strcpy(sq1->acc, sq2->acc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
433 if (sq2->flags & SQINFO_DESC) strcpy(sq1->desc, sq2->desc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
434 if (sq2->flags & SQINFO_LEN) sq1->len = sq2->len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
435 if (sq2->flags & SQINFO_START) sq1->start = sq2->start;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
436 if (sq2->flags & SQINFO_STOP) sq1->stop = sq2->stop;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
437 if (sq2->flags & SQINFO_OLEN) sq1->olen = sq2->olen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
438 if (sq2->flags & SQINFO_TYPE) sq1->type = sq2->type;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
439 if (sq2->flags & SQINFO_SS) sq1->ss = Strdup(sq2->ss);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
440 if (sq2->flags & SQINFO_SA) sq1->sa = Strdup(sq2->sa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
441 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
442
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: ToDNA()
 *
 * Purpose:  Rewrite a NUL-terminated sequence in place as DNA:
 *           every 'U' becomes 'T' and every 'u' becomes 't'.
 *           All other characters are left alone.
 */
void
ToDNA(char *seq)
{
  char *p;

  for (p = seq; *p != '\0'; p++)
    {
      switch (*p) {
      case 'U': *p = 'T'; break;
      case 'u': *p = 't'; break;
      default:            break;
      }
    }
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
457
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: ToRNA()
 *
 * Purpose:  Rewrite a NUL-terminated sequence in place as RNA:
 *           every 'T' becomes 'U' and every 't' becomes 'u'.
 *           All other characters are left alone.
 */
void
ToRNA(char *seq)
{
  char *p;

  for (p = seq; *p != '\0'; p++)
    {
      switch (*p) {
      case 'T': *p = 'U'; break;
      case 't': *p = 'u'; break;
      default:            break;
      }
    }
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
472
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
473
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
474 /* Function: ToIUPAC()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
475 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
476 * Purpose: Convert X's, o's, other junk in a nucleic acid sequence to N's,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
477 * to comply with IUPAC code. If is_aseq is TRUE, will allow gap
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
478 * characters though, so we can call ToIUPAC() on aligned seqs.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
479 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
480 * NUCLEOTIDES is defined in squid.h as:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
481 * "ACGTUNRYMKSWHBVDacgtunrymkswhbvd"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
482 * gap chars allowed by isgap() are defined in squid.h as:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
483 * " ._-~"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
484 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
485 * WU-BLAST's pressdb will
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
486 * choke on X's, for instance, necessitating conversion
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
487 * of certain genome centers' data.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
488 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
489 void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
490 ToIUPAC(char *seq, int is_aseq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
491 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
492 if (is_aseq) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
493 for (; *seq != '\0'; seq++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
494 if (strchr(NUCLEOTIDES, *seq) == NULL && ! isgap(*seq)) *seq = 'N';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
495 } else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
496 for (; *seq != '\0'; seq++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
497 if (strchr(NUCLEOTIDES, *seq) == NULL) *seq = 'N';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
498 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
499 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
500
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
501
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
502 /* Function: addseq()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
503 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
504 * Purpose: Add a line of sequence to the growing string in V.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
505 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
506 * In the seven supported unaligned formats, all sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
507 * lines may contain whitespace that must be filtered out;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
508 * four formats (PIR, EMBL, Genbank, GCG) include coordinates
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
509 * that must be filtered out. Thus an (!isdigit && !isspace)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
510 * test on each character before we accept it.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
511 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
512 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
513 addseq(char *s, struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
514 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
515 char *s0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
516 char *sq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
517 int rpl; /* valid residues per line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
518 int bpl; /* characters per line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
519
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
520 if (V->ssimode == -1)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
521 { /* Normal mode: keeping the seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
522 /* Make sure we have enough room. We know that s is <= buflen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
523 * so just make sure we've got room for a whole new buflen worth
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
524 * of sequence.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
525 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
526 if (V->seqlen + V->buflen > V->maxseq) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
527 V->maxseq += MAX(V->buflen, kStartLength);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
528 V->seq = ReallocOrDie (V->seq, V->maxseq+1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
529 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
530
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
531 sq = V->seq + V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
532 while (*s != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
533 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
534 if (! isdigit((int) *s) && ! isspace((int) *s) && isprint((int) *s)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
535 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
536 if (! isdigit((int) *s) && ! isspace((int) *s)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
537 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
538 *sq = *s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
539 sq++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
540 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
541 s++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
542 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
543 V->seqlen = sq - V->seq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
544 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
545 else /* else: indexing mode, discard the seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
546 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
547 s0 = s;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
548 rpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
549 while (*s != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
550 if (! isdigit((int) *s) && ! isspace((int) *s)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
551 rpl++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
552 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
553 s++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
554 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
555 V->seqlen += rpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
556 bpl = s - s0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
557
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
558 /* Keep track of the global rpl, bpl for the file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
559 * This is overly complicated because we have to
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
560 * allow the last line of each record (e.g. the last addseq() call
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
561 * on each sequence) to have a different length - and sometimes
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
562 * we'll have one-line sequence records, too. Thus we only
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
563 * do something with the global V->rpl when we have *passed over*
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
564 * a line - we keep the last line's rpl in last_rpl. And because
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
565 * a file might consist entirely of single-line records, we keep
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
566 * a third guy, maxrpl, that tells us the maximum rpl of any line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
567 * in the file. If we reach the end of file and rpl is still unset,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
568 * we'll set it to maxrpl. If we reach eof and rpl is set, but is
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
569 * less than maxrpl, that's a weird case where a last line in some
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
570 * record is longer than every other line.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
571 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
572 if (V->rpl != 0) { /* 0 means we already know rpl is invalid */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
573 if (V->lastrpl > 0) { /* we're on something that's not the first line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
574 if (V->rpl > 0 && V->lastrpl != V->rpl) V->rpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
575 else if (V->rpl == -1) V->rpl = V->lastrpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
576 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
577 V->lastrpl = rpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
578 if (rpl > V->maxrpl) V->maxrpl = rpl; /* make sure we check max length of final lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
579 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
580 if (V->bpl != 0) { /* 0 means we already know bpl is invalid */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
581 if (V->lastbpl > 0) { /* we're on something that's not the first line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
582 if (V->bpl > 0 && V->lastbpl != V->bpl) V->bpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
583 else if (V->bpl == -1) V->bpl = V->lastbpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
584 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
585 V->lastbpl = bpl;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
586 if (bpl > V->maxbpl) V->maxbpl = bpl; /* make sure we check max length of final lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
587 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
588 } /* end of indexing mode of addseq(). */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
589
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
590 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
591
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
592 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
593 readLoop(int addfirst, int (*endTest)(char *,int *), struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
594 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
595 int addend = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
596 int done = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
597
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
598 V->seqlen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
599 V->lastrpl = V->lastbpl = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
600 if (addfirst) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
601 if (V->ssimode >= 0) V->d_off = V->ssioffset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
602 addseq(V->buf, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
603 } else if (V->ssimode >= 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
604 if (0 != SSIGetFilePosition(V->f, V->ssimode, &(V->d_off)))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
605 Die("SSIGetFilePosition() failed");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
606
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
607 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
608 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
609 /* feof() alone is a bug; files not necessarily \n terminated */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
610 if (*(V->buf) == '\0' && feof(V->f))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
611 done = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
612 done |= (*endTest)(V->buf, &addend);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
613 if (addend || !done)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
614 addseq(V->buf, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
615 } while (!done);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
616 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
617
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
618
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* endPIR(): end-of-record test for PIR, used with readLoop().
 * A record ends at a line starting with "///" or with "ENTRY".
 * The terminating line is never sequence, so *addend is always 0.
 * Returns 1 at the end of a record, 0 otherwise.
 */
static int
endPIR(char *s, int *addend)
{
  *addend = 0;

  if (strncmp(s, "///",   3) == 0) return 1;
  if (strncmp(s, "ENTRY", 5) == 0) return 1;
  return 0;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
629
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
630 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
631 readPIR(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
632 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
633 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
634 /* load first line of entry */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
635 while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
636 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
637 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
638 if (feof(V->f)) return;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
639 if (V->ssimode >= 0) V->r_off = V->ssioffset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
640
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
641 if ((sptr = strtok(V->buf + 15, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
642 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
643 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
644 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
645 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
646 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
647 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
648 if (!feof(V->f) && strncmp(V->buf, "TITLE", 5) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
649 SetSeqinfoString(V->sqinfo, V->buf+15, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
650 else if (!feof(V->f) && strncmp(V->buf, "ACCESSION", 9) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
651 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
652 if ((sptr = strtok(V->buf+15, " \t\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
653 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
654 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
655 } while (! feof(V->f) && (strncmp(V->buf,"SEQUENCE", 8) != 0));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
656 SeqfileGetLine(V); /* skip next line, coords */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
657
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
658 readLoop(0, endPIR, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
659
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
660 /* reading a real PIR-CODATA database file, we keep the source coords
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
661 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
662 V->sqinfo->start = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
663 V->sqinfo->stop = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
664 V->sqinfo->olen = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
665 V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
666
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
667 /* get next line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
668 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
669 while (!feof(V->f) && strncmp(V->buf, "ENTRY", 5) != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
670 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
671 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
672 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
673
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
674
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
675
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* endIG(): end-of-record test for the IG format, used with readLoop().
 * The record ends on a line containing a '1' or a '2'. That digit
 * occurs in a line with bases, so *addend is always set to 1 and the
 * final line is still added as sequence.
 * Returns 1 at the end of a record, 0 otherwise.
 */
static int
endIG(char *s, int *addend)
{
  *addend = 1;			/* 1 or 2 occur in line w/ bases */

  if (strchr(s, '1') != NULL) return 1;
  if (strchr(s, '2') != NULL) return 1;
  return 0;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
682
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
683 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
684 readIG(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
685 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
686 char *nm;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
687 /* position past ';' comments */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
688 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
689 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
690 } while (! (feof(V->f) || ((*V->buf != 0) && (*V->buf != ';')) ));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
691
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
692 if (!feof(V->f))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
693 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
694 if ((nm = strtok(V->buf, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
695 SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
696
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
697 readLoop(0, endIG, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
698 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
699
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
700 while (!(feof(V->f) || ((*V->buf != '\0') && (*V->buf == ';'))))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
701 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
702 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
703
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* endStrider(): end-of-record test for the Strider format, used with
 * readLoop(). The record ends on a line containing "//" anywhere.
 * That line is not sequence, so *addend is always 0.
 * Returns 1 at the end of a record, 0 otherwise.
 */
static int
endStrider(char *s, int *addend)
{
  *addend = 0;

  if (strstr(s, "//") != NULL) return 1;
  return 0;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
710
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
711 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
712 readStrider(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
713 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
714 char *nm;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
715
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
716 while ((!feof(V->f)) && (*V->buf == ';'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
717 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
718 if (strncmp(V->buf,"; DNA sequence", 14) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
719 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
720 if ((nm = strtok(V->buf+16, ",\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
721 SetSeqinfoString(V->sqinfo, nm, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
722 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
723 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
724 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
725
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
726 if (! feof(V->f))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
727 readLoop(1, endStrider, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
728
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
729 /* load next line
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
730 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
731 while ((!feof(V->f)) && (*V->buf != ';'))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
732 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
733 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
734
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
735
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: endGB()
 *
 * Purpose:  End-of-record test passed to readLoop() by readGenBank().
 *
 * Args:     s      - the line to examine
 *           addend - RETURN: always set to 0 by this test
 *
 * Return:   1 if s starts with "LOCUS" or contains "//" anywhere,
 *           0 otherwise.
 */
static int
endGB(char *s, int *addend)
{
  *addend = 0;

  if (strncmp(s, "LOCUS", 5) == 0) return 1; /* next record's first line */
  if (strstr(s, "//") != NULL)     return 1; /* record terminator        */
  return 0;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
742
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
743 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
744 readGenBank(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
745 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
746 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
747 int in_definition;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
748
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
749 /* We'll map three genbank identifiers onto names:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
750 * LOCUS -> sqinfo.name
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
751 * ACCESSION -> sqinfo.acc [primary accession only]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
752 * VERSION -> sqinfo.id
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
753 * We don't currently store the GI number, or secondary accessions.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
754 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
755 while (strncmp(V->buf, "LOCUS", 5) != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
756 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
757 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
758 if (V->ssimode >= 0) V->r_off = V->ssioffset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
759
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
760 if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
761 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
762
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
763 in_definition = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
764 while (! feof(V->f))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
765 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
766 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
767 if (! feof(V->f) && strstr(V->buf, "DEFINITION") == V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
768 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
769 if ((sptr = strtok(V->buf+12, "\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
770 SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
771 in_definition = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
772 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
773 else if (! feof(V->f) && strstr(V->buf, "ACCESSION") == V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
774 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
775 if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
776 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
777 in_definition = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
778 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
779 else if (! feof(V->f) && strstr(V->buf, "VERSION") == V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
780 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
781 if ((sptr = strtok(V->buf+12, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
782 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
783 in_definition = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
784 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
785 else if (strncmp(V->buf,"ORIGIN", 6) != 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
786 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
787 if (in_definition)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
788 SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
789 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
790 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
791 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
792 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
793
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
794 readLoop(0, endGB, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
795
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
796 /* reading a real GenBank database file, we keep the source coords
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
797 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
798 V->sqinfo->start = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
799 V->sqinfo->stop = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
800 V->sqinfo->olen = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
801 V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
802
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
803
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
804 while (!(feof(V->f) || ((*V->buf!=0) && (strstr(V->buf,"LOCUS") == V->buf))))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
805 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
806 /* SRE: V->s now holds "//", so sequential
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
807 reads are wedged: fixed Tue Jul 13 1993 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
808 while (!feof(V->f) && strstr(V->buf, "LOCUS ") != V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
809 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
810 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
811
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: endGCGdata()
 *
 * Purpose:  End-of-record test passed to readLoop() by readGCGdata().
 *
 * Args:     s      - the line to examine
 *           addend - RETURN: always set to 0 by this test
 *
 * Return:   1 if the first character of s is '>', 0 otherwise.
 */
static int
endGCGdata(char *s, int *addend)
{
  *addend = 0;
  return (s[0] == '>') ? 1 : 0;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
818
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
819 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
820 readGCGdata(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
821 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
822 int binary = FALSE; /* whether data are binary or not */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
823 int blen = 0; /* length of binary sequence */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
824
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
825 /* first line contains ">>>>" followed by name */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
826 if (Strparse(">>>>([^ ]+) .+2BIT +Len: ([0-9]+)", V->buf, 2))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
827 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
828 binary = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
829 SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
830 blen = atoi(sqd_parse[2]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
831 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
832 else if (Strparse(">>>>([^ ]+) .+ASCII +Len: [0-9]+", V->buf, 1))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
833 SetSeqinfoString(V->sqinfo, sqd_parse[1], SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
834 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
835 Die("bogus GCGdata format? %s", V->buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
836
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
837 /* second line contains free text description */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
838 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
839 SetSeqinfoString(V->sqinfo, V->buf, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
840
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
841 if (binary) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
842 /* allocate for blen characters +3... (allow for 3 bytes of slop) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
843 if (blen >= V->maxseq) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
844 V->maxseq = blen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
845 if ((V->seq = (char *) realloc (V->seq, sizeof(char)*(V->maxseq+4)))==NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
846 Die("malloc failed");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
847 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
848 /* read (blen+3)/4 bytes from file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
849 if (fread(V->seq, sizeof(char), (blen+3)/4, V->f) < (size_t) ((blen+3)/4))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
850 Die("fread failed");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
851 V->seqlen = blen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
852 /* convert binary code to seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
853 GCGBinaryToSequence(V->seq, blen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
854 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
855 else readLoop(0, endGCGdata, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
856
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
857 while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>'))))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
858 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
859 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
860
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: endPearson()
 *
 * Purpose:  End-of-record test passed to readLoop() by readPearson().
 *
 * Args:     s      - the line to examine
 *           addend - RETURN: always set to 0 by this test
 *
 * Return:   1 if s begins with '>', 0 otherwise.
 */
static int
endPearson(char *s, int *addend)
{
  int at_header = 0;

  if (s[0] == '>') at_header = 1;
  *addend = 0;
  return at_header;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
867
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
868 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
869 readPearson(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
870 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
871 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
872
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
873 if (V->ssimode >= 0) V->r_off = V->ssioffset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
874
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
875 if (*V->buf != '>')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
876 Die("\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
877 File %s does not appear to be in FASTA format at line %d.\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
878 You may want to specify the file format on the command line.\n\
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
879 Usually this is done with an option --informat <fmt>.\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
880 V->fname, V->linenumber);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
881
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
882 if ((sptr = strtok(V->buf+1, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
883 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
884 if ((sptr = strtok(NULL, "\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
885 SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
886
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
887 readLoop(0, endPearson, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
888
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
889 while (!(feof(V->f) || ((*V->buf != 0) && (*V->buf == '>')))) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
890 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
891 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
892 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
893
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
894
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: endEMBL()
 *
 * Purpose:  End-of-record test passed to readLoop() by readEMBL().
 *
 *           Some people (Berlin 5S rRNA database, f'r instance) use
 *           an extended EMBL format that attaches extra data after
 *           the sequence -- watch out for that. We use the fact that
 *           real EMBL sequence lines begin with five spaces.
 *
 *           We can use this as the sole end test because readEMBL() will
 *           advance to the next ID line before starting to read again.
 *           (An older test looked for "//" or a leading "ID" instead.)
 *
 * Args:     s      - the line to examine
 *           addend - RETURN: always set to 0 by this test
 *
 * Return:   0 if s begins with five spaces (a sequence line),
 *           1 otherwise (end of the sequence data).
 *
 * Note:     The literal must contain all five spaces. With fewer,
 *           strncmp() over 5 characters runs into the literal's
 *           terminator and every real sequence line is taken as the
 *           end of the record.
 */
static int
endEMBL(char *s, int *addend)
{
  *addend = 0;
  return (strncmp(s, "     ", 5) != 0);
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
910
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
911 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
912 readEMBL(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
913 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
914 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
915
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
916 /* make sure we have first line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
917 while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
918 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
919 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
920 if (V->ssimode >= 0) V->r_off = V->ssioffset;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
921
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
922 if ((sptr = strtok(V->buf+5, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
923 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
924 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
925 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ID);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
926 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
927
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
928 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
929 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
930 if (!feof(V->f) && strstr(V->buf, "AC ") == V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
931 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
932 if ((sptr = strtok(V->buf+5, "; \t\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
933 SetSeqinfoString(V->sqinfo, sptr, SQINFO_ACC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
934 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
935 else if (!feof(V->f) && strstr(V->buf, "DE ") == V->buf)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
936 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
937 if ((sptr = strtok(V->buf+5, "\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
938 SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
939 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
940 } while (! feof(V->f) && strncmp(V->buf,"SQ",2) != 0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
941
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
942 readLoop(0, endEMBL, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
943
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
944 /* Hack for Staden experiment files: convert - to N
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
945 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
946 if (V->ssimode == -1) /* if we're in ssi mode, we're not keeping the seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
947 for (sptr = V->seq; *sptr != '\0'; sptr++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
948 if (*sptr == '-') *sptr = 'N';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
949
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
950 /* reading a real EMBL database file, we keep the source coords
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
951 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
952 V->sqinfo->start = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
953 V->sqinfo->stop = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
954 V->sqinfo->olen = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
955 V->sqinfo->flags |= SQINFO_START | SQINFO_STOP | SQINFO_OLEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
956
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
957 /* load next record's ID line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
958 while (!feof(V->f) && strncmp(V->buf, "ID ", 4) != 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
959 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
960 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
961
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
962 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
963
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
964
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: endZuker()
 *
 * Purpose:  End-of-record test passed to readLoop() by readZuker().
 *
 * Args:     s      - the line to examine
 *           addend - RETURN: always set to 0 by this test
 *
 * Return:   1 if s begins with '(', 0 otherwise.
 */
static int
endZuker(char *s, int *addend)
{
  *addend = 0;
  if (s[0] != '(') return 0;
  return 1;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
971
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
972 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
973 readZuker(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
974 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
975 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
976
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
977 SeqfileGetLine(V); /*s == "seqLen seqid string..."*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
978
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
979 if ((sptr = strtok(V->buf+6, " \t\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
980 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
981
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
982 if ((sptr = strtok(NULL, "\n")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
983 SetSeqinfoString(V->sqinfo, sptr, SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
984
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
985 readLoop(0, endZuker, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
986
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
987 while (!(feof(V->f) | ((*V->buf != '\0') & (*V->buf == '('))))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
988 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
989 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
990
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
991 static void
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
992 readUWGCG(struct ReadSeqVars *V)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
993 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
994 char *si;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
995 char *sptr;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
996 int done;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
997
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
998 V->seqlen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
999
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1000 /*writeseq: " %s Length: %d (today) Check: %d ..\n" */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1001 /*drop above or ".." from id*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1002 if ((si = strstr(V->buf," Length: ")) != NULL) *si = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1003 else if ((si = strstr(V->buf,"..")) != NULL) *si = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1004
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1005 if ((sptr = strtok(V->buf, "\n\t ")) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1006 SetSeqinfoString(V->sqinfo, sptr, SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1007
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1008 do {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1009 done = feof(V->f);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1010 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1011 if (! done) addseq(V->buf, V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1012 } while (!done);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1013 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1014
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1015
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1016 /* Function: ReadSeq()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1017 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1018 * Purpose: Read next sequence from an open database file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1019 * Return the sequence and associated info.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1020 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1021 * Args: V - open sequence database file pointer
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1022 * format - format of the file (previously determined
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1023 * by call to SeqfileFormat()).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1024 * Currently unused, since we carry it in V.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1025 * ret_seq - RETURN: sequence
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1026 * sqinfo - RETURN: filled in w/ other information
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1027 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1028 * Limitations: uses squid_errno, so it's not threadsafe.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1029 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1030 * Return: 1 on success, 0 on failure.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1031 * ret_seq and some fields of sqinfo are allocated here.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1032 * The preferred call mechanism to properly free the memory is:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1033 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1034 * SQINFO sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1035 * char *seq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1036 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1037 * ReadSeq(fp, format, &seq, &sqinfo);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1038 * ... do something...
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1039 * FreeSequence(seq, &sqinfo);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1040 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1041 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1042 ReadSeq(SQFILE *V, int format, char **ret_seq, SQINFO *sqinfo)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1043 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1044 int gotuw;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1045
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1046 squid_errno = SQERR_OK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1047
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1048 /* Here's the hack for sequential access of sequences from
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1049 * the multiple sequence alignment formats
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1050 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1051 if (IsAlignmentFormat(V->format))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1052 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1053 if (V->msa->lastidx >= V->msa->nseq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1054 { /* out of data. try to read another alignment */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1055 MSAFree(V->msa);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1056 if ((V->msa = MSAFileRead(V->afp)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1057 return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1058 V->msa->lastidx = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1059 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1060 /* copy and dealign the appropriate aligned seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1061 /* AW: stopping squid from dealigning sequences and corresponding info */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1062 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1063 V->seq = sre_strdup(V->msa->aseq[V->msa->lastidx], V->msa->alen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1064 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1065 MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1066 V->msa->aseq[V->msa->lastidx], &(V->seq));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1067 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1068 V->seqlen = strlen(V->seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1069
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1070 /* Extract sqinfo stuff for this sequence from the msa.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1071 * Tedious; code that should be cleaned.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1072 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1073 sqinfo->flags = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1074 if (V->msa->sqname[V->msa->lastidx] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1075 SetSeqinfoString(sqinfo, V->msa->sqname[V->msa->lastidx], SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1076 if (V->msa->sqacc != NULL && V->msa->sqacc[V->msa->lastidx] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1077 SetSeqinfoString(sqinfo, V->msa->sqacc[V->msa->lastidx], SQINFO_ACC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1078 if (V->msa->sqdesc != NULL && V->msa->sqdesc[V->msa->lastidx] != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1079 SetSeqinfoString(sqinfo, V->msa->sqdesc[V->msa->lastidx], SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1080 if (V->msa->ss != NULL && V->msa->ss[V->msa->lastidx] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1081 /* AW: stopping squid from dealigning sequences and corresponding info */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1082 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1083 sqinfo->ss = sre_strdup(V->msa->ss[V->msa->lastidx], V->msa->alen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1084 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1085 MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1086 V->msa->ss[V->msa->lastidx], &(sqinfo->ss));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1087 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1088 sqinfo->flags |= SQINFO_SS;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1089 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1090 if (V->msa->sa != NULL && V->msa->sa[V->msa->lastidx] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1091 /* AW: stopping squid from dealigning sequences and corresponding info */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1092 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1093 sqinfo->sa = sre_strdup(V->msa->sa[V->msa->lastidx], V->msa->alen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1094 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1095 MakeDealignedString(V->msa->aseq[V->msa->lastidx], V->msa->alen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1096 V->msa->sa[V->msa->lastidx], &(sqinfo->sa));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1097 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1098 sqinfo->flags |= SQINFO_SA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1099 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1100 V->msa->lastidx++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1101 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1102 else {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1103 if (feof(V->f)) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1104
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1105 if (V->ssimode == -1) { /* normal mode */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1106 V->seq = (char*) calloc (kStartLength+1, sizeof(char));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1107 V->maxseq = kStartLength;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1108 } else { /* index mode: discarding seq */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1109 V->seq = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1110 V->maxseq = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1111 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1112 V->seqlen = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1113 V->sqinfo = sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1114 V->sqinfo->flags = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1115
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1116 switch (V->format) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1117 case SQFILE_IG : readIG(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1118 case SQFILE_STRIDER : readStrider(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1119 case SQFILE_GENBANK : readGenBank(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1120 case SQFILE_FASTA : readPearson(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1121 case SQFILE_EMBL : readEMBL(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1122 case SQFILE_ZUKER : readZuker(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1123 case SQFILE_PIR : readPIR(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1124 case SQFILE_GCGDATA : readGCGdata(V); break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1125
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1126 case SQFILE_GCG :
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1127 do { /* skip leading comments on GCG file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1128 gotuw = (strstr(V->buf,"..") != NULL);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1129 if (gotuw) readUWGCG(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1130 SeqfileGetLine(V);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1131 } while (! feof(V->f));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1132 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1133
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1134 case SQFILE_IDRAW: /* SRE: no attempt to read idraw postscript */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1135 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1136 squid_errno = SQERR_FORMAT;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1137 free(V->seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1138 return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1139 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1140 if (V->seq != NULL) /* (it can be NULL in indexing mode) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1141 V->seq[V->seqlen] = 0; /* stick a string terminator on it */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1142 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1143
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1144 /* Cleanup
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1145 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1146 sqinfo->len = V->seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1147 sqinfo->flags |= SQINFO_LEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1148 *ret_seq = V->seq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1149 if (squid_errno == SQERR_OK) return 1; else return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1150 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1151
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1152 /* Function: SeqfileFormat()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1153 * Date: SRE, Tue Jun 22 10:58:58 1999 [Sanger Centre]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1154 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1155 * Purpose: Determine format of an open file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1156 * Returns format code.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1157 * Rewinds the file.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1158 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1159 * Autodetects the following unaligned formats:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1160 * SQFILE_FASTA
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1161 * SQFILE_GENBANK
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1162 * SQFILE_EMBL
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1163 * SQFILE_GCG
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1164 * SQFILE_GCGDATA
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1165 * SQFILE_PIR
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1166 * Also autodetects the following alignment formats:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1167 * MSAFILE_STOCKHOLM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1168 * MSAFILE_MSF
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1169 * MSAFILE_CLUSTAL
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1170 * MSAFILE_SELEX
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1171 * MSAFILE_PHYLIP
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1172 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1173 * Can't autodetect MSAFILE_A2M, calls it SQFILE_FASTA.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1174 * MSAFileFormat() does the opposite.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1175 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1176 * Args: sfp - open SQFILE
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1177 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1178 * Return: format code, or SQFILE_UNKNOWN if unrecognized
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1179 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1180 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1181 SeqfileFormat(FILE *fp)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1182 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1183 char *buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1184 int len;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1185 int fmt = SQFILE_UNKNOWN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1186 int ndataline;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1187 char *bufcpy, *s, *s1, *s2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1188 int has_junk;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1189
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1190 buf = NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1191 len = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1192 ndataline = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1193 has_junk = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1194 while (sre_fgets(&buf, &len, fp) != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1195 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1196 if (IsBlankline(buf)) continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1197
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1198 /* Well-behaved formats identify themselves in first nonblank line.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1199 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1200 if (ndataline == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1201 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1202 if (strncmp(buf, ">>>>", 4) == 0 && strstr(buf, "Len: "))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1203 { fmt = SQFILE_GCGDATA; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1204
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1205 if (buf[0] == '>')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1206 { fmt = SQFILE_FASTA; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1207
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1208 if (strncmp(buf, "!!AA_SEQUENCE", 13) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1209 strncmp(buf, "!!NA_SEQUENCE", 13) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1210 { fmt = SQFILE_GCG; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1211
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1212 if (strncmp(buf, "# STOCKHOLM 1.", 14) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1213 { fmt = MSAFILE_STOCKHOLM; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1214
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1215 if (strncmp(buf, "CLUSTAL", 7) == 0 &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1216 strstr(buf, "multiple sequence alignment") != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1217 { fmt = MSAFILE_CLUSTAL; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1218
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1219 if (strncmp(buf, "!!AA_MULTIPLE_ALIGNMENT", 23) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1220 strncmp(buf, "!!NA_MULTIPLE_ALIGNMENT", 23) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1221 { fmt = MSAFILE_MSF; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1222
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1223 /* PHYLIP id: also just a good bet */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1224 bufcpy = sre_strdup(buf, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1225 s = bufcpy;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1226 if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) != NULL &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1227 (s2 = sre_strtok(&s, WHITESPACE, NULL)) != NULL &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1228 IsInt(s1) &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1229 IsInt(s2))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1230 { free(bufcpy); fmt = MSAFILE_PHYLIP; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1231 free(bufcpy);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1232 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1233
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1234 /* We trust that other formats identify themselves soon.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1235 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1236 /* dead giveaways for extended SELEX */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1237 if (strncmp(buf, "#=AU", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1238 strncmp(buf, "#=ID", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1239 strncmp(buf, "#=AC", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1240 strncmp(buf, "#=DE", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1241 strncmp(buf, "#=GA", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1242 strncmp(buf, "#=TC", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1243 strncmp(buf, "#=NC", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1244 strncmp(buf, "#=SQ", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1245 strncmp(buf, "#=SS", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1246 strncmp(buf, "#=CS", 4) == 0 ||
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1247 strncmp(buf, "#=RF", 4) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1248 { fmt = MSAFILE_SELEX; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1249
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1250 if (strncmp(buf, "///", 3) == 0 || strncmp(buf, "ENTRY ", 6) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1251 { fmt = SQFILE_PIR; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1252
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1253 /* a ha, diagnostic of an (old) MSF file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1254 if ((strstr(buf, "..") != NULL) &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1255 (strstr(buf, "MSF:") != NULL) &&
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1256 (strstr(buf, "Check:")!= NULL))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1257 { fmt = MSAFILE_MSF; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1258
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1259 /* unaligned GCG (must follow MSF test!) */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1260 if (strstr(buf, " Check: ") != NULL && strstr(buf, "..") != NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1261 { fmt = SQFILE_GCG; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1262
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1263 if (strncmp(buf,"LOCUS ",6) == 0 || strncmp(buf,"ORIGIN ",6) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1264 { fmt = SQFILE_GENBANK; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1265
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1266 if (strncmp(buf,"ID ",5) == 0 || strncmp(buf,"SQ ",5) == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1267 { fmt = SQFILE_EMBL; goto DONE; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1268
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1269 /* But past here, we're being desperate. A simple SELEX file is
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1270 * very difficult to detect; we can only try to disprove it.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1271 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1272 s = buf;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1273 if ((s1 = sre_strtok(&s, WHITESPACE, NULL)) == NULL) continue; /* skip blank lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1274 if (strchr("#%", *s1) != NULL) continue; /* skip comment lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1275
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1276 /* Disproof 1. Noncomment, nonblank lines in a SELEX file
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1277 * must have at least two space-delimited fields (name/seq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1278 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1279 if ((s2 = sre_strtok(&s, WHITESPACE, NULL)) == NULL)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1280 has_junk = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1281
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1282 /* Disproof 2.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1283 * The sequence field should look like a sequence.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1284 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1285 if (s2 != NULL && Seqtype(s2) == kOtherSeq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1286 has_junk = TRUE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1287
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1288 ndataline++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1289 if (ndataline == 300) break; /* only look at first 300 lines */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1290 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1291
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1292 if (ndataline == 0)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1293 Die("Sequence file contains no data");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1294
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1295 /* If we've made it this far, we've run out of data, but there
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1296 * was at least one line of it; check if we've
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1297 * disproven SELEX. If not, cross our fingers, pray, and guess SELEX.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1298 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1299 if (has_junk == TRUE) fmt = SQFILE_UNKNOWN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1300 else fmt = MSAFILE_SELEX;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1301
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1302 DONE:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1303 if (buf != NULL) free(buf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1304 rewind(fp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1305 return fmt;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1306 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1307
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: GCGBinaryToSequence()
 *
 * Purpose:  Convert a GCG 2BIT binary string to DNA sequence.
 *           0 = C  1 = T  2 = A  3 = G
 *           4 nts/byte; within a byte the first nucleotide is held
 *           in the two most significant bits.
 *
 *           The conversion is done in place, working from the last
 *           packed byte back to the first so that no packed byte is
 *           overwritten before it has been read.
 *
 * Args:     seq - binary sequence. Converted in place to DNA.
 *                 Must have room for len+1 bytes (sequence plus
 *                 terminating NUL); nothing beyond seq[len] is written.
 *           len - length of DNA. binary is (len+3)/4 bytes
 *
 * Return:   1 on success; 0 if len is negative (seq is left untouched).
 */
int
GCGBinaryToSequence(char *seq, int len)
{
  static const char nt_code[4] = { 'C', 'T', 'A', 'G' };
  int           bpos;           /* position in binary   */
  int           spos;           /* position in sequence */
  unsigned char twobit;         /* unsigned: well-defined right shifts */
  int           i;

  if (len < 0) return 0;

  if (len > 0)
    {
      for (bpos = (len-1)/4; bpos >= 0; bpos--)
        {
          twobit = (unsigned char) seq[bpos];
          spos   = bpos*4;

          for (i = 3; i >= 0; i--)
            {
              /* The last packed byte may carry padding when len is
               * not a multiple of 4; never write past the sequence.
               */
              if (spos+i < len)
                seq[spos+i] = nt_code[twobit & 0x3];
              twobit = twobit >> 2;
            }
        }
    }
  seq[len] = '\0';
  return 1;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1344
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1345
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: GCGchecksum()
 * Date:     SRE, Mon May 31 11:13:21 1999 [St. Louis]
 *
 * Purpose:  Calculate a GCG checksum for a sequence.
 *           Each character is upper-cased with sre_toupper(),
 *           multiplied by a weight that cycles 1..57 with its
 *           position, and added to a running total that is
 *           reduced modulo 10000 after every character.
 *           Code provided by Steve Smith of Genetics
 *           Computer Group.
 *
 * Args:     seq -  sequence to calculate checksum for.
 *                  may contain gap symbols.
 *           len -  length of sequence (usually known,
 *                  so save a strlen() call)
 *
 * Returns:  GCG checksum.
 */
int
GCGchecksum(char *seq, int len)
{
  int total = 0;                /* running checksum       */
  int pos;                      /* position in sequence   */

  for (pos = 0; pos < len; pos++)
    {
      int weight  = pos % 57 + 1;
      int residue = sre_toupper((int) seq[pos]);

      total += weight * residue;
      total %= 10000;
    }
  return total;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1370
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1371
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: GCGMultchecksum()
 *
 * Purpose:  GCG checksum for a multiple alignment: sum of
 *           individual sequence checksums (including their
 *           gap characters) modulo 10000.
 *
 *           Implemented using spec provided by Steve Smith of
 *           Genetics Computer Group.
 *
 * Args:     seqs - sequences to be checksummed; aligned or not.
 *                  Each must be NUL-terminated: its length is
 *                  taken with strlen().
 *           nseq - number of sequences
 *
 * Return:   the combined checksum: the running sum of the
 *           per-sequence GCGchecksum() values, reduced modulo
 *           10000 after each addition.
 */
int
GCGMultchecksum(char **seqs, int nseq)
{
  int combined = 0;
  int k        = 0;

  while (k < nseq)
    {
      char *one = seqs[k];

      combined = (combined + GCGchecksum(one, strlen(one))) % 10000;
      k++;
    }
  return combined;
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1396
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1397
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1398
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1399
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1400 /* Function: Seqtype()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1401 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1402 * Purpose: Returns a (very good) guess about type of sequence:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1403 * kDNA, kRNA, kAmino, or kOtherSeq.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1404 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1405 * Modified from, and replaces, Gilbert getseqtype().
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1406 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1407 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1408 Seqtype(char *seq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1409 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1410 int saw; /* how many non-gap characters I saw */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1411 char c;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1412 int po = 0; /* count of protein-only */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1413 int nt = 0; /* count of t's */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1414 int nu = 0; /* count of u's */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1415 int na = 0; /* count of nucleotides */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1416 int aa = 0; /* count of amino acids */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1417 int no = 0; /* count of others */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1418
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1419 /* Look at the first 300 non-gap characters
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1420 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1421
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1422 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1423 /* VGGNGDDYLSGGTGNDTL is recognized as unknown using squid's default
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1424 * approach.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1425 * We change it to the following:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1426
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1427 * 1. counting: ignore gaps and not alpha characters. if protein-only then
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1428 * count as such (po). otherwise decide if amino-acid (aa) or nucleic-acid
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1429 * (na) or unknown (no)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1430 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1431 * 2. determine type: if we saw more unknown than aa or na, return unknown.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1432 * if encountered protein-only return protein-only. otherwise decide based
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1433 * on majority. (if aa==na return na)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1434 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1435 for (saw = 0; *seq != '\0' && saw < 300; seq++) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1436 c = sre_toupper((int) *seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1437 int unknown = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1438
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1439 if (isgap(c) || ! isalpha((int) c)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1440 continue;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1441 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1442
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1443 if (strchr(protonly, c)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1444 po++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1445 unknown = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1446 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1447
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1448 if (strchr(aminos,c)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1449 aa++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1450 unknown = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1451 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1452
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1453 if (strchr(primenuc,c)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1454 na++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1455 unknown = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1456
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1457 if (c == 'T')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1458 nt++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1459 else if (c == 'U')
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1460 nu++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1461 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1462
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1463 if (unknown) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1464 no ++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1465 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1466
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1467 saw++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1468 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1469
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1470 if (no > aa && no > na)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1471 return kOtherSeq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1472
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1473 if (po > 0 || aa>na)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1474 return kAmino;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1475
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1476 if (na >= aa) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1477 if (nu > nt)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1478 return kRNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1479 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1480 return kDNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1481 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1482
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1483 return kOtherSeq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1484
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1485
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1486 #else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1487 for (saw = 0; *seq != '\0' && saw < 300; seq++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1488 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1489 c = sre_toupper((int) *seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1490 if (! isgap(c))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1491 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1492 if (strchr(protonly, c)) po++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1493 else if (strchr(primenuc,c)) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1494 na++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1495 if (c == 'T') nt++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1496 else if (c == 'U') nu++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1497 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1498 else if (strchr(aminos,c)) aa++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1499 else if (isalpha((int) c)) no++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1500 saw++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1501 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1502 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1503
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1504 if (no > 0) return kOtherSeq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1505 else if (po > 0) return kAmino;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1506 else if (na > aa) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1507 if (nu > nt) return kRNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1508 else return kDNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1509 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1510 else return kAmino; /* ooooh. risky. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1511 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1512
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1513 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1514
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1515
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1516 /* Function: GuessAlignmentSeqtype()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1517 * Date: SRE, Wed Jul 7 09:42:34 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1518 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1519 * Purpose: Try to guess whether an alignment is protein
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1520 * or nucleic acid; return a code for the
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1521 * type (kRNA, kDNA, or kAmino).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1522 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1523 * Args: aseq - array of aligned sequences. (Could also
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1524 * be an rseq unaligned sequence array)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1525 * nseq - number of aseqs
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1526 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1527 * Returns: kRNA, kDNA, kAmino;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1528 * kOtherSeq if inconsistency is detected.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1529 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1530 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1531 GuessAlignmentSeqtype(char **aseq, int nseq)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1532 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1533 int idx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1534 int nrna = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1535 int ndna = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1536 int namino = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1537 int nother = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1538
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1539 for (idx = 0; idx < nseq; idx++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1540 switch (Seqtype(aseq[idx])) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1541 case kRNA: nrna++; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1542 case kDNA: ndna++; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1543 case kAmino: namino++; break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1544 default: nother++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1545 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1546
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1547 /* Unambiguous decisions:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1548 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1549 if (nother) return kOtherSeq;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1550 if (namino == nseq) return kAmino;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1551 if (ndna == nseq) return kDNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1552 if (nrna == nseq) return kRNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1553
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1554 /* Ambiguous decisions:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1555 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1556 if (namino == 0) return kRNA; /* it's nucleic acid, but seems mixed RNA/DNA */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1557 return kAmino; /* some amino acid seen; others probably short seqs, some
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1558 of which may be entirely ACGT (ala,cys,gly,thr). We
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1559 could be a little more sophisticated: U would be a giveaway
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1560 that we're not in protein seqs */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1561 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1562
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* Function: WriteSimpleFASTA()
 *
 * Purpose:  Minimal FASTA writer: one ">name desc" header line
 *           followed by the sequence broken into lines of at
 *           most 60 characters.
 *
 * Args:     fp   - open output stream (may be stdout)
 *           seq  - NUL-terminated sequence to write
 *           name - sequence name
 *           desc - description for the header line, or NULL for none
 *
 * Returns:  void
 */
void
WriteSimpleFASTA(FILE *fp, char *seq, char *name, char *desc)
{
  int seqlen = strlen(seq);
  int offset;

  /* The separating space is written even when there is no description. */
  fprintf(fp, ">%s %s\n", name, desc != NULL ? desc : "");

  /* "%.60s" prints at most 60 characters and stops early at the
   * terminating NUL, so the final short line needs no special case.
   */
  for (offset = 0; offset < seqlen; offset += 60)
    fprintf(fp, "%.60s\n", seq + offset);
}
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1592
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1593 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1594 WriteSeq(FILE *outf, int outform, char *seq, SQINFO *sqinfo)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1595 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1596 int numline = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1597 int lines = 0, spacer = 0, width = 50, tab = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1598 int i, j, l, l1, ibase;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1599 char endstr[10];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1600 char s[100]; /* buffer for sequence */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1601 char ss[100]; /* buffer for structure */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1602 int checksum = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1603 int seqlen;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1604 int which_case; /* 0 = do nothing. 1 = upper case. 2 = lower case */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1605 int dostruc; /* TRUE to print structure lines*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1606
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1607 which_case = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1608 dostruc = FALSE;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1609 seqlen = (sqinfo->flags & SQINFO_LEN) ? sqinfo->len : strlen(seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1610
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1611 if (IsAlignmentFormat(outform))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1612 Die("Tried to write an aligned format with WriteSeq() -- bad, bad.");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1613
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1614
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1615 strcpy( endstr,"");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1616 l1 = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1617 checksum = GCGchecksum(seq, seqlen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1618
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1619 switch (outform) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1620 case SQFILE_UNKNOWN: /* no header, just sequence */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1621 strcpy(endstr,"\n"); /* end w/ extra blank line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1622 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1623
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1624 case SQFILE_GENBANK:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1625 fprintf(outf,"LOCUS %s %d bp\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1626 sqinfo->name, seqlen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1627 fprintf(outf,"ACCESSION %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1628 (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : ".");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1629 fprintf(outf,"DEFINITION %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1630 (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : ".");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1631 fprintf(outf,"VERSION %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1632 (sqinfo->flags & SQINFO_ID) ? sqinfo->id : ".");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1633 fprintf(outf,"ORIGIN \n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1634 spacer = 11;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1635 numline = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1636 strcpy(endstr, "\n//");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1637 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1638
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1639 case SQFILE_GCGDATA:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1640 fprintf(outf, ">>>>%s 9/95 ASCII Len: %d\n", sqinfo->name, seqlen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1641 fprintf(outf, "%s\n", (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1642 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1643
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1644 case SQFILE_PIR:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1645 fprintf(outf, "ENTRY %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1646 (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1647 fprintf(outf, "TITLE %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1648 (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1649 fprintf(outf, "ACCESSION %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1650 (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1651 fprintf(outf, "SUMMARY #Length %d #Checksum %d\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1652 sqinfo->len, checksum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1653 fprintf(outf, "SEQUENCE\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1654 fprintf(outf, " 5 10 15 20 25 30\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1655 spacer = 2; /* spaces after every residue */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1656 numline = 1; /* number lines w/ coords */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1657 width = 30; /* 30 aa per line */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1658 strcpy(endstr, "\n///");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1659 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1660
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1661 case SQFILE_SQUID:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1662 fprintf(outf, "NAM %s\n", sqinfo->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1663 if (sqinfo->flags & (SQINFO_ID | SQINFO_ACC | SQINFO_START | SQINFO_STOP | SQINFO_OLEN))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1664 fprintf(outf, "SRC %s %s %d..%d::%d\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1665 (sqinfo->flags & SQINFO_ID) ? sqinfo->id : "-",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1666 (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1667 (sqinfo->flags & SQINFO_START) ? sqinfo->start : 0,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1668 (sqinfo->flags & SQINFO_STOP) ? sqinfo->stop : 0,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1669 (sqinfo->flags & SQINFO_OLEN) ? sqinfo->olen : 0);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1670 if (sqinfo->flags & SQINFO_DESC)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1671 fprintf(outf, "DES %s\n", sqinfo->desc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1672 if (sqinfo->flags & SQINFO_SS)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1673 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1674 fprintf(outf, "SEQ +SS\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1675 dostruc = TRUE; /* print structure lines too */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1676 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1677 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1678 fprintf(outf, "SEQ\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1679 numline = 1; /* number seq lines w/ coords */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1680 strcpy(endstr, "\n++");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1681 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1682
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1683 case SQFILE_EMBL:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1684 fprintf(outf,"ID %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1685 (sqinfo->flags & SQINFO_ID) ? sqinfo->id : sqinfo->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1686 fprintf(outf,"AC %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1687 (sqinfo->flags & SQINFO_ACC) ? sqinfo->acc : "-");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1688 fprintf(outf,"DE %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1689 (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "-");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1690 fprintf(outf,"SQ %d BP\n", seqlen);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1691 strcpy(endstr, "\n//"); /* 11Oct90: bug fix*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1692 tab = 5; /** added 31jan91 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1693 spacer = 11; /** added 31jan91 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1694 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1695
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1696 case SQFILE_GCG:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1697 fprintf(outf,"%s\n", sqinfo->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1698 if (sqinfo->flags & SQINFO_ACC)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1699 fprintf(outf,"ACCESSION %s\n", sqinfo->acc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1700 if (sqinfo->flags & SQINFO_DESC)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1701 fprintf(outf,"DEFINITION %s\n", sqinfo->desc);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1702 fprintf(outf," %s Length: %d (today) Check: %d ..\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1703 sqinfo->name, seqlen, checksum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1704 spacer = 11;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1705 numline = 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1706 strcpy(endstr, "\n"); /* this is insurance to help prevent misreads at eof */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1707 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1708
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1709 case SQFILE_STRIDER: /* ?? map ?*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1710 fprintf(outf,"; ### from DNA Strider ;-)\n");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1711 fprintf(outf,"; DNA sequence %s, %d bases, %d checksum.\n;\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1712 sqinfo->name, seqlen, checksum);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1713 strcpy(endstr, "\n//");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1714 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1715
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1716 /* SRE: Don had Zuker default to Pearson, which is not
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1717 intuitive or helpful, since Zuker's MFOLD can't read
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1718 Pearson format. More useful to use kIG */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1719 case SQFILE_ZUKER:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1720 which_case = 1; /* MFOLD requires upper case. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1721 /*FALLTHRU*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1722 case SQFILE_IG:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1723 fprintf(outf,";%s %s\n",
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1724 sqinfo->name,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1725 (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1726 fprintf(outf,"%s\n", sqinfo->name);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1727 strcpy(endstr,"1"); /* == linear dna */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1728 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1729
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1730 case SQFILE_RAW: /* Raw: no header at all. */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1731 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1732
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1733 default :
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1734 case SQFILE_FASTA:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1735 fprintf(outf,">%s %s\n", sqinfo->name,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1736 (sqinfo->flags & SQINFO_DESC) ? sqinfo->desc : "");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1737 break;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1738 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1739
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1740 if (which_case == 1) s2upper(seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1741 if (which_case == 2) s2lower(seq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1742
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1743
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1744 width = MIN(width,100);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1745 for (i=0, l=0, ibase = 1, lines = 0; i < seqlen; ) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1746 if (l1 < 0) l1 = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1747 else if (l1 == 0) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1748 if (numline) fprintf(outf,"%8d ",ibase);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1749 for (j=0; j<tab; j++) fputc(' ',outf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1750 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1751 if ((spacer != 0) && ((l+1) % spacer == 1))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1752 { s[l] = ' '; ss[l] = ' '; l++; }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1753 s[l] = seq[i];
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1754 ss[l] = (sqinfo->flags & SQINFO_SS) ? sqinfo->ss[i] : '.';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1755 l++; i++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1756 l1++; /* don't count spaces for width*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1757 if (l1 == width || i == seqlen) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1758 s[l] = ss[l] = '\0';
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1759 l = 0; l1 = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1760 if (dostruc)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1761 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1762 fprintf(outf, "%s\n", s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1763 if (numline) fprintf(outf," ");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1764 for (j=0; j<tab; j++) fputc(' ',outf);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1765 if (i == seqlen) fprintf(outf,"%s%s\n",ss,endstr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1766 else fprintf(outf,"%s\n",ss);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1767 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1768 else
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1769 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1770 if (i == seqlen) fprintf(outf,"%s%s\n",s,endstr);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1771 else fprintf(outf,"%s\n",s);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1772 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1773 lines++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1774 ibase = i+1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1775 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1776 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1777 return lines;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1778 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1779
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1780
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1781 /* Function: ReadMultipleRseqs()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1782 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1783 * Purpose: Open a data file and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1784 * parse it into an array of rseqs (raw, unaligned
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1785 * sequences).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1786 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1787 * Caller is responsible for free'ing memory allocated
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1788 * to ret_rseqs, ret_weights, and ret_names.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1789 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1790 * Weights are currently only supported for MSF format.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1791 * Sequences read from all other formats will be assigned
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1792 * weights of 1.0. If the caller isn't interested in
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1793 * weights, it passes NULL as ret_weights.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1794 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1795 * Returns 1 on success. Returns 0 on failure and sets
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1796 * squid_errno to indicate the cause.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1797 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1798 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1799 ReadMultipleRseqs(char *seqfile,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1800 int fformat,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1801 char ***ret_rseqs,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1802 SQINFO **ret_sqinfo,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1803 int *ret_num)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1804 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1805 SQINFO *sqinfo; /* array of sequence optional info */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1806 SQFILE *dbfp; /* open ptr for sequential access of file */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1807 char **rseqs; /* sequence array */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1808 int numalloced; /* num of seqs currently alloced for */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1809 int num;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1810
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1811
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1812 num = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1813 numalloced = 16;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1814 rseqs = (char **) MallocOrDie (numalloced * sizeof(char *));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1815 sqinfo = (SQINFO *) MallocOrDie (numalloced * sizeof(SQINFO));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1816 if ((dbfp = SeqfileOpen(seqfile, fformat, NULL)) == NULL) return 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1817
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1818 while (ReadSeq(dbfp, dbfp->format, &rseqs[num], &(sqinfo[num])))
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1819 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1820 num++;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1821 if (num == numalloced) /* more seqs coming, alloc more room */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1822 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1823 numalloced += 16;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1824 rseqs = (char **) ReallocOrDie (rseqs, numalloced*sizeof(char *));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1825 sqinfo = (SQINFO *) ReallocOrDie (sqinfo, numalloced * sizeof(SQINFO));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1826 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1827 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1828 SeqfileClose(dbfp);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1829
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1830 *ret_rseqs = rseqs;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1831 *ret_sqinfo = sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1832 *ret_num = num;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1833 return 1;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1834 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1835
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1836
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1837 /* Function: String2SeqfileFormat()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1838 * Date: SRE, Sun Jun 27 15:25:54 1999 [TW 723 over Canadian Shield]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1839 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1840 * Purpose: Convert a string (e.g. from command line option arg)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1841 * to a format code. Case insensitive. Return
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1842 * MSAFILE_UNKNOWN/SQFILE_UNKNOWN if string is bad.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1843 * Uses codes defined in squid.h (unaligned formats) and
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1844 * msa.h (aligned formats).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1845 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1846 * Args: s - string to convert; e.g. "stockholm"
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1847 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1848 * Returns: format code; e.g. MSAFILE_STOCKHOLM
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1849 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1850 int
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1851 String2SeqfileFormat(char *s)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1852 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1853 char *s2;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1854 int code = SQFILE_UNKNOWN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1855
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1856 if (s == NULL) return SQFILE_UNKNOWN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1857 s2 = sre_strdup(s, -1);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1858 s2upper(s2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1859
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1860 if (strcmp(s2, "FASTA") == 0) code = SQFILE_FASTA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1861 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1862 if (strcmp(s2, "FA") == 0) code = SQFILE_FASTA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1863 else if (strcmp(s2, "VIENNA") == 0) code = SQFILE_VIENNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1864 else if (strcmp(s2, "VIE") == 0) code = SQFILE_VIENNA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1865 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1866 else if (strcmp(s2, "GENBANK") == 0) code = SQFILE_GENBANK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1867 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1868 else if (strcmp(s2, "GB") == 0) code = SQFILE_GENBANK;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1869 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1870 else if (strcmp(s2, "EMBL") == 0) code = SQFILE_EMBL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1871 else if (strcmp(s2, "GCG") == 0) code = SQFILE_GCG;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1872 else if (strcmp(s2, "GCGDATA") == 0) code = SQFILE_GCGDATA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1873 else if (strcmp(s2, "RAW") == 0) code = SQFILE_RAW;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1874 else if (strcmp(s2, "IG") == 0) code = SQFILE_IG;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1875 else if (strcmp(s2, "STRIDER") == 0) code = SQFILE_STRIDER;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1876 else if (strcmp(s2, "IDRAW") == 0) code = SQFILE_IDRAW;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1877 else if (strcmp(s2, "ZUKER") == 0) code = SQFILE_ZUKER;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1878 else if (strcmp(s2, "PIR") == 0) code = SQFILE_PIR;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1879 else if (strcmp(s2, "SQUID") == 0) code = SQFILE_SQUID;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1880 else if (strcmp(s2, "STOCKHOLM") == 0) code = MSAFILE_STOCKHOLM;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1881 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1882 else if (strcmp(s2, "ST") == 0) code = MSAFILE_STOCKHOLM;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1883 else if (strcmp(s2, "STK") == 0) code = MSAFILE_STOCKHOLM;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1884 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1885 else if (strcmp(s2, "SELEX") == 0) code = MSAFILE_SELEX;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1886 else if (strcmp(s2, "MSF") == 0) code = MSAFILE_MSF;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1887 else if (strcmp(s2, "CLUSTAL") == 0) code = MSAFILE_CLUSTAL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1888 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1889 else if (strcmp(s2, "CLU") == 0) code = MSAFILE_CLUSTAL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1890 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1891 else if (strcmp(s2, "A2M") == 0) code = MSAFILE_A2M;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1892 else if (strcmp(s2, "PHYLIP") == 0) code = MSAFILE_PHYLIP;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1893 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1894 else if (strcmp(s2, "PHY") == 0) code = MSAFILE_PHYLIP;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1895 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1896 else if (strcmp(s2, "EPS") == 0) code = MSAFILE_EPS;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1897 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1898 else code = SQFILE_UNKNOWN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1899 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1900 free(s2);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1901 return code;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1902 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1903 char *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1904 SeqfileFormat2String(int code)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1905 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1906 switch (code) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1907 case SQFILE_UNKNOWN: return "unknown";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1908 case SQFILE_FASTA: return "FASTA";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1909 #ifdef CLUSTALO
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1910 case SQFILE_VIENNA: return "Vienna";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1911 #endif
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1912 case SQFILE_GENBANK: return "Genbank";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1913 case SQFILE_EMBL: return "EMBL";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1914 case SQFILE_GCG: return "GCG";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1915 case SQFILE_GCGDATA: return "GCG data library";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1916 case SQFILE_RAW: return "raw";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1917 case SQFILE_IG: return "Intelligenetics";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1918 case SQFILE_STRIDER: return "MacStrider";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1919 case SQFILE_IDRAW: return "Idraw Postscript";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1920 case SQFILE_ZUKER: return "Zuker";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1921 case SQFILE_PIR: return "PIR";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1922 case SQFILE_SQUID: return "SQUID";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1923 case MSAFILE_STOCKHOLM: return "Stockholm";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1924 case MSAFILE_SELEX: return "SELEX";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1925 case MSAFILE_MSF: return "MSF";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1926 case MSAFILE_CLUSTAL: return "Clustal";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1927 case MSAFILE_A2M: return "a2m";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1928 case MSAFILE_PHYLIP: return "Phylip";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1929 case MSAFILE_EPS: return "EPS";
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1930 default:
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1931 Die("Bad code passed to MSAFormat2String()");
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1932 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1933 /*NOTREACHED*/
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1934 return NULL;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1935 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1936
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1937
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1938 /* Function: MSAToSqinfo()
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1939 * Date: SRE, Tue Jul 20 14:36:56 1999 [St. Louis]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1940 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1941 * Purpose: Take an MSA and generate a SQINFO array suitable
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1942 * for use in annotating the unaligned sequences.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1943 * Return the array.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1944 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1945 * Permanent temporary code. sqinfo was poorly designed.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1946 * it must eventually be replaced, but the odds
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1947 * of this happening soon are nil, so I have to deal.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1948 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1949 * Args: msa - the alignment
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1950 *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1951 * Returns: ptr to allocated sqinfo array.
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1952 * Freeing is ghastly: free in each individual sqinfo[i]
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1953 * with FreeSequence(NULL, &(sqinfo[i])), then
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1954 * free(sqinfo).
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1955 */
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1956 SQINFO *
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1957 MSAToSqinfo(MSA *msa)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1958 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1959 int idx;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1960 SQINFO *sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1961
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1962 sqinfo = MallocOrDie(sizeof(SQINFO) * msa->nseq);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1963
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1964 for (idx = 0; idx < msa->nseq; idx++)
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1965 {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1966 sqinfo[idx].flags = 0;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1967 SetSeqinfoString(&(sqinfo[idx]),
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1968 msa->sqname[idx], SQINFO_NAME);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1969 SetSeqinfoString(&(sqinfo[idx]),
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1970 MSAGetSeqAccession(msa, idx), SQINFO_ACC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1971 SetSeqinfoString(&(sqinfo[idx]),
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1972 MSAGetSeqDescription(msa, idx), SQINFO_DESC);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1973
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1974 if (msa->ss != NULL && msa->ss[idx] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1975 MakeDealignedString(msa->aseq[idx], msa->alen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1976 msa->ss[idx], &(sqinfo[idx].ss));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1977 sqinfo[idx].flags |= SQINFO_SS;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1978 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1979
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1980 if (msa->sa != NULL && msa->sa[idx] != NULL) {
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1981 MakeDealignedString(msa->aseq[idx], msa->alen,
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1982 msa->sa[idx], &(sqinfo[idx].sa));
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1983 sqinfo[idx].flags |= SQINFO_SA;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1984 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1985
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1986 sqinfo[idx].len = DealignedLength(msa->aseq[idx]);
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1987 sqinfo[idx].flags |= SQINFO_LEN;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1988 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1989 return sqinfo;
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1990 }
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1991
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1992
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
1993
bc707542e5de Uploaded
clustalomega
parents:
diff changeset
/* cc -o sqio_test -DA_QUIET_DAY -L. sqio.c -lsquid */
#ifdef A_QUIET_DAY
#include "ssi.h"
/* Stand-alone driver, built only when A_QUIET_DAY is defined.
 * Reads the file named by argv[1] from start to end using one of three
 * strategies selected by the compiled-in `mode`:
 *   1: plain fgets() into a fixed 256-byte buffer
 *   2: sre_fgets() plus SSIGetFilePosition() after every line
 *   3: SeqfileOpen()/ReadSeq() as FASTA plus SSIGetFilePosition()
 *      after every sequence
 * Nothing is printed; problems are reported through Die().
 */
int
main(int argc, char **argv)
{
  FILE *fp;
  char *filename;
  char *buf;
  int   len;
  int   mode = 3;
  SSIOFFSET off;

  /* Without this check a missing argument hands a NULL filename to
   * fopen()/SeqfileOpen().
   */
  if (argc < 2)
    Die("Usage: sqio_test <seqfile>");
  filename = argv[1];

  if (mode == 1) {
    if ((buf = malloc(sizeof(char) * 256)) == NULL)
      Die("malloc of line buffer failed");
    if ((fp = fopen(filename, "r")) == NULL)
      Die("open of %s failed", filename);
    while (fgets(buf, 255, fp) != NULL)
      ;
    fclose(fp);
    free(buf);
  } else if (mode == 2) {
    if ((fp = fopen(filename, "r")) == NULL)
      Die("open of %s failed", filename);
    buf = NULL; len = 0;	/* sre_fgets() is handed an empty buffer to start with */
    while (sre_fgets(&buf, &len, fp) != NULL)
      SSIGetFilePosition(fp, SSI_OFFSET_I32, &off);
    fclose(fp);
    free(buf);
  } else if (mode == 3) {
    SQFILE *dbfp;
    SQINFO  info;

    if ((dbfp = SeqfileOpen(filename, SQFILE_FASTA, NULL)) == NULL)
      Die("open of %s failed", filename);
    while (ReadSeq(dbfp, dbfp->format, &buf, &info)) {
      SSIGetFilePosition(dbfp->f, SSI_OFFSET_I32, &off);
      FreeSequence(buf, &info);
    }
    SeqfileClose(dbfp);
  }

  return 0;
}


#endif