1
|
1 /*****************************************************************
|
|
2 * SQUID - a library of functions for biological sequence analysis
|
|
3 * Copyright (C) 1992-2002 Washington University School of Medicine
|
|
4 *
|
|
5 * This source code is freely distributed under the terms of the
|
|
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
|
|
7 * for details.
|
|
8 *****************************************************************/
|
|
9
|
|
10 /* a2m.c
|
|
11 *
|
|
12 * reading/writing A2M (aligned FASTA) files.
|
|
13 *
|
|
14 * RCS $Id: a2m.c 242 2011-05-27 14:04:21Z andreas $ (Original squid RCS Id: a2m.c,v 1.1 1999/07/15 22:26:40 eddy Exp)
|
|
15 */
|
|
16
|
|
17 #include <stdio.h>
|
|
18 #include <stdlib.h>
|
|
19 #include <string.h>
|
|
20 #include "squid.h"
|
|
21 #include "msa.h"
|
|
22
|
|
23 /* Function: ReadA2M()
|
|
24 * Date: SRE, Sun Jun 6 17:11:29 1999 [bus from Madison 1999 worm mtg]
|
|
25 *
|
|
26 * Purpose: Parse an alignment read from an open A2M format
|
|
27 * alignment file. A2M is a single alignment format.
|
|
28 * Return the alignment, or NULL if we've already
|
|
29 * read the alignment.
|
|
30 *
|
|
31 * Args: afp - open alignment file
|
|
32 *
|
|
33 * Returns: MSA * - an alignment object.
|
|
34 * Caller responsible for an MSAFree()
|
|
35 */
|
|
36 MSA *
|
|
37 ReadA2M(MSAFILE *afp)
|
|
38 {
|
|
39 MSA *msa;
|
|
40 char *buf;
|
|
41 char *name;
|
|
42 char *desc;
|
|
43 char *seq;
|
|
44 int idx;
|
|
45 int len1, len2;
|
|
46
|
|
47 if (feof(afp->f)) return NULL;
|
|
48
|
|
49 name = NULL;
|
|
50 msa = MSAAlloc(10, 0);
|
|
51 idx = 0;
|
|
52 while ((buf = MSAFileGetLine(afp)) != NULL)
|
|
53 {
|
|
54 if (*buf == '>')
|
|
55 {
|
|
56 buf++; /* skip the '>' */
|
|
57 if ((name = sre_strtok(&buf, WHITESPACE, &len1)) == NULL)
|
|
58 Die("Blank name in A2M file %s (line %d)\n", afp->fname, afp->linenumber);
|
|
59 desc = sre_strtok(&buf, "\n", &len2);
|
|
60
|
|
61 idx = GKIStoreKey(msa->index, name);
|
|
62 if (idx >= msa->nseqalloc) MSAExpand(msa);
|
|
63
|
|
64 msa->sqname[idx] = sre_strdup(name, len1);
|
|
65 if (desc != NULL) MSASetSeqDescription(msa, idx, desc);
|
|
66 msa->nseq++;
|
|
67 }
|
|
68 else if (name != NULL)
|
|
69 {
|
|
70 if ((seq = sre_strtok(&buf, WHITESPACE, &len1)) == NULL) continue;
|
|
71 msa->sqlen[idx] = sre_strcat(&(msa->aseq[idx]), msa->sqlen[idx], seq, len1);
|
|
72 }
|
|
73 }
|
|
74 if (name == NULL) { MSAFree(msa); return NULL; }
|
|
75
|
|
76 MSAVerifyParse(msa);
|
|
77 return msa;
|
|
78 }
|
|
79
|
|
80
|
|
81 /* Function: WriteA2M()
|
|
82 * Date: SRE, Sun Jun 6 17:40:35 1999 [bus from Madison, 1999 worm mtg]
|
|
83 *
|
|
84 * Purpose: Write an "aligned FASTA" (aka a2m, to UCSC) formatted
|
|
85 * alignment.
|
|
86 *
|
|
87 * Args: fp - open FILE to write to.
|
|
88 * msa - alignment to write
|
|
89 *
|
|
90 * Returns: void
|
|
91 */
|
|
92 void
|
|
93 #ifdef CLUSTALO
|
|
94 WriteA2M(FILE *fp, MSA *msa, int vienna)
|
|
95 #else
|
|
96 WriteA2M(FILE *fp, MSA *msa)
|
|
97 #endif
|
|
98 {
|
|
99 int idx; /* sequence index */
|
|
100 int pos; /* position in sequence */
|
|
101 char buf[64]; /* buffer for individual lines */
|
|
102 int cpl = 60; /* char per line; must be < 64 unless buf is bigger */
|
|
103
|
|
104 buf[cpl] = '\0';
|
|
105 for (idx = 0; idx < msa->nseq; idx++)
|
|
106 {
|
|
107 #ifdef CLUSTALO
|
|
108 /* most fasta sequences don't have a description, which
|
|
109 * leads to a trailing white space in the original code
|
|
110 */
|
|
111 fprintf(fp, ">%s", msa->sqname[idx]);
|
|
112
|
|
113 if (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL && !vienna) {
|
|
114 fprintf(fp, " %s", msa->sqdesc[idx]);
|
|
115 }
|
|
116 fprintf(fp, "\n");
|
|
117 #else
|
|
118 fprintf(fp, ">%s %s\n",
|
|
119 msa->sqname[idx],
|
|
120 (msa->sqdesc != NULL && msa->sqdesc[idx] != NULL) ? msa->sqdesc[idx] : "");
|
|
121 #endif
|
|
122 for (pos = 0; pos < msa->alen; pos+=cpl)
|
|
123 {
|
|
124 strncpy(buf, &(msa->aseq[idx][pos]), cpl);
|
|
125 if (vienna)
|
|
126 fprintf(fp, "%s", buf);
|
|
127 else
|
|
128 fprintf(fp, "%s\n", buf);
|
|
129 }
|
|
130 if (vienna)
|
|
131 fprintf(fp, "\n");
|
|
132
|
|
133 }
|
|
134 }
|