comparison srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 /*
16 Title: seqIOPlain
17
18 File: seqIOPlain.c
19 Purpose: IO of plain sequences
20 Last update: Tuesday Jan 15 1991
21
22 Change log:
23
24 28.11.90 SD put undesirables under STLOUIS compilation flag
25 15.01.91 SD new include file (opp.h)
26 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS
27 and AUTO_CLIP
28 08.09.94 JKB Plain files now also uses the ';<' and ';>' lines.
29 09.09.94 JKB Update to use Read instead of Seq library.
30 01.06.07 JKB Supports single-read fasta files; about time too!
31 */
32
33
34 #define LINE_LENGTH 60
35
36 /*
37 * This module should be regarded as part of `read' since it is
38 * privy to the internal structure of `Read'.
39 *
40 * This library also requires use of the mach-io code for the endian
41 * independent machine IO.
42 *
43 * Any references to the writing or reading of edited sequences,
44 * or to the bottom strand were added by lfw
45 */
46
47
48
49
50 /* ---- Imports ---- */
51
52 #include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc */
53 #include <ctype.h> /* IMPORT: isprint */
54 #include <string.h>
55
56 #include "io_lib/misc.h"
57 #include "io_lib/plain.h"
58 #include "io_lib/Read.h"
59 #include "io_lib/xalloc.h"
60 #include "io_lib/traceType.h"
61
62 #include "io_lib/stdio_hack.h"
63 /* ---- Constants ---- */
64
65 #define BasesPerLine 50 /* For output formatting */
66
67
68 /* ---- Exports ---- */
69
70
71 /*
72 * Read the plain format sequence from FILE *fp into a Read structure.
73 * All printing characters (as defined by ANSII C `isprint')
74 * are accepted, but `N's are translated to `-'s.
75 *
76 * Returns:
77 * Read * - Success, the Read structure read.
78 * NULLRead - Failure.
79 */
80 Read *fread_pln(FILE *fp) {
81 Read *read = NULLRead;
82 off_t fileLen;
83 int ch;
84 char *leftc, *rightc, *leftcp, *rightcp;
85 int first = 1;
86
87 /*
88 * Find the length of the file.
89 * Use this as an overestimate of the length of the sequence.
90 */
91 fseek(fp, (off_t) 0, 2);
92 if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/)
93 goto bail_out;
94
95 fseek(fp, (off_t) 0, 0);
96
97 /* Allocate the sequence */
98 if (NULLRead == (read = read_allocate(0, fileLen)))
99 goto bail_out;
100
101 if (NULL == (leftc = (char *)xmalloc(fileLen)))
102 goto bail_out;
103
104 if (NULL == (rightc = (char *)xmalloc(fileLen)))
105 goto bail_out;
106
107 leftcp = leftc;
108 rightcp = rightc;
109
110 /* Read in the bases */
111
112 read->NBases = 0;
113 read->format = TT_PLN;
114
115 while ((ch = fgetc(fp)) != EOF) {
116 if (ch == '>') {
117 /* Fasta format file - skip the header and load the first
118 * fasta sequence only. We don't even attempt to worry about
119 * multi-sequence file formats for now.
120 */
121 if (!first)
122 break;
123
124 while(ch != '\n' && ch != EOF)
125 ch = fgetc(fp);
126
127 } else if (ch==';') {
128 /*
129 * ;< is left cutoff,
130 * ;> is right cutoff.
131 * Any other ';'s we can treat as a comments.
132 */
133 ch = fgetc(fp);
134
135 if (first == 1 && ch != '<' && ch != '>') {
136 int d;
137 char type[5], name[17], line[1024];
138
139 line[0] = ch;
140 fgets(&line[1], 1022, fp);
141
142 if (5 == sscanf(line, "%6d%6d%6d%4c%s",
143 &d, &d, &d, type, name)) {
144 char * p;
145
146 if (p = strchr(type, ' '))
147 *p = 0;
148
149 read->format = trace_type_str2int(type);
150 read->trace_name = (char *)xmalloc(strlen(name)+1);
151 if (read->trace_name)
152 strcpy(read->trace_name, name);
153 }
154 }
155
156 else if (ch == '<') {
157 ch = fgetc(fp);
158 while (ch != '\n') {
159 *leftcp++ = ch;
160 ch = fgetc(fp);
161 }
162 } else if (ch == '>') {
163 ch = fgetc(fp);
164 while (ch != '\n') {
165 *rightcp++ = ch;
166 ch = fgetc(fp);
167 }
168 } else {
169 while(ch != '\n' && ch != EOF)
170 ch = fgetc(fp);
171 }
172 } else if (isprint(ch) && !isspace(ch)) {
173 read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch);
174 }
175
176 first = 0;
177 }
178
179 *leftcp = *rightcp = 0;
180
181 read->leftCutoff = strlen(leftc);
182 read->rightCutoff = read->leftCutoff + read->NBases + 1;
183 memmove(&read->base[read->leftCutoff], read->base, read->NBases);
184 memmove(read->base, leftc, read->leftCutoff);
185 memmove(&read->base[read->leftCutoff + read->NBases],
186 rightc, strlen(rightc));
187
188 read->NBases += read->leftCutoff + strlen(rightc);
189 read->base[read->NBases] = 0;
190
191 xfree(leftc);
192 xfree(rightc);
193
194 /* SUCCESS */
195 return(read);
196
197 /* FAILURE */
198 bail_out:
199 if (read)
200 read_deallocate(read);
201
202 return NULLRead;
203 }
204
205 /*
206 * Read the plain format sequence with name `fn' into a Read structure.
207 * All printing characters (as defined by ANSII C `isprint')
208 * are accepted, but `N's are translated to `-'s.
209 *
210 * Returns:
211 * Read * - Success, the Read structure read.
212 * NULLRead - Failure.
213 */
214 Read *read_pln(char *fn) {
215 FILE *fp;
216 Read *read;
217
218 /* Open file */
219 if ((fp = fopen(fn, "r")) == NULL)
220 return NULLRead;
221
222 read = fread_pln(fp);
223 fclose(fp);
224
225 if (read && read->trace_name == NULL &&
226 (read->trace_name = (char *)xmalloc(strlen(fn)+1)))
227 strcpy(read->trace_name, fn);
228
229 return read;
230 }
231
232
233 /*
234 * Write to a Plain file
235 */
236 int fwrite_pln(FILE *fp, Read *read) {
237 int i, err = 0;
238
239 for (i = 0; i < read->NBases; i += LINE_LENGTH)
240 if (-1 == fprintf(fp, "%.*s\n",
241 read->NBases - i > LINE_LENGTH
242 ? LINE_LENGTH : read->NBases - i,
243 &read->base[i]))
244 err = 1;
245
246 return err ? -1 : 0;
247 }
248
249 int write_pln(char *fn, Read *read) {
250 FILE *fp;
251
252 if ((fp = fopen(fn,"w")) == NULL)
253 return -1;
254
255 if (fwrite_pln(fp, read)) {
256 fclose(fp);
257 return -1;
258 }
259
260 fclose(fp);
261 return 0;
262 }
263