Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/seqIOPlain.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * Copyright (c) Medical Research Council 1994. All rights reserved. | |
3 * | |
4 * Permission to use, copy, modify and distribute this software and its | |
5 * documentation for any purpose is hereby granted without fee, provided that | |
6 * this copyright and notice appears in all copies. | |
7 * | |
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden, | |
9 * as part of the Staden Package at the MRC Laboratory of Molecular | |
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. | |
11 * | |
12 * MRC disclaims all warranties with regard to this software. | |
13 */ | |
14 | |
15 /* | |
16 Title: seqIOPlain | |
17 | |
18 File: seqIOPlain.c | |
19 Purpose: IO of plain sequences | |
20 Last update: Tuesday Jan 15 1991 | |
21 | |
22 Change log: | |
23 | |
24 28.11.90 SD put undesirables under STLOUIS compilation flag | |
25 15.01.91 SD new include file (opp.h) | |
26 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS | |
27 and AUTO_CLIP | |
28 08.09.94 JKB Plain files now also uses the ';<' and ';>' lines. | |
29 09.09.94 JKB Update to use Read instead of Seq library. | |
30 01.06.07 JKB Supports single-read fasta files; about time too! | |
31 */ | |
32 | |
33 | |
34 #define LINE_LENGTH 60 | |
35 | |
36 /* | |
37 * This module should be regarded as part of `read' since it is | |
38 * privy to the internal structure of `Read'. | |
39 * | |
40 * This library also requires use of the mach-io code for the endian | |
41 * independent machine IO. | |
42 * | |
43 * Any references to the writing or reading of edited sequences, | |
44 * or to the bottom strand were added by lfw | |
45 */ | |
46 | |
47 | |
48 | |
49 | |
50 /* ---- Imports ---- */ | |
51 | |
52 #include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc */ | |
53 #include <ctype.h> /* IMPORT: isprint */ | |
54 #include <string.h> | |
55 | |
56 #include "io_lib/misc.h" | |
57 #include "io_lib/plain.h" | |
58 #include "io_lib/Read.h" | |
59 #include "io_lib/xalloc.h" | |
60 #include "io_lib/traceType.h" | |
61 | |
62 #include "io_lib/stdio_hack.h" | |
63 /* ---- Constants ---- */ | |
64 | |
65 #define BasesPerLine 50 /* For output formatting */ | |
66 | |
67 | |
68 /* ---- Exports ---- */ | |
69 | |
70 | |
71 /* | |
72 * Read the plain format sequence from FILE *fp into a Read structure. | |
73 * All printing characters (as defined by ANSII C `isprint') | |
74 * are accepted, but `N's are translated to `-'s. | |
75 * | |
76 * Returns: | |
77 * Read * - Success, the Read structure read. | |
78 * NULLRead - Failure. | |
79 */ | |
80 Read *fread_pln(FILE *fp) { | |
81 Read *read = NULLRead; | |
82 off_t fileLen; | |
83 int ch; | |
84 char *leftc, *rightc, *leftcp, *rightcp; | |
85 int first = 1; | |
86 | |
87 /* | |
88 * Find the length of the file. | |
89 * Use this as an overestimate of the length of the sequence. | |
90 */ | |
91 fseek(fp, (off_t) 0, 2); | |
92 if ((fileLen = ftell(fp)) > INT_MAX /*Was MAXINT2*/) | |
93 goto bail_out; | |
94 | |
95 fseek(fp, (off_t) 0, 0); | |
96 | |
97 /* Allocate the sequence */ | |
98 if (NULLRead == (read = read_allocate(0, fileLen))) | |
99 goto bail_out; | |
100 | |
101 if (NULL == (leftc = (char *)xmalloc(fileLen))) | |
102 goto bail_out; | |
103 | |
104 if (NULL == (rightc = (char *)xmalloc(fileLen))) | |
105 goto bail_out; | |
106 | |
107 leftcp = leftc; | |
108 rightcp = rightc; | |
109 | |
110 /* Read in the bases */ | |
111 | |
112 read->NBases = 0; | |
113 read->format = TT_PLN; | |
114 | |
115 while ((ch = fgetc(fp)) != EOF) { | |
116 if (ch == '>') { | |
117 /* Fasta format file - skip the header and load the first | |
118 * fasta sequence only. We don't even attempt to worry about | |
119 * multi-sequence file formats for now. | |
120 */ | |
121 if (!first) | |
122 break; | |
123 | |
124 while(ch != '\n' && ch != EOF) | |
125 ch = fgetc(fp); | |
126 | |
127 } else if (ch==';') { | |
128 /* | |
129 * ;< is left cutoff, | |
130 * ;> is right cutoff. | |
131 * Any other ';'s we can treat as a comments. | |
132 */ | |
133 ch = fgetc(fp); | |
134 | |
135 if (first == 1 && ch != '<' && ch != '>') { | |
136 int d; | |
137 char type[5], name[17], line[1024]; | |
138 | |
139 line[0] = ch; | |
140 fgets(&line[1], 1022, fp); | |
141 | |
142 if (5 == sscanf(line, "%6d%6d%6d%4c%s", | |
143 &d, &d, &d, type, name)) { | |
144 char * p; | |
145 | |
146 if (p = strchr(type, ' ')) | |
147 *p = 0; | |
148 | |
149 read->format = trace_type_str2int(type); | |
150 read->trace_name = (char *)xmalloc(strlen(name)+1); | |
151 if (read->trace_name) | |
152 strcpy(read->trace_name, name); | |
153 } | |
154 } | |
155 | |
156 else if (ch == '<') { | |
157 ch = fgetc(fp); | |
158 while (ch != '\n') { | |
159 *leftcp++ = ch; | |
160 ch = fgetc(fp); | |
161 } | |
162 } else if (ch == '>') { | |
163 ch = fgetc(fp); | |
164 while (ch != '\n') { | |
165 *rightcp++ = ch; | |
166 ch = fgetc(fp); | |
167 } | |
168 } else { | |
169 while(ch != '\n' && ch != EOF) | |
170 ch = fgetc(fp); | |
171 } | |
172 } else if (isprint(ch) && !isspace(ch)) { | |
173 read->base[read->NBases++] = ((ch)=='N') ? '-' : (ch); | |
174 } | |
175 | |
176 first = 0; | |
177 } | |
178 | |
179 *leftcp = *rightcp = 0; | |
180 | |
181 read->leftCutoff = strlen(leftc); | |
182 read->rightCutoff = read->leftCutoff + read->NBases + 1; | |
183 memmove(&read->base[read->leftCutoff], read->base, read->NBases); | |
184 memmove(read->base, leftc, read->leftCutoff); | |
185 memmove(&read->base[read->leftCutoff + read->NBases], | |
186 rightc, strlen(rightc)); | |
187 | |
188 read->NBases += read->leftCutoff + strlen(rightc); | |
189 read->base[read->NBases] = 0; | |
190 | |
191 xfree(leftc); | |
192 xfree(rightc); | |
193 | |
194 /* SUCCESS */ | |
195 return(read); | |
196 | |
197 /* FAILURE */ | |
198 bail_out: | |
199 if (read) | |
200 read_deallocate(read); | |
201 | |
202 return NULLRead; | |
203 } | |
204 | |
205 /* | |
206 * Read the plain format sequence with name `fn' into a Read structure. | |
207 * All printing characters (as defined by ANSII C `isprint') | |
208 * are accepted, but `N's are translated to `-'s. | |
209 * | |
210 * Returns: | |
211 * Read * - Success, the Read structure read. | |
212 * NULLRead - Failure. | |
213 */ | |
214 Read *read_pln(char *fn) { | |
215 FILE *fp; | |
216 Read *read; | |
217 | |
218 /* Open file */ | |
219 if ((fp = fopen(fn, "r")) == NULL) | |
220 return NULLRead; | |
221 | |
222 read = fread_pln(fp); | |
223 fclose(fp); | |
224 | |
225 if (read && read->trace_name == NULL && | |
226 (read->trace_name = (char *)xmalloc(strlen(fn)+1))) | |
227 strcpy(read->trace_name, fn); | |
228 | |
229 return read; | |
230 } | |
231 | |
232 | |
233 /* | |
234 * Write to a Plain file | |
235 */ | |
236 int fwrite_pln(FILE *fp, Read *read) { | |
237 int i, err = 0; | |
238 | |
239 for (i = 0; i < read->NBases; i += LINE_LENGTH) | |
240 if (-1 == fprintf(fp, "%.*s\n", | |
241 read->NBases - i > LINE_LENGTH | |
242 ? LINE_LENGTH : read->NBases - i, | |
243 &read->base[i])) | |
244 err = 1; | |
245 | |
246 return err ? -1 : 0; | |
247 } | |
248 | |
249 int write_pln(char *fn, Read *read) { | |
250 FILE *fp; | |
251 | |
252 if ((fp = fopen(fn,"w")) == NULL) | |
253 return -1; | |
254 | |
255 if (fwrite_pln(fp, read)) { | |
256 fclose(fp); | |
257 return -1; | |
258 } | |
259 | |
260 fclose(fp); | |
261 return 0; | |
262 } | |
263 |