Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/read_scf.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 /* | |
2 * Copyright (c) Medical Research Council 1994. All rights reserved. | |
3 * | |
4 * Permission to use, copy, modify and distribute this software and its | |
5 * documentation for any purpose is hereby granted without fee, provided that | |
6 * this copyright and notice appears in all copies. | |
7 * | |
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden, | |
9 * as part of the Staden Package at the MRC Laboratory of Molecular | |
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom. | |
11 * | |
12 * MRC disclaims all warranties with regard to this software. | |
13 */ | |
14 | |
15 /* | |
16 Title: read_scf.c | |
17 | |
18 Purpose: read IO of Standard Chromatogram Format sequences | |
19 Last update: August 18 1994 | |
20 | |
21 Change log: | |
22 4 Feb 1992, Now draft proposal version 2 | |
23 20 Feb 1992, Grab info from comment lines | |
24 19 Aug 1992, If SCF file has clip information, don't clip automatically | |
25 10 Nov 1992 SCF comments now stored in seq data structure | |
26 18 Aug 1994 Renamed from ReadIOSCF.c; now purely SCF IO (no Seq structs) | |
27 | |
28 */ | |
29 | |
30 /* ---- Imports ---- */ | |
31 | |
32 #include <ctype.h> | |
33 #include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc, | |
34 EOF */ | |
35 #include <stdlib.h> | |
36 #include <string.h> | |
37 #include <sys/types.h> | |
38 | |
39 #include "io_lib/mach-io.h" | |
40 #include "io_lib/xalloc.h" | |
41 #include "io_lib/compress.h" | |
42 #include "io_lib/Read.h" | |
43 | |
44 #include "io_lib/stdio_hack.h" | |
45 #include "io_lib/scf.h" /* SCF structures */ | |
46 | |
47 | |
48 /* SunOS4 has its definitions in unistd, which we won't include for compat. */ | |
49 #ifndef SEEK_SET | |
50 #define SEEK_SET 0 | |
51 #define SEEK_CUR 1 | |
52 #define SEEK_END 2 | |
53 #endif | |
54 | |
55 /* ---- Exported functions ---- */ | |
56 | |
57 int read_scf_header(FILE *fp, Header *h) | |
58 { | |
59 int i; | |
60 | |
61 if (be_read_int_4(fp,&h->magic_number)==False) return -1; | |
62 | |
63 if (h->magic_number != SCF_MAGIC) | |
64 return -1; | |
65 | |
66 if (be_read_int_4(fp,&h->samples)==False) return -1; | |
67 if (be_read_int_4(fp,&h->samples_offset)==False) return -1; | |
68 if (be_read_int_4(fp,&h->bases)==False) return -1; | |
69 if (be_read_int_4(fp,&h->bases_left_clip)==False) return -1; | |
70 if (be_read_int_4(fp,&h->bases_right_clip)==False) return -1; | |
71 if (be_read_int_4(fp,&h->bases_offset)==False) return -1; | |
72 if (be_read_int_4(fp,&h->comments_size)==False) return -1; | |
73 if (be_read_int_4(fp,&h->comments_offset)==False) return -1; | |
74 if (fread(&h->version[0],sizeof(h->version),1,fp)!=1) return -1; | |
75 if (be_read_int_4(fp,&h->sample_size)==False) return -1; | |
76 if (be_read_int_4(fp,&h->code_set)==False) return -1; | |
77 if (be_read_int_4(fp,&h->private_size)==False) return -1; | |
78 if (be_read_int_4(fp,&h->private_offset)==False) return -1; | |
79 for (i=0;i<18;i++) | |
80 if (be_read_int_4(fp,&h->spare[i])==False) return -1; | |
81 | |
82 return 0; | |
83 } | |
84 | |
85 | |
86 int read_scf_sample1(FILE *fp, Samples1 *s) | |
87 { | |
88 uint_1 buf[4]; | |
89 | |
90 if (4 != fread(buf, 1, 4, fp)) return -1; | |
91 s->sample_A = buf[0]; | |
92 s->sample_C = buf[1]; | |
93 s->sample_G = buf[2]; | |
94 s->sample_T = buf[3]; | |
95 | |
96 /* | |
97 if (1 != fread(s, 4, 1, fp)) return -1; | |
98 */ | |
99 | |
100 return 0; | |
101 } | |
102 | |
103 | |
104 int read_scf_sample2(FILE *fp, Samples2 *s) | |
105 { | |
106 uint_2 buf[4]; | |
107 | |
108 if (4 != fread(buf, 2, 4, fp)) return -1; | |
109 s->sample_A = be_int2(buf[0]); | |
110 s->sample_C = be_int2(buf[1]); | |
111 s->sample_G = be_int2(buf[2]); | |
112 s->sample_T = be_int2(buf[3]); | |
113 | |
114 return 0; | |
115 } | |
116 | |
117 int read_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) { | |
118 size_t i; | |
119 | |
120 for (i = 0; i < num_samples; i++) { | |
121 if (-1 == read_scf_sample1(fp, &(s[i]))) | |
122 return -1; | |
123 } | |
124 | |
125 return 0; | |
126 } | |
127 | |
128 | |
129 int read_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) { | |
130 size_t i; | |
131 | |
132 for (i = 0; i < num_samples; i++) { | |
133 if (-1 == read_scf_sample2(fp, &(s[i]))) | |
134 return -1; | |
135 } | |
136 | |
137 return 0; | |
138 } | |
139 | |
140 | |
141 int read_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) { | |
142 size_t i; | |
143 uint2 *samples_out; | |
144 | |
145 /* version to read delta delta data in 2 bytes */ | |
146 | |
147 if ( ! (samples_out = (uint2 *)xmalloc((num_samples+1) * | |
148 sizeof(uint2)))) { | |
149 return -1; | |
150 } | |
151 | |
152 | |
153 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1; | |
154 #ifdef SP_LITTLE_ENDIAN | |
155 for (i = 0; i < num_samples; i++) { | |
156 samples_out[i] = be_int2(samples_out[i]); | |
157 } | |
158 #endif | |
159 scf_delta_samples2 ( samples_out, num_samples, 0); | |
160 for (i = 0; i < num_samples; i++) { | |
161 (&s[i])->sample_A = samples_out[i]; | |
162 } | |
163 | |
164 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1; | |
165 #ifdef SP_LITTLE_ENDIAN | |
166 for (i = 0; i < num_samples; i++) { | |
167 samples_out[i] = be_int2(samples_out[i]); | |
168 } | |
169 #endif | |
170 scf_delta_samples2 ( samples_out, num_samples, 0); | |
171 for (i = 0; i < num_samples; i++) { | |
172 (&s[i])->sample_C = samples_out[i]; | |
173 } | |
174 | |
175 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1; | |
176 #ifdef SP_LITTLE_ENDIAN | |
177 for (i = 0; i < num_samples; i++) { | |
178 samples_out[i] = be_int2(samples_out[i]); | |
179 } | |
180 #endif | |
181 scf_delta_samples2 ( samples_out, num_samples, 0); | |
182 for (i = 0; i < num_samples; i++) { | |
183 (&s[i])->sample_G = samples_out[i]; | |
184 } | |
185 | |
186 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1; | |
187 #ifdef SP_LITTLE_ENDIAN | |
188 for (i = 0; i < num_samples; i++) { | |
189 samples_out[i] = be_int2(samples_out[i]); | |
190 } | |
191 #endif | |
192 scf_delta_samples2 ( samples_out, num_samples, 0); | |
193 for (i = 0; i < num_samples; i++) { | |
194 (&s[i])->sample_T = samples_out[i]; | |
195 } | |
196 xfree(samples_out); | |
197 return 0; | |
198 } | |
199 | |
200 int read_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) { | |
201 size_t i; | |
202 int1 *samples_out; | |
203 | |
204 /* version to read delta delta data in 1 byte */ | |
205 | |
206 if ( ! (samples_out = (int1 *)xmalloc((num_samples+1) * | |
207 sizeof(int1)))) { | |
208 return -1; | |
209 } | |
210 | |
211 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1; | |
212 scf_delta_samples1 ( samples_out, num_samples, 0); | |
213 for (i = 0; i < num_samples; i++) { | |
214 (&s[i])->sample_A = samples_out[i]; | |
215 } | |
216 | |
217 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1; | |
218 scf_delta_samples1 ( samples_out, num_samples, 0); | |
219 for (i = 0; i < num_samples; i++) { | |
220 (&s[i])->sample_C = samples_out[i]; | |
221 } | |
222 | |
223 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1; | |
224 scf_delta_samples1 ( samples_out, num_samples, 0); | |
225 for (i = 0; i < num_samples; i++) { | |
226 (&s[i])->sample_G = samples_out[i]; | |
227 } | |
228 | |
229 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1; | |
230 scf_delta_samples1 ( samples_out, num_samples, 0); | |
231 for (i = 0; i < num_samples; i++) { | |
232 (&s[i])->sample_T = samples_out[i]; | |
233 } | |
234 | |
235 xfree(samples_out); | |
236 return 0; | |
237 } | |
238 | |
239 int read_scf_base(FILE *fp, Bases *b) | |
240 { | |
241 uint_1 buf[12]; | |
242 | |
243 if (1 != fread(buf, 12, 1, fp)) return -1; | |
244 b->peak_index = be_int4(((uint_4 *)buf)[0]); | |
245 b->prob_A = buf[4]; | |
246 b->prob_C = buf[5]; | |
247 b->prob_G = buf[6]; | |
248 b->prob_T = buf[7]; | |
249 b->base = buf[8]; | |
250 b->spare[0] = buf[9]; | |
251 b->spare[1] = buf[10]; | |
252 b->spare[2] = buf[11]; | |
253 | |
254 return 0; | |
255 } | |
256 | |
257 | |
258 int read_scf_bases(FILE *fp, Bases *b, size_t num_bases) { | |
259 size_t i; | |
260 | |
261 for (i = 0; i < num_bases; i++) { | |
262 if (-1 == read_scf_base(fp, &(b[i]))) | |
263 return -1; | |
264 } | |
265 | |
266 return 0; | |
267 } | |
268 | |
269 int read_scf_bases3(FILE *fp, Bases *b, size_t num_bases) | |
270 { | |
271 size_t i; | |
272 uint_4 *buf4; | |
273 uint_1 *buf1; | |
274 | |
275 if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases))) | |
276 return -1; | |
277 | |
278 if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) { | |
279 xfree(buf4); | |
280 return -1; | |
281 } | |
282 | |
283 if (num_bases != fread(buf4, 4, num_bases, fp)) return -1; | |
284 for (i=0; i < num_bases; i++) | |
285 (&b[i])->peak_index = be_int4(buf4[i]); | |
286 | |
287 if (8 * num_bases != fread(buf1, 1, 8 * num_bases, fp)) return -1; | |
288 | |
289 for (i=0; i < num_bases; i++) { | |
290 (&b[i])->prob_A = buf1[i]; | |
291 (&b[i])->prob_C = buf1[i+num_bases]; | |
292 (&b[i])->prob_G = buf1[i+2*num_bases]; | |
293 (&b[i])->prob_T = buf1[i+3*num_bases]; | |
294 (&b[i])->base = buf1[i+4*num_bases]; | |
295 (&b[i])->spare[0] = buf1[i+5*num_bases]; | |
296 (&b[i])->spare[1] = buf1[i+6*num_bases]; | |
297 (&b[i])->spare[2] = buf1[i+7*num_bases]; | |
298 } | |
299 | |
300 xfree(buf4); | |
301 xfree(buf1); | |
302 | |
303 return 0; | |
304 } | |
305 | |
306 | |
307 | |
308 int read_scf_comment(FILE *fp, Comments *c, size_t s) | |
309 { | |
310 if (fread(c, 1, s, fp) != s) return -1; | |
311 | |
312 return 0; | |
313 } | |
314 | |
315 | |
316 /* | |
317 * Read the SCF format sequence from FILE *fp into a 'scf' structure. | |
318 * A NULL result indicates failure. | |
319 */ | |
320 Scf *fread_scf(FILE *fp) { | |
321 Scf *scf; | |
322 Header h; | |
323 int err; | |
324 float scf_version; | |
325 int sections = read_sections(0); | |
326 | |
327 /* Read header */ | |
328 if (read_scf_header(fp, &h) == -1) { | |
329 return NULL; | |
330 } | |
331 | |
332 /* Allocate memory */ | |
333 if (NULL == (scf = scf_allocate(h.samples, h.sample_size, | |
334 h.bases, h.comments_size, | |
335 h.private_size))) | |
336 return NULL; | |
337 | |
338 /* fake things for older style SCF -- SD */ | |
339 if (h.sample_size != 1 && h.sample_size != 2) h.sample_size = 1; | |
340 | |
341 scf_version = scf_version_str2float(h.version); | |
342 | |
343 memcpy(&scf->header, &h, sizeof(Header)); | |
344 | |
345 if (sections & READ_SAMPLES) { | |
346 /* Read samples */ | |
347 if (fseek(fp, (off_t)h.samples_offset, 0 /* SEEK_SET */) != 0) { | |
348 scf_deallocate(scf); | |
349 return NULL; | |
350 } | |
351 | |
352 if ( 2.9 > scf_version ) { | |
353 | |
354 if (h.sample_size == 1) { | |
355 err= read_scf_samples1(fp, scf->samples.samples1, h.samples); | |
356 } | |
357 else { | |
358 err= read_scf_samples2(fp, scf->samples.samples2, h.samples); | |
359 } | |
360 } | |
361 else { | |
362 | |
363 if (h.sample_size == 1) { | |
364 err= read_scf_samples31(fp, scf->samples.samples1, h.samples); | |
365 } | |
366 else { | |
367 err= read_scf_samples32(fp, scf->samples.samples2, h.samples); | |
368 } | |
369 } | |
370 if (-1 == err) { | |
371 scf_deallocate(scf); | |
372 return NULL; | |
373 } | |
374 } | |
375 | |
376 if (sections & READ_BASES) { | |
377 /* Read bases */ | |
378 if (fseek(fp, (off_t)h.bases_offset, 0 /* SEEK_SET */) != 0) { | |
379 scf_deallocate(scf); | |
380 return NULL; | |
381 } | |
382 | |
383 if ( 2.9 > scf_version ) { | |
384 | |
385 if (-1 == read_scf_bases(fp, scf->bases, h.bases)) { | |
386 scf_deallocate(scf); | |
387 return NULL; | |
388 } | |
389 } | |
390 else { | |
391 | |
392 if (-1 == read_scf_bases3(fp, scf->bases, h.bases)) { | |
393 scf_deallocate(scf); | |
394 return NULL; | |
395 } | |
396 } | |
397 } | |
398 | |
399 if (sections & READ_COMMENTS) { | |
400 /* Read comments */ | |
401 if (scf->comments) { | |
402 if (fseek(fp,(off_t)(h.comments_offset), 0) != 0 | |
403 || -1 == read_scf_comment(fp, scf->comments, | |
404 h.comments_size)) { | |
405 /* | |
406 * Was: "scf_deallocate(scf); return NULL;". | |
407 * We now simply clear the comments and gracefully continue. | |
408 */ | |
409 fprintf(stderr, "Warning: SCF file had invalid comment field\n"); | |
410 xfree(scf->comments); | |
411 scf->comments = NULL; | |
412 } else { | |
413 scf->comments[h.comments_size] = '\0'; | |
414 } | |
415 } | |
416 } | |
417 | |
418 /* Read private data */ | |
419 if (h.private_size) { | |
420 if (-1 == fseek(fp, (off_t)(h.private_offset), 0) || | |
421 h.private_size != fread(scf->private_data, 1, h.private_size, fp)){ | |
422 scf_deallocate(scf); | |
423 return NULL; | |
424 } | |
425 } | |
426 | |
427 return scf; | |
428 } | |
429 | |
430 /* | |
431 * Read the SCF format sequence with name `fn' into a 'scf' structure. | |
432 * A NULL result indicates failure. | |
433 */ | |
434 Scf *read_scf(char *fn) { | |
435 Scf *scf; | |
436 | |
437 FILE *fp; | |
438 | |
439 /* Open fn for reading in binary mode */ | |
440 | |
441 if (NULL == (fp = fopen_compressed(fn, NULL))) | |
442 return NULL; | |
443 | |
444 scf = fread_scf(fp); | |
445 fclose(fp); | |
446 | |
447 return scf; | |
448 } |