comparison srf2fastq/io_lib-1.12.2/io_lib/read_scf.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 /*
16 Title: read_scf.c
17
18 Purpose: read IO of Standard Chromatogram Format sequences
19 Last update: August 18 1994
20
21 Change log:
22 4 Feb 1992, Now draft proposal version 2
23 20 Feb 1992, Grab info from comment lines
24 19 Aug 1992, If SCF file has clip information, don't clip automatically
25 10 Nov 1992 SCF comments now stored in seq data structure
26 18 Aug 1994 Renamed from ReadIOSCF.c; now purely SCF IO (no Seq structs)
27
28 */
29
30 /* ---- Imports ---- */
31
32 #include <ctype.h>
33 #include <stdio.h> /* IMPORT: fopen, fclose, fseek, ftell, fgetc,
34 EOF */
35 #include <stdlib.h>
36 #include <string.h>
37 #include <sys/types.h>
38
39 #include "io_lib/mach-io.h"
40 #include "io_lib/xalloc.h"
41 #include "io_lib/compress.h"
42 #include "io_lib/Read.h"
43
44 #include "io_lib/stdio_hack.h"
45 #include "io_lib/scf.h" /* SCF structures */
46
47
48 /* SunOS4 has its definitions in unistd.h, which we won't include for compat; fall back to these values only when SEEK_SET is not already defined. */
49 #ifndef SEEK_SET
50 #define SEEK_SET 0
51 #define SEEK_CUR 1
52 #define SEEK_END 2
53 #endif
54
55 /* ---- Exported functions ---- */
56
57 int read_scf_header(FILE *fp, Header *h)
58 {
59 int i;
60
61 if (be_read_int_4(fp,&h->magic_number)==False) return -1;
62
63 if (h->magic_number != SCF_MAGIC)
64 return -1;
65
66 if (be_read_int_4(fp,&h->samples)==False) return -1;
67 if (be_read_int_4(fp,&h->samples_offset)==False) return -1;
68 if (be_read_int_4(fp,&h->bases)==False) return -1;
69 if (be_read_int_4(fp,&h->bases_left_clip)==False) return -1;
70 if (be_read_int_4(fp,&h->bases_right_clip)==False) return -1;
71 if (be_read_int_4(fp,&h->bases_offset)==False) return -1;
72 if (be_read_int_4(fp,&h->comments_size)==False) return -1;
73 if (be_read_int_4(fp,&h->comments_offset)==False) return -1;
74 if (fread(&h->version[0],sizeof(h->version),1,fp)!=1) return -1;
75 if (be_read_int_4(fp,&h->sample_size)==False) return -1;
76 if (be_read_int_4(fp,&h->code_set)==False) return -1;
77 if (be_read_int_4(fp,&h->private_size)==False) return -1;
78 if (be_read_int_4(fp,&h->private_offset)==False) return -1;
79 for (i=0;i<18;i++)
80 if (be_read_int_4(fp,&h->spare[i])==False) return -1;
81
82 return 0;
83 }
84
85
86 int read_scf_sample1(FILE *fp, Samples1 *s)
87 {
88 uint_1 buf[4];
89
90 if (4 != fread(buf, 1, 4, fp)) return -1;
91 s->sample_A = buf[0];
92 s->sample_C = buf[1];
93 s->sample_G = buf[2];
94 s->sample_T = buf[3];
95
96 /*
97 if (1 != fread(s, 4, 1, fp)) return -1;
98 */
99
100 return 0;
101 }
102
103
104 int read_scf_sample2(FILE *fp, Samples2 *s)
105 {
106 uint_2 buf[4];
107
108 if (4 != fread(buf, 2, 4, fp)) return -1;
109 s->sample_A = be_int2(buf[0]);
110 s->sample_C = be_int2(buf[1]);
111 s->sample_G = be_int2(buf[2]);
112 s->sample_T = be_int2(buf[3]);
113
114 return 0;
115 }
116
117 int read_scf_samples1(FILE *fp, Samples1 *s, size_t num_samples) {
118 size_t i;
119
120 for (i = 0; i < num_samples; i++) {
121 if (-1 == read_scf_sample1(fp, &(s[i])))
122 return -1;
123 }
124
125 return 0;
126 }
127
128
129 int read_scf_samples2(FILE *fp, Samples2 *s, size_t num_samples) {
130 size_t i;
131
132 for (i = 0; i < num_samples; i++) {
133 if (-1 == read_scf_sample2(fp, &(s[i])))
134 return -1;
135 }
136
137 return 0;
138 }
139
140
141 int read_scf_samples32(FILE *fp, Samples2 *s, size_t num_samples) {
142 size_t i;
143 uint2 *samples_out;
144
145 /* version to read delta delta data in 2 bytes */
146
147 if ( ! (samples_out = (uint2 *)xmalloc((num_samples+1) *
148 sizeof(uint2)))) {
149 return -1;
150 }
151
152
153 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
154 #ifdef SP_LITTLE_ENDIAN
155 for (i = 0; i < num_samples; i++) {
156 samples_out[i] = be_int2(samples_out[i]);
157 }
158 #endif
159 scf_delta_samples2 ( samples_out, num_samples, 0);
160 for (i = 0; i < num_samples; i++) {
161 (&s[i])->sample_A = samples_out[i];
162 }
163
164 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
165 #ifdef SP_LITTLE_ENDIAN
166 for (i = 0; i < num_samples; i++) {
167 samples_out[i] = be_int2(samples_out[i]);
168 }
169 #endif
170 scf_delta_samples2 ( samples_out, num_samples, 0);
171 for (i = 0; i < num_samples; i++) {
172 (&s[i])->sample_C = samples_out[i];
173 }
174
175 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
176 #ifdef SP_LITTLE_ENDIAN
177 for (i = 0; i < num_samples; i++) {
178 samples_out[i] = be_int2(samples_out[i]);
179 }
180 #endif
181 scf_delta_samples2 ( samples_out, num_samples, 0);
182 for (i = 0; i < num_samples; i++) {
183 (&s[i])->sample_G = samples_out[i];
184 }
185
186 if (num_samples != fread(samples_out, 2, num_samples, fp)) return -1;
187 #ifdef SP_LITTLE_ENDIAN
188 for (i = 0; i < num_samples; i++) {
189 samples_out[i] = be_int2(samples_out[i]);
190 }
191 #endif
192 scf_delta_samples2 ( samples_out, num_samples, 0);
193 for (i = 0; i < num_samples; i++) {
194 (&s[i])->sample_T = samples_out[i];
195 }
196 xfree(samples_out);
197 return 0;
198 }
199
200 int read_scf_samples31(FILE *fp, Samples1 *s, size_t num_samples) {
201 size_t i;
202 int1 *samples_out;
203
204 /* version to read delta delta data in 1 byte */
205
206 if ( ! (samples_out = (int1 *)xmalloc((num_samples+1) *
207 sizeof(int1)))) {
208 return -1;
209 }
210
211 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
212 scf_delta_samples1 ( samples_out, num_samples, 0);
213 for (i = 0; i < num_samples; i++) {
214 (&s[i])->sample_A = samples_out[i];
215 }
216
217 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
218 scf_delta_samples1 ( samples_out, num_samples, 0);
219 for (i = 0; i < num_samples; i++) {
220 (&s[i])->sample_C = samples_out[i];
221 }
222
223 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
224 scf_delta_samples1 ( samples_out, num_samples, 0);
225 for (i = 0; i < num_samples; i++) {
226 (&s[i])->sample_G = samples_out[i];
227 }
228
229 if (num_samples != fread(samples_out, 1, num_samples, fp)) return -1;
230 scf_delta_samples1 ( samples_out, num_samples, 0);
231 for (i = 0; i < num_samples; i++) {
232 (&s[i])->sample_T = samples_out[i];
233 }
234
235 xfree(samples_out);
236 return 0;
237 }
238
239 int read_scf_base(FILE *fp, Bases *b)
240 {
241 uint_1 buf[12];
242
243 if (1 != fread(buf, 12, 1, fp)) return -1;
244 b->peak_index = be_int4(((uint_4 *)buf)[0]);
245 b->prob_A = buf[4];
246 b->prob_C = buf[5];
247 b->prob_G = buf[6];
248 b->prob_T = buf[7];
249 b->base = buf[8];
250 b->spare[0] = buf[9];
251 b->spare[1] = buf[10];
252 b->spare[2] = buf[11];
253
254 return 0;
255 }
256
257
258 int read_scf_bases(FILE *fp, Bases *b, size_t num_bases) {
259 size_t i;
260
261 for (i = 0; i < num_bases; i++) {
262 if (-1 == read_scf_base(fp, &(b[i])))
263 return -1;
264 }
265
266 return 0;
267 }
268
269 int read_scf_bases3(FILE *fp, Bases *b, size_t num_bases)
270 {
271 size_t i;
272 uint_4 *buf4;
273 uint_1 *buf1;
274
275 if (NULL == (buf4 = (uint_4 *)xmalloc(1 + 4 * num_bases)))
276 return -1;
277
278 if (NULL == (buf1 = (uint_1 *)xmalloc(1 + 8 * num_bases))) {
279 xfree(buf4);
280 return -1;
281 }
282
283 if (num_bases != fread(buf4, 4, num_bases, fp)) return -1;
284 for (i=0; i < num_bases; i++)
285 (&b[i])->peak_index = be_int4(buf4[i]);
286
287 if (8 * num_bases != fread(buf1, 1, 8 * num_bases, fp)) return -1;
288
289 for (i=0; i < num_bases; i++) {
290 (&b[i])->prob_A = buf1[i];
291 (&b[i])->prob_C = buf1[i+num_bases];
292 (&b[i])->prob_G = buf1[i+2*num_bases];
293 (&b[i])->prob_T = buf1[i+3*num_bases];
294 (&b[i])->base = buf1[i+4*num_bases];
295 (&b[i])->spare[0] = buf1[i+5*num_bases];
296 (&b[i])->spare[1] = buf1[i+6*num_bases];
297 (&b[i])->spare[2] = buf1[i+7*num_bases];
298 }
299
300 xfree(buf4);
301 xfree(buf1);
302
303 return 0;
304 }
305
306
307
308 int read_scf_comment(FILE *fp, Comments *c, size_t s)
309 {
310 if (fread(c, 1, s, fp) != s) return -1;
311
312 return 0;
313 }
314
315
316 /*
317 * Read the SCF format sequence from FILE *fp into a 'scf' structure.
318 * A NULL result indicates failure.
319 */
320 Scf *fread_scf(FILE *fp) {
321 Scf *scf;
322 Header h;
323 int err;
324 float scf_version;
325 int sections = read_sections(0);
326
327 /* Read header */
328 if (read_scf_header(fp, &h) == -1) {
329 return NULL;
330 }
331
332 /* Allocate memory */
333 if (NULL == (scf = scf_allocate(h.samples, h.sample_size,
334 h.bases, h.comments_size,
335 h.private_size)))
336 return NULL;
337
338 /* fake things for older style SCF -- SD */
339 if (h.sample_size != 1 && h.sample_size != 2) h.sample_size = 1;
340
341 scf_version = scf_version_str2float(h.version);
342
343 memcpy(&scf->header, &h, sizeof(Header));
344
345 if (sections & READ_SAMPLES) {
346 /* Read samples */
347 if (fseek(fp, (off_t)h.samples_offset, 0 /* SEEK_SET */) != 0) {
348 scf_deallocate(scf);
349 return NULL;
350 }
351
352 if ( 2.9 > scf_version ) {
353
354 if (h.sample_size == 1) {
355 err= read_scf_samples1(fp, scf->samples.samples1, h.samples);
356 }
357 else {
358 err= read_scf_samples2(fp, scf->samples.samples2, h.samples);
359 }
360 }
361 else {
362
363 if (h.sample_size == 1) {
364 err= read_scf_samples31(fp, scf->samples.samples1, h.samples);
365 }
366 else {
367 err= read_scf_samples32(fp, scf->samples.samples2, h.samples);
368 }
369 }
370 if (-1 == err) {
371 scf_deallocate(scf);
372 return NULL;
373 }
374 }
375
376 if (sections & READ_BASES) {
377 /* Read bases */
378 if (fseek(fp, (off_t)h.bases_offset, 0 /* SEEK_SET */) != 0) {
379 scf_deallocate(scf);
380 return NULL;
381 }
382
383 if ( 2.9 > scf_version ) {
384
385 if (-1 == read_scf_bases(fp, scf->bases, h.bases)) {
386 scf_deallocate(scf);
387 return NULL;
388 }
389 }
390 else {
391
392 if (-1 == read_scf_bases3(fp, scf->bases, h.bases)) {
393 scf_deallocate(scf);
394 return NULL;
395 }
396 }
397 }
398
399 if (sections & READ_COMMENTS) {
400 /* Read comments */
401 if (scf->comments) {
402 if (fseek(fp,(off_t)(h.comments_offset), 0) != 0
403 || -1 == read_scf_comment(fp, scf->comments,
404 h.comments_size)) {
405 /*
406 * Was: "scf_deallocate(scf); return NULL;".
407 * We now simply clear the comments and gracefully continue.
408 */
409 fprintf(stderr, "Warning: SCF file had invalid comment field\n");
410 xfree(scf->comments);
411 scf->comments = NULL;
412 } else {
413 scf->comments[h.comments_size] = '\0';
414 }
415 }
416 }
417
418 /* Read private data */
419 if (h.private_size) {
420 if (-1 == fseek(fp, (off_t)(h.private_offset), 0) ||
421 h.private_size != fread(scf->private_data, 1, h.private_size, fp)){
422 scf_deallocate(scf);
423 return NULL;
424 }
425 }
426
427 return scf;
428 }
429
430 /*
431 * Read the SCF format sequence with name `fn' into a 'scf' structure.
432 * A NULL result indicates failure.
433 */
434 Scf *read_scf(char *fn) {
435 Scf *scf;
436
437 FILE *fp;
438
439 /* Open fn for reading in binary mode */
440
441 if (NULL == (fp = fopen_compressed(fn, NULL)))
442 return NULL;
443
444 scf = fread_scf(fp);
445 fclose(fp);
446
447 return scf;
448 }