comparison srf2fastq/io_lib-1.12.2/io_lib/seqIOABI.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 /*
2 * Copyright (c) Medical Research Council 1994. All rights reserved.
3 *
4 * Permission to use, copy, modify and distribute this software and its
5 * documentation for any purpose is hereby granted without fee, provided that
6 * this copyright and notice appears in all copies.
7 *
8 * This file was written by James Bonfield, Simon Dear, Rodger Staden,
9 * as part of the Staden Package at the MRC Laboratory of Molecular
10 * Biology, Hills Road, Cambridge, CB2 2QH, United Kingdom.
11 *
12 * MRC disclaims all warranties with regard to this software.
13 */
14
15 /*
16 * Title: seqIOABI
17 *
18 * File: seqIOABI.c
19 * Purpose: Reading (not writing) of ABI sequences
20 * Last update: Fri Sep 02, 1994
21 *
22 * Change log:
23 * 27/11/90 SD writeSeqABI() outputs header to sequence file:
24 * format: ;{noOfBases}{leftCutOff}{basesWritten}{type}{tracefile}
25 * eg: ; 867 45 383ABI a09b7.s1RES
26 * 28.11.90 SD put undesirables under STLOUIS compilation flag
27 * 11.12.90 SD new static function tail to find file name in path name
28 * 02.01.91 SD Merged with St.L version
29 * 15.01.91 SD New include added (opp.h)
30 * 30.07.91 SD Those ole FWO_ field blues
31 * 17.09.91 LFW changed STLOUIS compilation flag to SAVE_EDITS
32 * and AUTO_CLIP
33 * 25.10.91 SD Machine independant I/O...removed BIGENDIAN flag
34 * 21.07.92 LFW Added finding of primer position
35 * 11.11.92 LFW added section to actually check that the trace it
36 * is trying to open is an ALF file using traceType sub
37 * 10.11.92 SD FWO_ and S/N% interpretation. Comments for information
38 * window.
39 * 05-Jul-93 SD Added code to check base positions are in order and adjust
40 * them if they are not
41 * 02.09.94 JKB Change to use Read instead of Seq library.
42 */
43
44
45 /*
46 * In the absense of any better format to store our ABI data in we use
47 * the Read structure. Hence this module should be considered part of the
48 * Read libary.
49 *
50 * This library also requires use of the mach-io code for the endian
51 * independent machine IO.
52 *
53 * The ABI results file is controlled by an index found towards
54 * the end --- this is pointed to by a longword found at `IndexPO'.
55 * The index consists of a number of entries, each of which is
56 * four character label followed by 6 long words. The first of these
57 * long words holds a simple count (starting at 1) for those cases
58 * where there are multiple entries with the same label. Entries should
59 * be found by label (and count), rather than their index position,
60 * because entries can be ommited or new ones added. This happens when
61 * ABI changes the version of their software and also depending
62 * on whether the data was analysed or unalaysed. We do, however,
63 * make assumptions about the relative order of entries.
64 *
65 * Ideally we would have a separate module which provides a number
66 * of functions to extract the data we are interested in, keeping
67 * the ABI format well wrapped up and out of harms way.
68 *
69 * Note that we are relying on the endian-ness of the machine being
70 * appropriate so we can just read long words in as integers. This
71 * should be recoded to deal with running on different endians.
72 */
73
74
75
76
77 /* ---- Imports ---- */
78
79 #include <stdio.h>
80 #include <stdlib.h>
81 #include <ctype.h>
82 #include <string.h>
83 #include <time.h>
84
85 #include "io_lib/stdio_hack.h"
86
87 #include "io_lib/seqIOABI.h"
88 #include "io_lib/Read.h"
89 #include "io_lib/abi.h"
90 #include "io_lib/fpoint.h" /* IMPORT: int_to_float */
91 #include "io_lib/mach-io.h"
92 #include "io_lib/xalloc.h"
93 #include "io_lib/misc.h"
94
95 /* ---- Constants ---- */
96
97 #define BasesPerLine 50 /* For output formatting */
98
99 #define baseIndex(B) ((B)=='C'?0:(B)=='A'?1:(B)=='G'?2:3)
100
101 static int header_fudge = 0;
102
103 /* DATA block numbers for traces, in order of FWO_ */
104 static int DataCount[4] = {9, 10, 11, 12};
105
106 int dump_labels(FILE *fp, off_t indexO) {
107 off_t entryNum = -1;
108 uint_4 entryLabel, entryLw1;
109
110 do {
111 entryNum++;
112
113 if (fseek(fp, header_fudge+indexO+(entryNum*IndexEntryLength), 0) != 0)
114 return 0;
115
116 if (!be_read_int_4(fp, &entryLabel))
117 return 0;
118
119 if (!be_read_int_4(fp, &entryLw1))
120 return 0;
121
122 if (entryLabel) {
123 unsigned char c1, c2, c3, c4;
124
125 c1 = (entryLabel >> 24) & 0xff;
126 c2 = (entryLabel >> 16) & 0xff;
127 c3 = (entryLabel >> 8) & 0xff;
128 c4 = (entryLabel >> 0) & 0xff;
129
130 if (!isprint(c1))
131 break;
132
133 printf("%c%c%c%c %d\n", c1, c2, c3, c4, entryLw1);
134 }
135 } while (entryLabel);
136
137 return 0;
138 }
139
140 /*
141 * From the ABI results file connected to `fp' whose index starts
142 * at byte offset `indexO', return in `val' the `lw'th long word
143 * from the `count'th entry labelled `label'.
144 * The result is 0 for failure, or index offset for success.
145 */
146 int getABIIndexEntryLW(FILE *fp, off_t indexO,
147 uint_4 label, uint_4 count, int lw,
148 uint_4 *val) {
149 off_t entryNum=-1;
150 int i;
151 uint_4 entryLabel, entryLw1;
152
153 do {
154 entryNum++;
155
156 if (fseek(fp, header_fudge+indexO+(entryNum*IndexEntryLength), 0) != 0)
157 return 0;
158
159 if (!be_read_int_4(fp, &entryLabel))
160 return 0;
161
162 if (!be_read_int_4(fp, &entryLw1))
163 return 0;
164 } while (!(entryLabel == label && entryLw1 == count));
165
166 for(i=2; i<=lw; i++)
167 if (!be_read_int_4(fp, val))
168 return 0;
169
170 return indexO+(entryNum*IndexEntryLength);
171 }
172
173 /*
174 * From the ABI results file connected to `fp' whose index starts
175 * at byte offset `indexO', return in `val' the `sw'th short word
176 * from the `count'th entry labelled `label'.
177 * The result is 0 for failure, or index offset for success.
178 */
179 int getABIIndexEntrySW(FILE *fp, off_t indexO,
180 uint_4 label, uint_4 count, int sw,
181 uint_2 *val) {
182 off_t entryNum=-1;
183 int i;
184 uint_4 entryLabel, entryLw1;
185
186 do {
187 entryNum++;
188
189 if (fseek(fp, header_fudge+indexO+(entryNum*IndexEntryLength), 0) != 0)
190 return 0;
191
192 if (!be_read_int_4(fp, &entryLabel))
193 return 0;
194
195 if (!be_read_int_4(fp, &entryLw1))
196 return 0;
197 } while (!(entryLabel == label && entryLw1 == count));
198
199 for(i=4; i<=sw; i++)
200 if (!be_read_int_2(fp, val))
201 return 0;
202
203 return indexO+(entryNum*IndexEntryLength);
204 }
205
206
207 /*
208 * Gets the offset of the ABI index.
209 * Returns -1 for failure, 0 for success.
210 */
211 int getABIIndexOffset(FILE *fp, uint_4 *indexO) {
212 uint_4 magic;
213
214 /*
215 * Initialise header_fudge.
216 *
217 * This is usually zero, but maybe we've transfered a file in MacBinary
218 * format in which case we'll have an extra 128 bytes to add to all
219 * our fseeks.
220 */
221 rewind(fp);
222 be_read_int_4(fp, &magic);
223 header_fudge = (magic == ABI_MAGIC ? 0 : 128);
224
225 if ((fseek(fp, header_fudge + IndexPO, 0) != 0) ||
226 (!be_read_int_4(fp, indexO)))
227 return -1;
228 else
229 return 0;
230 }
231
232 /*
233 * Get an "ABI String". These strings are either pointed to by the index
234 * offset, or held in the offset itself when the string is <= 4 characters.
235 * The "type" of the index entry is either 0x12 (a pascal string in which
236 * case the first byte of the string determines its length) or a 0x02 (a
237 * C-style string with length coming from the abi index).
238 *
239 * "string" will be max 256 bytes for the pascal string, but is of unknown
240 * (and hence potentially buggy) length for C-strings. For now we live with
241 * it as this entire file needs rewriting from scratch anyway.
242 *
243 * Returns -1 for failure, string length for success.
244 */
245 int getABIString(FILE *fp, off_t indexO, uint_4 label, uint_4 count,
246 char *string) {
247 uint_4 off;
248 uint_4 len;
249 uint_2 type;
250
251 off = getABIIndexEntrySW(fp, indexO, label, count, 4, &type);
252 if (!off)
253 return -1;
254
255 if (off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len)) {
256 uint_1 len2;
257
258 if (!len)
259 return 0;
260
261 /* Determine offset */
262 if (len <= 4)
263 off += 20;
264 else
265 getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
266
267 /* Read length byte */
268 if (type == 0x12) {
269 fseek(fp, header_fudge + off, 0);
270 be_read_int_1(fp, &len2);
271 } else {
272 len2 = len;
273 }
274
275 /* Read data (max 255 bytes) */
276 fread(string, len2, 1, fp);
277 string[len2] = 0;
278
279 return len2;
280 } else
281 return -1;
282 }
283
284 static void replace_nl(char *string) {
285 char *cp;
286
287 for (cp = string; *cp; cp++) {
288 if (*cp == '\n') *cp = ' ';
289 }
290 }
291
292
293 /*
294 * Get an "ABI Int_1". This is raw 1-byte integer data pointed to by the
295 * offset, or held in the offset itself when the data is <= 4 characters.
296 *
297 * If indexO is 0 then we do not search for (or indeed use) label and count,
298 * but simply assume that we are already at the correct offset and read from
299 * here. (NB: This negates the length <= 4 check.)
300 *
301 * Returns -1 for failure, length desired for success (it'll only fill out
302 * up to max_data_len elements, but it gives an indication of whether there
303 * was more to come).
304 */
305 int getABIint1(FILE *fp, off_t indexO, uint_4 label, uint_4 count,
306 uint_1 *data, int max_data_len) {
307 uint_4 off;
308 uint_4 len, len2;
309
310 if (indexO) {
311 if (!(off = getABIIndexEntryLW(fp, indexO, label, count, 4, &len)))
312 return -1;
313
314 if (!len)
315 return 0;
316
317 /* Determine offset */
318 if (len <= 4)
319 off += 20;
320 else
321 getABIIndexEntryLW(fp, indexO, label, count, 5, &off);
322
323 len2 = MIN((uint_4)max_data_len, len);
324
325 fseek(fp, header_fudge + off, 0);
326 } else {
327 len = len2 = max_data_len;
328 }
329
330 fread(data, len2, 1, fp);
331
332 return len;
333 }
334
335 /*
336 * Get an "ABI Int_2". This is raw 2-byte integer data pointed to by the
337 * offset, or held in the offset itself when the data is <= 4 characters.
338 *
339 * Returns -1 for failure, length desired for success (it'll only fill out
340 * up to max_data_len elements, but it gives an indication of whether there
341 * was more to come).
342 */
343 int getABIint2(FILE *fp, off_t indexO, uint_4 label, uint_4 count,
344 uint_2 *data, int max_data_len) {
345 int len, l2;
346 int i;
347
348 len = getABIint1(fp, indexO, label, count, (uint_1 *)data, max_data_len*2);
349 if (-1 == len)
350 return -1;
351
352 len /= 2;
353 l2 = MIN(len, max_data_len);
354 for (i = 0; i < l2; i++) {
355 data[i] = be_int2(data[i]);
356 }
357
358 return len;
359 }
360
361 /*
362 * Get an "ABI Int_4". This is raw 4-byte integer data pointed to by the
363 * offset, or held in the offset itself when the data is <= 4 characters.
364 *
365 * Returns -1 for failure, length desired for success (it'll only fill out
366 * up to max_data_len elements, but it gives an indication of whether there
367 * was more to come).
368 */
369 int getABIint4(FILE *fp, off_t indexO, uint_4 label, uint_4 count,
370 uint_4 *data, int max_data_len) {
371 int len, l2;
372 int i;
373
374 len = getABIint1(fp, indexO, label, count, (uint_1 *)data, max_data_len*4);
375 if (-1 == len)
376 return -1;
377
378 len /= 4;
379 l2 = MIN(len, max_data_len);
380 for (i = 0; i < l2; i++) {
381 data[i] = be_int4(data[i]);
382 }
383
384 return len;
385 }
386
387 /*
388 * Change the DATA counts for fetching traces
389 */
390 void abi_set_data_counts(int f, int w, int o, int _) {
391 DataCount[0] = f;
392 DataCount[1] = w;
393 DataCount[2] = o;
394 DataCount[3] = _;
395 }
396
397 /*
398 * Put the DATA counts back to their defaults.
399 */
400 void abi_reset_data_counts(void) {
401 DataCount[0] = 9;
402 DataCount[1] = 10;
403 DataCount[2] = 11;
404 DataCount[3] = 12;
405 }
406
407 /*
408 * Read the ABI format sequence from FILE *fp into a Read structure.
409 * All printing characters (as defined by ANSII C `isprint')
410 * are accepted, but `N's are translated to `-'s. In this respect we
411 * are adhering (more or less) to the CSET_DEFAULT uncertainty code set.
412 *
413 * Returns:
414 * Read * - Success, the Read structure read.
415 * NULLRead - Failure.
416 */
417 Read *fread_abi(FILE *fp) {
418 Read *read = NULLRead;
419 int i;
420 float fspacing; /* average base spacing */
421 uint_4 numPoints, numBases;
422 uint_4 signalO;
423 int no_bases = 0;
424 int sections = read_sections(0);
425 uint_1 *conf;
426
427 uint_4 fwo_; /* base -> lane mapping */
428 uint_4 indexO; /* File offset where the index is */
429 uint_4 baseO; /* File offset where the bases are stored */
430 uint_4 dataCO; /* File offset where the C trace is stored */
431 uint_4 dataAO; /* File offset where the A trace is stored */
432 uint_4 dataGO; /* File offset where the G trace is stored */
433 uint_4 dataTO; /* File offset where the T trace is stored */
434 uint_4 offset; /* Generic offset */
435 uint_4 offset2; /* Generic offset */
436 uint_4 offset3; /* Generic offset */
437 uint_4 offset4; /* Generic offset */
438
439
440
441 /* Get the index offset */
442 if (-1 == getABIIndexOffset(fp, &indexO))
443 goto bail_out;
444
445 /* Get the number of points */
446 if (!getABIIndexEntryLW(fp,(off_t)indexO,DataEntryLabel,DataCount[0],
447 3,&numPoints))
448 goto bail_out;
449
450 /* Get the number of bases */
451 if (!getABIIndexEntryLW(fp,(off_t)indexO,BaseEntryLabel,1,3,&numBases)) {
452 no_bases = 1;
453 numBases = 0;
454 }
455
456
457 /* Allocate the sequence */
458 if (NULLRead == (read = read_allocate(numPoints, numBases)))
459 goto bail_out;
460
461 /* Get the Filter Wheel Order (FWO_) field ... */
462 if (!getABIIndexEntryLW(fp,(off_t)indexO,FWO_Label,1,5,&fwo_)) {
463 /* Guess at CAGT */
464 fwo_ = 0x43414754;
465 }
466
467 /*
468 * The order of the DATA fields is determined by the field FWO_
469 * Juggle around with data pointers to get it right
470 */
471 if (sections & READ_SAMPLES) {
472 uint_4 *dataxO[4];
473
474 dataxO[0] = &dataCO;
475 dataxO[1] = &dataAO;
476 dataxO[2] = &dataGO;
477 dataxO[3] = &dataTO;
478
479 /*Get the positions of the four traces */
480 if (!(getABIIndexEntryLW(fp, (off_t)indexO, DataEntryLabel,
481 DataCount[0], 5,
482 dataxO[baseIndex((char)(fwo_>>24&255))]) &&
483 getABIIndexEntryLW(fp, (off_t)indexO, DataEntryLabel,
484 DataCount[1], 5,
485 dataxO[baseIndex((char)(fwo_>>16&255))]) &&
486 getABIIndexEntryLW(fp, (off_t)indexO, DataEntryLabel,
487 DataCount[2], 5,
488 dataxO[baseIndex((char)(fwo_>>8&255))]) &&
489 getABIIndexEntryLW(fp, (off_t)indexO, DataEntryLabel,
490 DataCount[3], 5,
491 dataxO[baseIndex((char)(fwo_&255))]))) {
492 goto bail_out;
493 }
494 }
495
496
497 /*************************************************************
498 * Read the traces and bases information
499 *************************************************************/
500
501 if (sections & READ_SAMPLES) {
502 /* Read in the C trace */
503 if (fseek(fp, header_fudge + (off_t)dataCO, 0) == -1) goto bail_out;
504 getABIint2(fp, 0, 0, 0, read->traceC, read->NPoints);
505
506 /* Read in the A trace */
507 if (fseek(fp, header_fudge + (off_t)dataAO, 0) == -1) goto bail_out;
508 getABIint2(fp, 0, 0, 0, read->traceA, read->NPoints);
509
510 /* Read in the G trace */
511 if (fseek(fp, header_fudge + (off_t)dataGO, 0) == -1) goto bail_out;
512 getABIint2(fp, 0, 0, 0, read->traceG, read->NPoints);
513
514 /* Read in the T trace */
515 if (fseek(fp, header_fudge + (off_t)dataTO, 0) == -1) goto bail_out;
516 getABIint2(fp, 0, 0, 0, read->traceT, read->NPoints);
517
518 /* Compute highest trace peak */
519 for (i=0; i < read->NPoints; i++) {
520 if (read->maxTraceVal < read->traceA[i])
521 read->maxTraceVal = read->traceA[i];
522 if (read->maxTraceVal < read->traceC[i])
523 read->maxTraceVal = read->traceC[i];
524 if (read->maxTraceVal < read->traceG[i])
525 read->maxTraceVal = read->traceG[i];
526 if (read->maxTraceVal < read->traceT[i])
527 read->maxTraceVal = read->traceT[i];
528 }
529 }
530
531 if (no_bases || !(sections & READ_BASES))
532 goto skip_bases;
533
534 /* Read in base confidence values */
535 if (!(conf = (uint_1 *)xcalloc(sizeof(*conf), read->NBases)))
536 goto bail_out;
537 getABIint1(fp, indexO, BaseConfLabel, 1, conf, read->NBases);
538
539 /* Read in the bases */
540 if (!(getABIIndexEntryLW(fp, (off_t)indexO, BaseEntryLabel, 1, 5, &baseO)
541 && (fseek(fp, header_fudge + (off_t)baseO, 0) == 0) ))
542 goto bail_out;
543
544 for (i = 0; i < (read->NBases); i++) {
545 int ch;
546
547 if ((ch = fgetc(fp)) == EOF)
548 goto bail_out;
549
550 read->base[i] = (ch == 'N') ? '-' : (char)ch;
551 switch(read->base[i]) {
552 case 'A':
553 case 'a':
554 read->prob_A[i] = conf[i];
555 read->prob_C[i] = 0;
556 read->prob_G[i] = 0;
557 read->prob_T[i] = 0;
558 break;
559
560 case 'C':
561 case 'c':
562 read->prob_A[i] = 0;
563 read->prob_C[i] = conf[i];
564 read->prob_G[i] = 0;
565 read->prob_T[i] = 0;
566 break;
567
568 case 'G':
569 case 'g':
570 read->prob_A[i] = 0;
571 read->prob_C[i] = 0;
572 read->prob_G[i] = conf[i];
573 read->prob_T[i] = 0;
574 break;
575
576 case 'T':
577 case 't':
578 read->prob_A[i] = 0;
579 read->prob_C[i] = 0;
580 read->prob_G[i] = 0;
581 read->prob_T[i] = conf[i];
582 break;
583
584 default:
585 read->prob_A[i] = 0;
586 read->prob_C[i] = 0;
587 read->prob_G[i] = 0;
588 read->prob_T[i] = 0;
589 break;
590 }
591 }
592 read->base[i] = 0;
593 xfree(conf);
594
595
596 /* Read in the base positions */
597 if (-1 == getABIint2(fp, indexO, BasePosEntryLabel, 1, read->basePos,
598 read->NBases))
599 goto bail_out;
600
601 /*
602 * Check for corrupted traces where the bases are positioned on sample
603 * coordinates which do not exist. Witnessed on some MegaBACE files.
604 */
605 if (read->basePos[read->NBases-1] > read->NPoints) {
606 int n = read->basePos[read->NBases-1]+1;
607 read->traceA = (TRACE *)xrealloc(read->traceA, n * sizeof(TRACE));
608 read->traceC = (TRACE *)xrealloc(read->traceC, n * sizeof(TRACE));
609 read->traceG = (TRACE *)xrealloc(read->traceG, n * sizeof(TRACE));
610 read->traceT = (TRACE *)xrealloc(read->traceT, n * sizeof(TRACE));
611
612 if (read->traceA == NULL || read->traceC == NULL ||
613 read->traceC == NULL || read->traceG == NULL)
614 goto bail_out;
615
616 for (i = read->NPoints; i < n; i++) {
617 read->traceA[i] = 0;
618 read->traceC[i] = 0;
619 read->traceG[i] = 0;
620 read->traceT[i] = 0;
621 }
622 read->NPoints = n;
623 }
624
625 skip_bases:
626
627
628 /*************************************************************
629 * Gather useful information - the comments field
630 *************************************************************/
631 if (sections & READ_COMMENTS) {
632 char buffer[257];
633 char comment[8192], line[8192];
634 char commstr[256], *commstrp;
635 int clen;
636 int_4 spacing;
637 uint_2 i2;
638 uint_4 i4;
639
640 *comment = '\0';
641
642 /* The ABI comments */
643 clen = getABIString(fp, indexO, CMNTLabel, 1, commstr);
644 if (clen != -1) {
645 char *p;
646
647 commstr[clen] = 0;
648 commstrp = commstr;
649
650 do {
651 char line[300];
652
653 if (p = strchr(commstrp, '\n'))
654 *p++ = 0;
655
656 sprintf(line, "COMM=%s\n", commstrp);
657 strcat(comment, line);
658 } while(commstrp = p);
659 }
660
661
662 /* Get Sample Name Offset */
663 if (-1 != getABIString(fp, indexO, SMPLLabel, 1, buffer)) {
664 replace_nl(buffer);
665 sprintf(line, "NAME=%s\n", buffer);
666 strcat(comment, line);
667 }
668
669 /* LANE */
670 if (-1 != getABIint2(fp, indexO, LANELabel, 1, &i2, 1)) {
671 sprintf(line, "LANE=%d\n", i2);
672 strcat(comment, line);
673 }
674
675 /* Get Signal Strength Offset */
676 if (getABIIndexEntryLW(fp, (off_t)indexO, SignalEntryLabel, 1, 5,
677 &signalO)) {
678 int_2 C,A,G,T;
679 int_2 *base[4];
680 base[0] = &C;
681 base[1] = &A;
682 base[2] = &G;
683 base[3] = &T;
684
685 if (fseek(fp, header_fudge + (off_t)signalO, 0) != -1 &&
686 be_read_int_2(fp, (uint_2 *)
687 base[baseIndex((char)(fwo_>>24&255))]) &&
688 be_read_int_2(fp, (uint_2 *)
689 base[baseIndex((char)(fwo_>>16&255))]) &&
690 be_read_int_2(fp, (uint_2 *)
691 base[baseIndex((char)(fwo_>>8&255))]) &&
692 be_read_int_2(fp, (uint_2 *)
693 base[baseIndex((char)(fwo_&255))])) {
694 sprintf(line, "SIGN=A=%d,C=%d,G=%d,T=%d\n",
695 A, C, G, T);
696 strcat(comment, line);
697 }
698 }
699
700 /* Get the spacing.. it's a float but don't worry yet */
701 fspacing = 0;
702 if (-1 != getABIint4(fp, indexO, SpacingEntryLabel, 1,
703 (uint_4 *)&spacing, 1)) {
704 fspacing = int_to_float(spacing);
705 sprintf(line, "SPAC=%-6.2f\n", fspacing);
706 strcat(comment, line);
707 }
708 /* Correction for when spacing is negative. Why does this happen? */
709 if (fspacing <= 0) {
710 if (read->NBases > 1) {
711 if (sections & READ_BASES)
712 fspacing = (float)(read->basePos[read->NBases-1] -
713 read->basePos[0])
714 / (float) (read->NBases-1);
715 else
716 fspacing = (float) read->NPoints / (float) read->NBases;
717 } else {
718 fspacing = 1;
719 }
720 }
721
722
723 /* Get primer position */
724 if (getABIIndexEntryLW(fp, (off_t)indexO, PPOSLabel, 1, 5,
725 (uint_4 *)&i4)) {
726 /* ppos stores in MBShort of pointer */
727 sprintf(line, "PRIM=%d\n", (i4>>16));
728 strcat(comment, line);
729 }
730
731 /* RUND/RUNT */
732 if (getABIIndexEntryLW(fp, (off_t)indexO, RUNDLabel, 1, 5, &offset) &&
733 getABIIndexEntryLW(fp, (off_t)indexO, RUNDLabel, 2, 5, &offset2) &&
734 getABIIndexEntryLW(fp, (off_t)indexO, RUNTLabel, 1, 5, &offset3) &&
735 getABIIndexEntryLW(fp, (off_t)indexO, RUNTLabel, 2, 5, &offset4)) {
736 char buffer[1025];
737 char buffer_s[1025];
738 char buffer_e[1025];
739 struct tm t;
740 uint_4 rund_s, rund_e, runt_s, runt_e;
741
742 rund_s = offset;
743 rund_e = offset2;
744 runt_s = offset3;
745 runt_e = offset4;
746
747 sprintf(buffer, "%04d%02d%02d.%02d%02d%02d - %04d%02d%02d.%02d%02d%02d",
748 rund_s >> 16, (rund_s >> 8) & 0xff, rund_s & 0xff,
749 runt_s >> 24, (runt_s >> 16) & 0xff, (runt_s >> 8) & 0xff,
750 rund_e >> 16, (rund_e >> 8) & 0xff, rund_e & 0xff,
751 runt_e >> 24, (runt_e >> 16) & 0xff, (runt_e >> 8) & 0xff);
752
753 memset(&t, 0, sizeof(t));
754 t.tm_mday = rund_s & 0xff;
755 t.tm_mon = ((rund_s >> 8) & 0xff) - 1;
756 t.tm_year = (rund_s >> 16) - 1900;
757 t.tm_hour = runt_s >> 24;
758 t.tm_min = (runt_s >> 16) & 0xff;
759 t.tm_sec = (runt_s >> 8) & 0xff;
760 t.tm_isdst = -1;
761 /*
762 * Convert struct tm to time_t. We ignore the time_t value, but
763 * the conversion process will update the tm_wday element of
764 * struct tm.
765 */
766 mktime(&t);
767 strftime(buffer_s, 1024, "%a %d %b %H:%M:%S %Y", &t);
768
769 t.tm_mday = rund_e & 0xff;
770 t.tm_mon = ((rund_e >> 8) & 0xff) - 1;
771 t.tm_year = (rund_e >> 16) - 1900;
772 t.tm_hour = runt_e >> 24;
773 t.tm_min = (runt_e >> 16) & 0xff;
774 t.tm_sec = (runt_e >> 8) & 0xff;
775 t.tm_isdst = -1;
776 /*
777 * Convert struct tm to time_t. We ignore the time_t value, but
778 * the conversion process will update the tm_wday element of
779 * struct tm.
780 */
781 mktime(&t);
782 strftime(buffer_e, 1024, "%a %d %b %H:%M:%S %Y", &t);
783
784 sprintf(line, "DATE=%s to %s\nRUND=%s\n",
785 buffer_s, buffer_e, buffer);
786 strcat(comment, line);
787 }
788
789
790 /* Get Dye Primer Offset */
791 if (-1 != getABIString(fp, indexO, PDMFLabel, 1, buffer)) {
792 replace_nl(buffer);
793 sprintf(line, "DYEP=%s\n", buffer);
794 strcat(comment, line);
795 }
796
797 /* Get Machine Name Offset */
798 if (-1 != getABIString(fp, indexO, MCHNLabel, 1, buffer)) {
799 replace_nl(buffer);
800 sprintf(line, "MACH=%s\n", buffer);
801 strcat(comment, line);
802 }
803
804 /* Machine model */
805 if (-1 != getABIString(fp, indexO, MODLLabel, 1, buffer)) {
806 replace_nl(buffer);
807 sprintf(line, "MODL=%s\n", buffer);
808 strcat(comment, line);
809 }
810
811 /* Matrix file */
812 if (-1 != getABIString(fp, indexO, MTXFLabel, 1, buffer)) {
813 replace_nl(buffer);
814 sprintf(line, "MTXF=%s\n", buffer);
815 strcat(comment, line);
816 }
817
818 /* Base calling version */
819 if (-1 != getABIString(fp, indexO, SPACLabel, 2, buffer)) {
820 replace_nl(buffer);
821 sprintf(line, "BCAL=%s\n", buffer);
822 strcat(comment, line);
823 }
824
825 /* Software versions */
826 if (-1 != getABIString(fp, indexO, SVERLabel, 1, buffer)) {
827 replace_nl(buffer);
828 sprintf(line, "VER1=%s\n", buffer);
829 strcat(comment, line);
830 }
831 if (-1 != getABIString(fp, indexO, SVERLabel, 2, buffer)) {
832 replace_nl(buffer);
833 sprintf(line, "VER2=%s\n", buffer);
834 strcat(comment, line);
835 }
836
837 /* Get Gel Name Offset */
838 if (-1 != getABIString(fp, indexO, GelNameLabel, 1, buffer)) {
839 replace_nl(buffer);
840 sprintf(line, "GELN=%s\n", buffer);
841 strcat(comment, line);
842 }
843
844 /* dumplicate string and set info */
845 {
846 char *s = (char *)xmalloc(strlen(comment)+1);
847 strcpy(s,comment);
848 read->info = s;
849 }
850 }
851
852
853
854 /*************************************************************
855 * Check base positions are in order
856 *************************************************************/
857 #if 0
858 /*
859 * Disable for now as the original ABI bug this is meant to fix shouldn't
860 * happen any more, and this has the effect of reordering bases where there
861 * are compressions (which is wrong to do).
862 */
863 if (sections & READ_SAMPLES) {
864 float pos;
865 int start;
866
867 for (i = 1; i < read->NBases; ) {
868 if (read->basePos[i] < read->basePos[i-1]) {
869 fprintf(stderr,"fread_abi(): Base positions are not in order. Fixing (%d=%d, %d=%d)\n", i-1, read->basePos[i-1], i, read->basePos[i]);
870
871 /* pass 1 - find end of region */
872 start = i - 1;
873 pos = (float) read->basePos[i-1] + fspacing;
874 for(;i < read->NBases && (int)read->basePos[i] < pos;i++) {
875 pos += fspacing;
876 }
877
878 /* calculate average base spacing */
879 if (i < read->NBases )
880 fspacing = ((float) read->basePos[i] -
881 (float) read->basePos[start]) /
882 (float)(i - start);
883
884 /* pass 2 - adjust */
885 i = start + 1;
886 pos = (float) read->basePos[i-1] + fspacing;
887 for(;i < read->NBases && (int)read->basePos[i] < pos;i++) {
888 read->basePos[i] = (int) pos;
889 pos += fspacing;
890 }
891
892 } else {
893 i++;
894 }
895 }
896 }
897 #endif
898
899
900 /* SUCCESS */
901
902 read->format = TT_ABI;
903 return(read);
904
905 /* FAILURE */
906 bail_out:
907 if (read)
908 read_deallocate(read);
909
910 return NULLRead;
911 }
912
913 /*
914 * Read the ABI format sequence from file 'fn' into a Read structure.
915 * All printing characters (as defined by ANSII C `isprint')
916 * are accepted, but `N's are translated to `-'s. In this respect we
917 * are adhering (more or less) to the CSET_DEFAULT uncertainty code set.
918 *
919 * Returns:
920 * Read * - Success, the Read structure read.
921 * NULLRead - Failure.
922 */
923 Read *read_abi(char *fn) {
924 Read *read;
925 FILE *fp;
926
927 /* Open file */
928 if ((fp = fopen(fn, "rb")) == NULL)
929 return NULLRead;
930
931 read = fread_abi(fp);
932 fclose(fp);
933
934 if (read && (read->trace_name = (char *)xmalloc(strlen(fn)+1)))
935 strcpy(read->trace_name, fn);
936
937 return read;
938 }
939
940 /*
941 * Write to an ABI file - unsupported.
942 */
943 /* ARGSUSED */
944 int write_abi(char *fn, Read *read) {
945 fprintf(stderr, "ABI write support is unavailable\n");
946 return -1;
947 }
948
949 /*
950 * Write to an ABI file - unsupported.
951 */
952 /* ARGSUSED */
953 int fwrite_abi(FILE *fp, Read *read) {
954 fprintf(stderr, "ABI write support is unavailable\n");
955 return -1;
956 }
957