comparison srf2fastq/io_lib-1.12.2/io_lib/ztr_translate.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #include <stdio.h>
2 #include <string.h>
3
4 #include "io_lib/ztr.h"
5 #include "io_lib/xalloc.h"
6 #include "io_lib/Read.h"
7
8 /* Return the A,C,G,T samples */
9 static char *ztr_encode_samples_4(ztr_t *z,
10 Read *r, int *nbytes, char **mdata,
11 int *mdbytes) {
12 char *bytes;
13 int i, j, t;
14
15 if (!r->NPoints)
16 return NULL;
17
18 if ((z->header.version_major > 1 ||
19 z->header.version_minor >= 2) && r->baseline) {
20 /* 1.2 onwards */
21 char buf[256];
22 int blen;
23 blen = sprintf(buf, "%d", r->baseline);
24 *mdata = (char *)malloc(6+blen);
25 *mdbytes = sprintf(*mdata, "OFFS%c%s", 0, buf) + 1;
26 } else {
27 *mdata = NULL;
28 *mdbytes = 0;
29 }
30
31 bytes = (char *)xmalloc(r->NPoints * sizeof(TRACE)*4 + 2);
32 for (i = 0, j = 2; i < r->NPoints; i++) {
33 t = r->traceA[i];
34 bytes[j++] = (t >> 8) & 0xff;
35 bytes[j++] = (t >> 0) & 0xff;
36 }
37 for (i = 0; i < r->NPoints; i++) {
38 t = r->traceC[i];
39 bytes[j++] = (t >> 8) & 0xff;
40 bytes[j++] = (t >> 0) & 0xff;
41 }
42 for (i = 0; i < r->NPoints; i++) {
43 t = r->traceG[i];
44 bytes[j++] = (t >> 8) & 0xff;
45 bytes[j++] = (t >> 0) & 0xff;
46 }
47 for (i = 0; i < r->NPoints; i++) {
48 t = r->traceT[i];
49 bytes[j++] = (t >> 8) & 0xff;
50 bytes[j++] = (t >> 0) & 0xff;
51 }
52 *nbytes = 4 * r->NPoints * sizeof(TRACE) + 2;
53
54 bytes[0] = ZTR_FORM_RAW;
55 bytes[1] = 0;
56 return bytes;
57 }
58
59 static void ztr_decode_samples_4(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
60 int i, j;
61 int maxTraceVal = 0;
62 TRACE sample;
63 unsigned char *bytes = (unsigned char *)chunk->data;
64 int nbytes = chunk->dlength;
65
66 bytes+=2;
67 nbytes-=2;
68
69 /* Store in the Read structure */
70 r->NPoints = nbytes/8;
71 if (r->traceA) xfree(r->traceA);
72 if (r->traceC) xfree(r->traceC);
73 if (r->traceG) xfree(r->traceG);
74 if (r->traceT) xfree(r->traceT);
75 r->traceA = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE));
76 r->traceC = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE));
77 r->traceG = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE));
78 r->traceT = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE));
79
80 for (i = j = 0; i < r->NPoints; i++, j+=2) {
81 sample = (bytes[j] << 8) | bytes[j+1];
82 r->traceA[i] = sample;
83 if (maxTraceVal < sample)
84 maxTraceVal = sample;
85 }
86 for (i = 0; i < r->NPoints; i++, j+=2) {
87 sample = (bytes[j] << 8) | bytes[j+1];
88 r->traceC[i] = sample;
89 if (maxTraceVal < sample)
90 maxTraceVal = sample;
91 }
92 for (i = 0; i < r->NPoints; i++, j+=2) {
93 sample = (bytes[j] << 8) | bytes[j+1];
94 r->traceG[i] = sample;
95 if (maxTraceVal < sample)
96 maxTraceVal = sample;
97 }
98 for (i = 0; i < r->NPoints; i++, j+=2) {
99 sample = (bytes[j] << 8) | bytes[j+1];
100 r->traceT[i] = sample;
101 if (maxTraceVal < sample)
102 maxTraceVal = sample;
103 }
104
105 r->maxTraceVal = maxTraceVal;
106 }
107
108 /* Return the [A,C,G,T] samples */
109 static char *ztr_encode_samples_common(ztr_t *z,
110 char ident[4], Read *r,
111 TRACE *data, int *nbytes,
112 char **mdata, int *mdbytes) {
113 char *bytes;
114 int i, j, t;
115
116 if (!r->NPoints)
117 return NULL;
118
119 if (z->header.version_major > 1 ||
120 z->header.version_minor >= 2) {
121 /* 1.2 onwards */
122 char buf[256];
123 int blen;
124 if (r->baseline) {
125 blen = sprintf(buf, "%d", r->baseline);
126 *mdata = (char *)malloc(16+blen);
127 *mdbytes = sprintf(*mdata, "TYPE%c%.*s%cOFFS%c%s",
128 0, 4, ident, 0,
129 0, buf) + 1;
130 } else {
131 *mdata = (char *)malloc(10);
132 *mdbytes = sprintf(*mdata, "TYPE%c%.*s", 0, 4, ident) + 1;
133 }
134 } else {
135 *mdata = (char *)malloc(4);
136 *mdbytes = 4;
137 (*mdata)[0] = ident[0];
138 (*mdata)[1] = ident[1];
139 (*mdata)[2] = ident[2];
140 (*mdata)[3] = ident[3];
141 }
142
143 bytes = (char *)xmalloc(r->NPoints * sizeof(TRACE) + 2);
144 for (i = 0, j = 2; i < r->NPoints; i++) {
145 t = data[i];
146 bytes[j++] = (t >> 8) & 0xff;
147 bytes[j++] = (t >> 0) & 0xff;
148 }
149 *nbytes = r->NPoints * sizeof(TRACE) + 2;
150
151 bytes[0] = ZTR_FORM_RAW;
152 bytes[1] = 0;
153 return bytes;
154 }
155
156
157 static char *ztr_encode_samples_A(ztr_t *z,
158 Read *r, int *nbytes, char **mdata,
159 int *mdbytes) {
160 return ztr_encode_samples_common(z, "A\0\0", r, r->traceA,
161 nbytes, mdata, mdbytes);
162 }
163
164 static char *ztr_encode_samples_C(ztr_t *z,
165 Read *r, int *nbytes, char **mdata,
166 int *mdbytes) {
167 return ztr_encode_samples_common(z, "C\0\0", r, r->traceC,
168 nbytes, mdata, mdbytes);
169 }
170
171 static char *ztr_encode_samples_G(ztr_t *z,
172 Read *r, int *nbytes, char **mdata,
173 int *mdbytes) {
174 return ztr_encode_samples_common(z, "G\0\0", r, r->traceG,
175 nbytes, mdata, mdbytes);
176 }
177
178 static char *ztr_encode_samples_T(ztr_t *z,
179 Read *r, int *nbytes, char **mdata,
180 int *mdbytes) {
181 return ztr_encode_samples_common(z, "T\0\0", r, r->traceT,
182 nbytes, mdata, mdbytes);
183 }
184
185 /* ARGSUSED */
186 static void ztr_decode_samples(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
187 int i, j;
188 int maxTraceVal = 0;
189 TRACE sample;
190 unsigned char *bytes = (unsigned char *)chunk->data;
191 int dlen = chunk->dlength;
192 TRACE **lane, *lanex;
193 char *type = ztr_lookup_mdata_value(z, chunk, "TYPE");
194
195 if (!type)
196 return;
197
198 switch(type[0]) {
199 case 'A':
200 lane = &r->traceA;
201 break;
202 case 'C':
203 lane = &r->traceC;
204 break;
205 case 'G':
206 lane = &r->traceG;
207 break;
208 case 'T':
209 lane = &r->traceT;
210 break;
211 default:
212 return;
213 }
214
215 bytes+=2;
216 dlen-=2;
217
218 /* Store in the Read structure */
219 r->NPoints = dlen/2;
220 if (*lane)
221 xfree(*lane);
222 lanex = *lane = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE));
223
224 for (i = j = 0; i < r->NPoints; i++, j+=2) {
225 sample = (bytes[j] << 8) | bytes[j+1];
226 lanex[i] = sample;
227 if (maxTraceVal < sample)
228 maxTraceVal = sample;
229 }
230
231 if (r->maxTraceVal < maxTraceVal)
232 r->maxTraceVal = maxTraceVal;
233 }
234
235 /* Encode the the base calls */
236 static char *ztr_encode_bases(ztr_t *z,
237 Read *r, int *nbytes, char **mdata,
238 int *mdbytes) {
239 char *bytes;
240
241 if (!r->NBases)
242 return NULL;
243
244 *mdata = NULL;
245 *mdbytes = 0;
246
247 bytes = (char *)xmalloc(r->NBases + 1);
248 memcpy(bytes+1, r->base, r->NBases);
249 *nbytes = r->NBases+1;
250
251 bytes[0] = ZTR_FORM_RAW;
252 return bytes;
253 }
254
255 static void ztr_decode_bases(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
256 char *bytes = chunk->data;
257 int nbytes = chunk->dlength;
258
259 nbytes--;
260 bytes++;
261
262 r->NBases = nbytes;
263 if (r->base) xfree(r->base);
264 r->base = (char *)xmalloc(r->NBases+1);
265 memcpy(r->base, bytes, r->NBases);
266 r->base[r->NBases] = 0;
267
268 /* Incase there isn't a clip chunk */
269 r->leftCutoff = 0;
270 r->rightCutoff = r->NBases+1;
271 }
272
273 /* Encode the base positions as 4 byte values */
274 static char *ztr_encode_positions(ztr_t *z,
275 Read *r, int *nbytes, char **mdata,
276 int *mdbytes) {
277 char *bytes;
278 int i, j;
279
280 if ((!r->NPoints && !r->nflows) || !r->basePos || !r->NBases)
281 return NULL;
282
283 *mdata = NULL;
284 *mdbytes = 0;
285
286 bytes = (char *)xmalloc(r->NBases * 4 + 4);
287 for (j = 4, i = 0; i < r->NBases; i++) {
288 /*
289 * First 2 bytes are zero as currently r->basePos is 16-bit.
290 *
291 * bytes[j++] = (r->basePos[i] >> 24) & 0xff;
292 * bytes[j++] = (r->basePos[i] >> 16) & 0xff;
293 */
294 bytes[j++] = 0;
295 bytes[j++] = 0;
296 bytes[j++] = (r->basePos[i] >> 8) & 0xff;
297 bytes[j++] = (r->basePos[i] >> 0) & 0xff;
298 }
299
300 bytes[0] = ZTR_FORM_RAW;
301 bytes[1] = 0; /* Dummy */
302 bytes[2] = 0; /* Dummy */
303 bytes[3] = 0; /* Dummy */
304
305 *nbytes = j;
306 return (char *)bytes;
307 }
308
309 static void ztr_decode_positions(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
310 int i, j;
311 unsigned char *bytes = (unsigned char *)chunk->data;
312 int nbytes = chunk->dlength;
313
314 bytes+=4;
315 nbytes-=4;
316
317 r->NBases = nbytes/4;
318 if (r->basePos) xfree(r->basePos);
319 r->basePos = (uint_2 *)xmalloc(r->NBases * sizeof(*r->basePos));
320
321 for (i = j = 0; j < nbytes; i++, j += 4) {
322 r->basePos[i] =
323 (bytes[j+0] << 24) +
324 (bytes[j+1] << 16) +
325 (bytes[j+2] << 8) +
326 (bytes[j+3] << 0);
327 }
328 }
329
330 /* Encode the main base confidence (called base) */
331 static char *ztr_encode_confidence_1(ztr_t *z,
332 Read *r, int *nbytes, char **mdata,
333 int *mdbytes)
334 {
335 char *bytes;
336 int i;
337
338 /* Check that we have any confidence values first */
339 if (!r->prob_A || !r->prob_C || !r->prob_G || !r->prob_T)
340 return NULL;
341
342 *mdata = NULL;
343 *mdbytes = 0;
344
345 /* Check that they're not all zero - will "normally" be quick */
346 for (i = 0; i < r->NBases; i++) {
347 if (r->prob_A[i]) break;
348 if (r->prob_C[i]) break;
349 if (r->prob_G[i]) break;
350 if (r->prob_T[i]) break;
351 }
352 if (i == r->NBases)
353 return NULL;
354
355 /* Memory allocation */
356 if (NULL == (bytes = xmalloc(r->NBases * sizeof(*bytes) + 1)))
357 return NULL;
358
359 /*
360 * Encode probs for called bases.
361 * Unknown base => average of prob_A, prob_C, prob_G and prob_T.
362 */
363 bytes++;
364 for (i = 0; i < r->NBases; i++) {
365 switch (r->base[i]) {
366 case 'A':
367 case 'a':
368 bytes[i] = r->prob_A[i];
369 break;
370 case 'C':
371 case 'c':
372 bytes[i] = r->prob_C[i];
373 break;
374 case 'G':
375 case 'g':
376 bytes[i] = r->prob_G[i];
377 break;
378 case 'T':
379 case 't':
380 bytes[i] = r->prob_T[i];
381 break;
382 default:
383 bytes[i] = (r->prob_A[i] + r->prob_C[i] +
384 r->prob_G[i] + r->prob_T[i]) / 4;
385 break;
386 }
387 }
388 bytes--;
389
390 *nbytes = r->NBases + 1;
391
392 bytes[0] = ZTR_FORM_RAW;
393 return bytes;
394 }
395
396 static int ztr_decode_confidence_1(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
397 char *bytes = chunk->data;
398 int nbytes = chunk->dlength;
399 int i;
400
401 bytes++;
402 nbytes--;
403
404 /* Unpack confidence values; depends on base calls */
405 if (!r->base)
406 return -1;
407
408 if (r->prob_A) xfree(r->prob_A);
409 if (r->prob_C) xfree(r->prob_C);
410 if (r->prob_G) xfree(r->prob_G);
411 if (r->prob_T) xfree(r->prob_T);
412 r->prob_A = (char *)xmalloc(r->NBases * sizeof(*r->prob_A));
413 r->prob_C = (char *)xmalloc(r->NBases * sizeof(*r->prob_C));
414 r->prob_G = (char *)xmalloc(r->NBases * sizeof(*r->prob_G));
415 r->prob_T = (char *)xmalloc(r->NBases * sizeof(*r->prob_T));
416
417 for (i = 0; i < r->NBases; i++) {
418 switch (r->base[i]) {
419 case 'A':
420 case 'a':
421 r->prob_A[i] = bytes[i];
422 r->prob_C[i] = 0;
423 r->prob_G[i] = 0;
424 r->prob_T[i] = 0;
425 break;
426 case 'C':
427 case 'c':
428 r->prob_A[i] = 0;
429 r->prob_C[i] = bytes[i];
430 r->prob_G[i] = 0;
431 r->prob_T[i] = 0;
432 break;
433 case 'G':
434 case 'g':
435 r->prob_A[i] = 0;
436 r->prob_C[i] = 0;
437 r->prob_G[i] = bytes[i];
438 r->prob_T[i] = 0;
439 break;
440 case 'T':
441 case 't':
442 r->prob_A[i] = 0;
443 r->prob_C[i] = 0;
444 r->prob_G[i] = 0;
445 r->prob_T[i] = bytes[i];
446 break;
447 default:
448 r->prob_A[i] = bytes[i];
449 r->prob_C[i] = bytes[i];
450 r->prob_G[i] = bytes[i];
451 r->prob_T[i] = bytes[i];
452 }
453 }
454
455 return 0;
456 }
457
458 /* Encode the four main base confidences */
459 static char *ztr_encode_confidence_4(ztr_t *z,
460 Read *r, int *nbytes, char **mdata,
461 int *mdbytes)
462 {
463 char *bytes;
464 int i, j;
465
466 /* Check that we have any confidence values first */
467 if (!r->prob_A || !r->prob_C || !r->prob_G || !r->prob_T)
468 return NULL;
469
470 *mdata = NULL;
471 *mdbytes = 0;
472
473 /* Check that they're not all zero - will "normally" be quick */
474 for (i = 0; i < r->NBases; i++) {
475 if (r->prob_A[i]) break;
476 if (r->prob_C[i]) break;
477 if (r->prob_G[i]) break;
478 if (r->prob_T[i]) break;
479 }
480 if (i == r->NBases)
481 return NULL;
482
483 /* Memory allocation */
484 if (NULL == (bytes = xmalloc(4 * r->NBases * sizeof(*bytes) + 1)))
485 return NULL;
486
487 /*
488 * Encode probs for called bases first
489 * Unknown base = 'T'.
490 */
491 j = r->NBases;
492 bytes++;
493 for (i = 0; i < r->NBases; i++) {
494 switch (r->base[i]) {
495 case 'A':
496 case 'a':
497 bytes[i ] = r->prob_A[i];
498 bytes[j++] = r->prob_C[i];
499 bytes[j++] = r->prob_G[i];
500 bytes[j++] = r->prob_T[i];
501 break;
502 case 'C':
503 case 'c':
504 bytes[j++] = r->prob_A[i];
505 bytes[i ] = r->prob_C[i];
506 bytes[j++] = r->prob_G[i];
507 bytes[j++] = r->prob_T[i];
508 break;
509 case 'G':
510 case 'g':
511 bytes[j++] = r->prob_A[i];
512 bytes[j++] = r->prob_C[i];
513 bytes[i ] = r->prob_G[i];
514 bytes[j++] = r->prob_T[i];
515 break;
516 default:
517 bytes[j++] = r->prob_A[i];
518 bytes[j++] = r->prob_C[i];
519 bytes[j++] = r->prob_G[i];
520 bytes[i ] = r->prob_T[i];
521 break;
522 }
523 }
524 bytes--;
525
526 *nbytes = r->NBases * 4 + 1;
527
528 bytes[0] = ZTR_FORM_RAW;
529 return bytes;
530 }
531
532 static int ztr_decode_confidence_4(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
533 char *bytes = chunk->data;
534 int nbytes = chunk->dlength;
535 int i, j;
536
537 bytes++;
538 nbytes--;
539
540 /* Unpack confidence values; depends on base calls */
541 if (!r->base)
542 return -1;
543
544 if (r->prob_A) xfree(r->prob_A);
545 if (r->prob_C) xfree(r->prob_C);
546 if (r->prob_G) xfree(r->prob_G);
547 if (r->prob_T) xfree(r->prob_T);
548 r->prob_A = (char *)xmalloc(r->NBases * sizeof(*r->prob_A));
549 r->prob_C = (char *)xmalloc(r->NBases * sizeof(*r->prob_C));
550 r->prob_G = (char *)xmalloc(r->NBases * sizeof(*r->prob_G));
551 r->prob_T = (char *)xmalloc(r->NBases * sizeof(*r->prob_T));
552
553 j = r->NBases;
554 for (i = 0; i < r->NBases; i++) {
555 switch (r->base[i]) {
556 case 'A':
557 case 'a':
558 r->prob_A[i] = bytes[i];
559 r->prob_C[i] = bytes[j++];
560 r->prob_G[i] = bytes[j++];
561 r->prob_T[i] = bytes[j++];
562 break;
563 case 'C':
564 case 'c':
565 r->prob_A[i] = bytes[j++];
566 r->prob_C[i] = bytes[i];
567 r->prob_G[i] = bytes[j++];
568 r->prob_T[i] = bytes[j++];
569 break;
570 case 'G':
571 case 'g':
572 r->prob_A[i] = bytes[j++];
573 r->prob_C[i] = bytes[j++];
574 r->prob_G[i] = bytes[i];
575 r->prob_T[i] = bytes[j++];
576 break;
577 default:
578 r->prob_A[i] = bytes[j++];
579 r->prob_C[i] = bytes[j++];
580 r->prob_G[i] = bytes[j++];
581 r->prob_T[i] = bytes[i];
582 break;
583 }
584 }
585
586 return 0;
587 }
588
589 /* Encode the textual comments */
590 static char *ztr_encode_text(ztr_t *z,
591 Read *r, int *nbytes, char **mdata,
592 int *mdbytes) {
593 char *bytes;
594 int len, alen;
595 int ident;
596 int i, j;
597
598 if (!r->info)
599 return NULL;
600
601 *mdata = NULL;
602 *mdbytes = 0;
603
604 /*
605 * traditional Read comments are a single char * of ident=value lines.
606 * The length of ident=valueXident=valueX (X = newline) if the same
607 * as ident0value0ident0value0 (0 = \0), although ztr has
608 * a double \0 as terminator.
609 */
610 len = strlen(r->info);
611
612 /* Allocate */
613 alen = len + 3;
614 bytes = xmalloc(alen);
615
616 /* Copy */
617 j = 0;
618 bytes[j++] = ZTR_FORM_RAW;
619 ident = 1;
620 for (i = 0; i < len; i++) {
621 switch (r->info[i]) {
622 case '=':
623 if (ident) {
624 ident = 0;
625 bytes[j++] = 0;
626 } else {
627 bytes[j++] = '=';
628 }
629 break;
630
631 case '\n':
632 if (ident) {
633 /* Invalid Read info, but we'll carry on anyway. */
634 if (j && bytes[j-1] != 0)
635 bytes[j++] = 0;
636 else
637 break;
638 }
639 bytes[j++] = 0;
640 ident = 1;
641 break;
642
643 default:
644 bytes[j++] = r->info[i];
645 }
646
647 if (j + 3 > alen) {
648 /* This can happen if we have Read idents without values */
649 alen += 100;
650 bytes = xrealloc(bytes, alen);
651 }
652 }
653 if (j && bytes[j-1] != 0)
654 bytes[j++] = 0; /* Must end in two nuls */
655 bytes[j++] = 0;
656 *nbytes = j;
657
658 return bytes;
659 }
660
661 static void ztr_decode_text(Read *r, ztr_t *ztr) {
662 int i;
663 int nbytes = 0;
664 char *iptr;
665
666 /* Find length */
667 for (i = 0; i < ztr->ntext_segments; i++) {
668 if (ztr->text_segments[i].ident)
669 nbytes += strlen(ztr->text_segments[i].ident);
670 if (ztr->text_segments[i].value)
671 nbytes += strlen(ztr->text_segments[i].value);
672 nbytes += 2;
673 }
674
675 /* Allocate */
676 if (r->info) xfree(r->info);
677 r->info = (char *)xmalloc(nbytes+1);
678
679 /* Convert */
680 iptr = r->info;
681 for (i = 0; i < ztr->ntext_segments; i++) {
682 if (ztr->text_segments[i].ident && ztr->text_segments[i].value) {
683 int added = sprintf(iptr, "%s=%s\n",
684 ztr->text_segments[i].ident,
685 ztr->text_segments[i].value);
686 iptr += added;
687 }
688 }
689 *iptr = 0;
690 }
691
692 /* Encode the clip points */
693 static char *ztr_encode_clips(ztr_t *z,
694 Read *r, int *nbytes, char **mdata,
695 int *mdbytes) {
696 char *bytes;
697
698 if (!r->NBases)
699 return NULL;
700
701 if (r->leftCutoff == 0 && r->rightCutoff > r->NBases)
702 return NULL;
703
704 *mdata = NULL;
705 *mdbytes = 0;
706
707 /* Allocate */
708 *nbytes = 9;
709 bytes = xmalloc(9);
710
711 /* Store */
712 bytes[1] = (r->leftCutoff >> 24) & 0xff;
713 bytes[2] = (r->leftCutoff >> 16) & 0xff;
714 bytes[3] = (r->leftCutoff >> 8) & 0xff;
715 bytes[4] = (r->leftCutoff >> 0) & 0xff;
716 bytes[5] = (r->rightCutoff >> 24) & 0xff;
717 bytes[6] = (r->rightCutoff >> 16) & 0xff;
718 bytes[7] = (r->rightCutoff >> 8) & 0xff;
719 bytes[8] = (r->rightCutoff >> 0) & 0xff;
720
721 bytes[0] = ZTR_FORM_RAW;
722 return bytes;
723 }
724
725 /* ARGSUSED */
726 static void ztr_decode_clips(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
727 char *bytes = chunk->data;
728
729 r->leftCutoff =
730 (((unsigned char)bytes[1]) << 24) +
731 (((unsigned char)bytes[2]) << 16) +
732 (((unsigned char)bytes[3]) << 8) +
733 (((unsigned char)bytes[4]) << 0);
734
735 r->rightCutoff =
736 (((unsigned char)bytes[5]) << 24) +
737 (((unsigned char)bytes[6]) << 16) +
738 (((unsigned char)bytes[7]) << 8) +
739 (((unsigned char)bytes[8]) << 0);
740 }
741
742 /* ARGSUSED */
743 static char *ztr_encode_flow_order(ztr_t *z,
744 Read *r, int *nbytes, char **mdata,
745 int *mdbytes) {
746 char *bytes;
747
748 if (!r->flow_order || !r->nflows)
749 return NULL;
750
751 bytes = (char *)xmalloc(r->nflows+1);
752 *nbytes = r->nflows+1;
753 bytes[0] = ZTR_FORM_RAW;
754
755 memcpy(bytes+1, r->flow_order, r->nflows);
756
757 return bytes;
758 }
759
760 static void ztr_decode_flow_order(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
761 char *bytes = chunk->data;
762 int nbytes = chunk->dlength;
763
764 nbytes--;
765 bytes++;
766
767 r->nflows = nbytes;
768 r->flow_order = (char *)xmalloc(r->nflows+1);
769 memcpy(r->flow_order, bytes, r->nflows);
770 r->flow_order[r->nflows] = 0;
771 }
772
773 static char *ztr_encode_flow_proc(ztr_t *z,
774 Read *r, int *nbytes, char **mdata,
775 int *mdbytes) {
776 char *bytes;
777 int i, j;
778 float *data;
779
780 if (!r->flow_order || !r->nflows)
781 return NULL;
782
783 data = r->flow;
784
785 /* Meta-data */
786 if (z->header.version_major > 1 ||
787 z->header.version_minor >= 2) {
788 /* 1.2 onwards */
789 *mdata = (char *)malloc(10);
790 *mdbytes = 10;
791 sprintf(*mdata, "TYPE%cPYNO", 0);
792 } else {
793 *mdata = (char *)malloc(4);
794 *mdbytes = 4;
795 (*mdata)[0] = 'P';
796 (*mdata)[1] = 'Y';
797 (*mdata)[2] = 'N';
798 (*mdata)[3] = 'O';
799 }
800
801 /* floats themselves, scaled */
802 bytes = (char *)xmalloc(r->nflows*2+2);
803 *nbytes = r->nflows*2+2;
804 bytes[0] = ZTR_FORM_RAW;
805 bytes[1] = 0;
806
807 for (i = 0, j = 2; i < r->nflows; i++) {
808 signed int t = data[i] * 100 + 0.49999;
809 bytes[j++] = (t >> 8) & 0xff;
810 bytes[j++] = (t >> 0) & 0xff;
811 }
812
813 return bytes;
814 }
815
816 /* ARGSUSED */
817 static void ztr_decode_flow_proc(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
818 int i, j;
819 unsigned char *bytes = (unsigned char *)chunk->data;
820 int dlen = chunk->dlength;
821
822 bytes+=2;
823 dlen-=2;
824
825 /* Store in the Read structure */
826 r->nflows = dlen/2;
827 r->flow = (float *)xcalloc(r->nflows, sizeof(float));
828 for (i = j = 0; i < r->nflows; i++, j+=2) {
829 float sample = ((bytes[j] << 8) | bytes[j+1]) / 100.0;
830 r->flow[i] = sample;
831 }
832 }
833
834 static char *ztr_encode_flow_raw(ztr_t *z,
835 Read *r, int *nbytes, char **mdata,
836 int *mdbytes) {
837 char *bytes;
838 int i, j;
839 unsigned int *data;
840
841 if (!r->flow_raw || !r->nflows)
842 return NULL;
843
844 data = r->flow_raw;
845
846 /* Meta-data */
847 if (z->header.version_major > 1 ||
848 z->header.version_minor >= 2) {
849 /* 1.2 onwards */
850 *mdata = (char *)malloc(10);
851 *mdbytes = 10;
852 sprintf(*mdata, "TYPE%cPYRW", 0);
853 } else {
854 *mdata = (char *)malloc(4);
855 *mdbytes = 4;
856 (*mdata)[0] = 'P';
857 (*mdata)[1] = 'Y';
858 (*mdata)[2] = 'R';
859 (*mdata)[3] = 'W';
860 }
861
862 /* floats themselves, scaled */
863 bytes = (char *)xmalloc(r->nflows*2+2);
864 *nbytes = r->nflows*2+2;
865 bytes[0] = ZTR_FORM_RAW;
866 bytes[1] = 0;
867
868 for (i = 0, j = 2; i < r->nflows; i++) {
869 int t = data[i];
870 bytes[j++] = (t >> 8) & 0xff;
871 bytes[j++] = (t >> 0) & 0xff;
872 }
873
874 return bytes;
875 }
876
877 /* ARGSUSED */
878 static void ztr_decode_flow_raw(ztr_t *z, ztr_chunk_t *chunk, Read *r) {
879 int i, j;
880 unsigned char *bytes = (unsigned char *)chunk->data;
881 int dlen = chunk->dlength;
882
883 bytes+=2;
884 dlen-=2;
885
886 /* Store in the Read structure */
887 r->nflows = dlen/2;
888 r->flow_raw = (unsigned int *)xcalloc(r->nflows, sizeof(*r->flow_raw));
889 for (i = j = 0; i < r->nflows; i++, j+=2) {
890 unsigned int sample = (bytes[j] << 8) | bytes[j+1];
891 r->flow_raw[i] = sample;
892 }
893 }
894
895 /*
896 * read2ztr
897 *
898 * Converts an io_lib "Read" structure to a ztr_t structure.
899 *
900 * Arguments:
901 * r A pointer to the "Read" structure to convert from
902 *
903 * Returns:
904 * Success: A pointer to the ztr_t struct.
905 * Failure: NULL
906 */
907 ztr_t *read2ztr(Read *r) {
908 ztr_t *ztr;
909 int i, j, nbytes, mdbytes;
910 char *bytes;
911 char *mdata;
912
913 #define DO_SMP4
914 int chunk_type[] = {
915 #ifdef DO_SMP4
916 ZTR_TYPE_SMP4,
917 #else
918 ZTR_TYPE_SAMP,
919 ZTR_TYPE_SAMP,
920 ZTR_TYPE_SAMP,
921 ZTR_TYPE_SAMP,
922 #endif
923 ZTR_TYPE_BASE,
924 ZTR_TYPE_BPOS,
925 ZTR_TYPE_CNF4,
926 ZTR_TYPE_TEXT,
927 ZTR_TYPE_CLIP,
928
929 ZTR_TYPE_FLWO,
930 ZTR_TYPE_SAMP,
931 ZTR_TYPE_SAMP,
932 };
933
934 char *(*chunk_func[])(ztr_t *z, Read *r, int *nbytes, char **mdata, int *mdbytes) = {
935 #ifdef DO_SMP4
936 ztr_encode_samples_4,
937 #else
938 ztr_encode_samples_A,
939 ztr_encode_samples_C,
940 ztr_encode_samples_G,
941 ztr_encode_samples_T,
942 #endif
943 ztr_encode_bases,
944 ztr_encode_positions,
945 ztr_encode_confidence_4,
946 ztr_encode_text,
947 ztr_encode_clips,
948
949 ztr_encode_flow_order,
950 ztr_encode_flow_proc,
951 ztr_encode_flow_raw,
952 };
953
954 if (NULL == (ztr = new_ztr()))
955 return NULL;
956
957 /* Create a header record */
958 memcpy(ztr->header.magic, ZTR_MAGIC, 8);
959 ztr->header.version_major = ZTR_VERSION_MAJOR;
960 ztr->header.version_minor = ZTR_VERSION_MINOR;
961
962 /* Alloc chunks (max number) */
963 ztr->nchunks = sizeof(chunk_type)/sizeof(*chunk_type);
964 ztr->chunk = (ztr_chunk_t *)xmalloc(ztr->nchunks *
965 sizeof(ztr_chunk_t));
966 if (NULL == ztr->chunk)
967 return NULL;
968
969 /* Create the chunks */
970 for (j = i = 0; i < ztr->nchunks; i++) {
971 /* char str[5]; */
972
973 bytes = chunk_func[i](ztr, r, &nbytes, &mdata, &mdbytes);
974 if (!bytes)
975 continue;
976
977 /*
978 fprintf(stderr, "block %.4s length %d\n",
979 ZTR_BE2STR(chunk_type[i], str), nbytes);
980 */
981 ztr->chunk[j].type = chunk_type[i];
982 ztr->chunk[j].mdlength = mdbytes;
983 ztr->chunk[j].mdata = mdata;
984 ztr->chunk[j].dlength = nbytes;
985 ztr->chunk[j].data = bytes;
986 ztr->chunk[j].ztr_owns = 1;
987
988 j++;
989 }
990 ztr->nchunks = j;
991
992 /*
993 * Experiments show that typically a double delta does
994 * better than a single delta for 8-bit data, and the other
995 * way around for 16-bit data
996 */
997 ztr->delta_level = r->maxTraceVal < 256 ? 2 : 3;
998
999 return ztr;
1000 }
1001
1002 /*
1003 * ztr2read
1004 *
1005 * Converts an ztr_t structure to an io_lib "Read" structure.
1006 *
1007 * Arguments:
1008 * ztr A pointer to the ztr structure to convert from
1009 *
1010 * Returns:
1011 * Success: A pointer to the Read struct.
1012 * Failure: NULL
1013 */
1014 Read *ztr2read(ztr_t *ztr) {
1015 Read *r;
1016 int i;
1017 int done_conf = 0, done_pos = 0;
1018 int sections = read_sections(0);
1019
1020 /* Allocate */
1021 r = read_allocate(0, 0);
1022
1023 if (NULLRead == r)
1024 return NULLRead;
1025
1026 /* Proces text chunks - makes conversion easier */
1027 if (sections & READ_COMMENTS) {
1028 ztr_process_text(ztr);
1029 ztr_decode_text(r, ztr);
1030 }
1031
1032 /* Iterate around each known chunk type turning into the Read elements */
1033 for (i = 0; i < ztr->nchunks; i++) {
1034 switch (ztr->chunk[i].type) {
1035 case ZTR_TYPE_SMP4:
1036 if (sections & READ_SAMPLES) {
1037 char *offs = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "OFFS");
1038 char *type = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE");
1039 if (!type || 0 == strcmp(type, "PROC")) {
1040 //if (type && 0 == strcmp(type, "SLXI")) {
1041 uncompress_chunk(ztr, &ztr->chunk[i]);
1042 ztr_decode_samples_4(ztr, &ztr->chunk[i], r);
1043
1044 if (offs)
1045 r->baseline = atoi(offs);
1046 }
1047 }
1048 break;
1049
1050 case ZTR_TYPE_SAMP:
1051 if (sections & READ_SAMPLES) {
1052 char *type = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE");
1053 char *offs = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "OFFS");
1054 uncompress_chunk(ztr, &ztr->chunk[i]);
1055 if (type && 0 == strcmp(type, "PYRW"))
1056 ztr_decode_flow_raw(ztr, &ztr->chunk[i], r);
1057 else if (type && 0 == strcmp(type, "PYNO"))
1058 ztr_decode_flow_proc(ztr, &ztr->chunk[i], r);
1059 else if (type &&
1060 (0 == strcmp(type, "A") ||
1061 0 == strcmp(type, "C") ||
1062 0 == strcmp(type, "G") ||
1063 0 == strcmp(type, "T"))) {
1064 ztr_decode_samples(ztr, &ztr->chunk[i], r);
1065
1066 if (offs)
1067 r->baseline = atoi(offs);
1068 }
1069 }
1070 break;
1071
1072 case ZTR_TYPE_BASE:
1073 if (sections & READ_BASES) {
1074 uncompress_chunk(ztr, &ztr->chunk[i]);
1075 ztr_decode_bases(ztr, &ztr->chunk[i], r);
1076 }
1077 break;
1078
1079 case ZTR_TYPE_BPOS:
1080 if (sections & READ_BASES) {
1081 uncompress_chunk(ztr, &ztr->chunk[i]);
1082 ztr_decode_positions(ztr, &ztr->chunk[i], r);
1083 done_pos++;
1084 }
1085 break;
1086
1087 case ZTR_TYPE_CNF4:
1088 if (sections & READ_BASES) {
1089 uncompress_chunk(ztr, &ztr->chunk[i]);
1090 ztr_decode_confidence_4(ztr, &ztr->chunk[i], r);
1091 done_conf++;
1092 }
1093 break;
1094
1095 case ZTR_TYPE_CNF1:
1096 if (sections & READ_BASES) {
1097 uncompress_chunk(ztr, &ztr->chunk[i]);
1098 ztr_decode_confidence_1(ztr, &ztr->chunk[i], r);
1099 done_conf++;
1100 }
1101 break;
1102
1103 case ZTR_TYPE_TEXT:
1104 /* Skip - already did this; see ztr_process_text */
1105 break;
1106
1107 case ZTR_TYPE_CLIP:
1108 if (sections & READ_BASES) {
1109 uncompress_chunk(ztr, &ztr->chunk[i]);
1110 ztr_decode_clips(ztr, &ztr->chunk[i], r);
1111 }
1112 break;
1113
1114 case ZTR_TYPE_FLWO:
1115 if (sections & READ_SAMPLES) {
1116 uncompress_chunk(ztr, &ztr->chunk[i]);
1117 ztr_decode_flow_order(ztr, &ztr->chunk[i], r);
1118 }
1119 break;
1120 }
1121 }
1122
1123 /* Handle the case when we have no confidence values */
1124 if (!done_conf && r->NBases > 0) {
1125 r->prob_A = (char *)xrealloc(r->prob_A, r->NBases);
1126 r->prob_C = (char *)xrealloc(r->prob_C, r->NBases);
1127 r->prob_G = (char *)xrealloc(r->prob_G, r->NBases);
1128 r->prob_T = (char *)xrealloc(r->prob_T, r->NBases);
1129 memset(r->prob_A, 0, r->NBases);
1130 memset(r->prob_C, 0, r->NBases);
1131 memset(r->prob_G, 0, r->NBases);
1132 memset(r->prob_T, 0, r->NBases);
1133 }
1134
1135 /* Handle the case when we have no BPOS chunk */
1136 if (!done_pos && r->NBases > 0) {
1137 r->basePos = (uint_2 *)xrealloc(r->basePos, r->NBases * 2);
1138 for (i = 0; i < r->NBases; i++)
1139 r->basePos[i] = i;
1140 }
1141
1142 r->format = TT_ZTR;
1143
1144 return r;
1145 }