Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/ztr_translate.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #include <stdio.h> | |
2 #include <string.h> | |
3 | |
4 #include "io_lib/ztr.h" | |
5 #include "io_lib/xalloc.h" | |
6 #include "io_lib/Read.h" | |
7 | |
8 /* Return the A,C,G,T samples */ | |
9 static char *ztr_encode_samples_4(ztr_t *z, | |
10 Read *r, int *nbytes, char **mdata, | |
11 int *mdbytes) { | |
12 char *bytes; | |
13 int i, j, t; | |
14 | |
15 if (!r->NPoints) | |
16 return NULL; | |
17 | |
18 if ((z->header.version_major > 1 || | |
19 z->header.version_minor >= 2) && r->baseline) { | |
20 /* 1.2 onwards */ | |
21 char buf[256]; | |
22 int blen; | |
23 blen = sprintf(buf, "%d", r->baseline); | |
24 *mdata = (char *)malloc(6+blen); | |
25 *mdbytes = sprintf(*mdata, "OFFS%c%s", 0, buf) + 1; | |
26 } else { | |
27 *mdata = NULL; | |
28 *mdbytes = 0; | |
29 } | |
30 | |
31 bytes = (char *)xmalloc(r->NPoints * sizeof(TRACE)*4 + 2); | |
32 for (i = 0, j = 2; i < r->NPoints; i++) { | |
33 t = r->traceA[i]; | |
34 bytes[j++] = (t >> 8) & 0xff; | |
35 bytes[j++] = (t >> 0) & 0xff; | |
36 } | |
37 for (i = 0; i < r->NPoints; i++) { | |
38 t = r->traceC[i]; | |
39 bytes[j++] = (t >> 8) & 0xff; | |
40 bytes[j++] = (t >> 0) & 0xff; | |
41 } | |
42 for (i = 0; i < r->NPoints; i++) { | |
43 t = r->traceG[i]; | |
44 bytes[j++] = (t >> 8) & 0xff; | |
45 bytes[j++] = (t >> 0) & 0xff; | |
46 } | |
47 for (i = 0; i < r->NPoints; i++) { | |
48 t = r->traceT[i]; | |
49 bytes[j++] = (t >> 8) & 0xff; | |
50 bytes[j++] = (t >> 0) & 0xff; | |
51 } | |
52 *nbytes = 4 * r->NPoints * sizeof(TRACE) + 2; | |
53 | |
54 bytes[0] = ZTR_FORM_RAW; | |
55 bytes[1] = 0; | |
56 return bytes; | |
57 } | |
58 | |
59 static void ztr_decode_samples_4(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
60 int i, j; | |
61 int maxTraceVal = 0; | |
62 TRACE sample; | |
63 unsigned char *bytes = (unsigned char *)chunk->data; | |
64 int nbytes = chunk->dlength; | |
65 | |
66 bytes+=2; | |
67 nbytes-=2; | |
68 | |
69 /* Store in the Read structure */ | |
70 r->NPoints = nbytes/8; | |
71 if (r->traceA) xfree(r->traceA); | |
72 if (r->traceC) xfree(r->traceC); | |
73 if (r->traceG) xfree(r->traceG); | |
74 if (r->traceT) xfree(r->traceT); | |
75 r->traceA = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE)); | |
76 r->traceC = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE)); | |
77 r->traceG = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE)); | |
78 r->traceT = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE)); | |
79 | |
80 for (i = j = 0; i < r->NPoints; i++, j+=2) { | |
81 sample = (bytes[j] << 8) | bytes[j+1]; | |
82 r->traceA[i] = sample; | |
83 if (maxTraceVal < sample) | |
84 maxTraceVal = sample; | |
85 } | |
86 for (i = 0; i < r->NPoints; i++, j+=2) { | |
87 sample = (bytes[j] << 8) | bytes[j+1]; | |
88 r->traceC[i] = sample; | |
89 if (maxTraceVal < sample) | |
90 maxTraceVal = sample; | |
91 } | |
92 for (i = 0; i < r->NPoints; i++, j+=2) { | |
93 sample = (bytes[j] << 8) | bytes[j+1]; | |
94 r->traceG[i] = sample; | |
95 if (maxTraceVal < sample) | |
96 maxTraceVal = sample; | |
97 } | |
98 for (i = 0; i < r->NPoints; i++, j+=2) { | |
99 sample = (bytes[j] << 8) | bytes[j+1]; | |
100 r->traceT[i] = sample; | |
101 if (maxTraceVal < sample) | |
102 maxTraceVal = sample; | |
103 } | |
104 | |
105 r->maxTraceVal = maxTraceVal; | |
106 } | |
107 | |
108 /* Return the [A,C,G,T] samples */ | |
109 static char *ztr_encode_samples_common(ztr_t *z, | |
110 char ident[4], Read *r, | |
111 TRACE *data, int *nbytes, | |
112 char **mdata, int *mdbytes) { | |
113 char *bytes; | |
114 int i, j, t; | |
115 | |
116 if (!r->NPoints) | |
117 return NULL; | |
118 | |
119 if (z->header.version_major > 1 || | |
120 z->header.version_minor >= 2) { | |
121 /* 1.2 onwards */ | |
122 char buf[256]; | |
123 int blen; | |
124 if (r->baseline) { | |
125 blen = sprintf(buf, "%d", r->baseline); | |
126 *mdata = (char *)malloc(16+blen); | |
127 *mdbytes = sprintf(*mdata, "TYPE%c%.*s%cOFFS%c%s", | |
128 0, 4, ident, 0, | |
129 0, buf) + 1; | |
130 } else { | |
131 *mdata = (char *)malloc(10); | |
132 *mdbytes = sprintf(*mdata, "TYPE%c%.*s", 0, 4, ident) + 1; | |
133 } | |
134 } else { | |
135 *mdata = (char *)malloc(4); | |
136 *mdbytes = 4; | |
137 (*mdata)[0] = ident[0]; | |
138 (*mdata)[1] = ident[1]; | |
139 (*mdata)[2] = ident[2]; | |
140 (*mdata)[3] = ident[3]; | |
141 } | |
142 | |
143 bytes = (char *)xmalloc(r->NPoints * sizeof(TRACE) + 2); | |
144 for (i = 0, j = 2; i < r->NPoints; i++) { | |
145 t = data[i]; | |
146 bytes[j++] = (t >> 8) & 0xff; | |
147 bytes[j++] = (t >> 0) & 0xff; | |
148 } | |
149 *nbytes = r->NPoints * sizeof(TRACE) + 2; | |
150 | |
151 bytes[0] = ZTR_FORM_RAW; | |
152 bytes[1] = 0; | |
153 return bytes; | |
154 } | |
155 | |
156 | |
157 static char *ztr_encode_samples_A(ztr_t *z, | |
158 Read *r, int *nbytes, char **mdata, | |
159 int *mdbytes) { | |
160 return ztr_encode_samples_common(z, "A\0\0", r, r->traceA, | |
161 nbytes, mdata, mdbytes); | |
162 } | |
163 | |
164 static char *ztr_encode_samples_C(ztr_t *z, | |
165 Read *r, int *nbytes, char **mdata, | |
166 int *mdbytes) { | |
167 return ztr_encode_samples_common(z, "C\0\0", r, r->traceC, | |
168 nbytes, mdata, mdbytes); | |
169 } | |
170 | |
171 static char *ztr_encode_samples_G(ztr_t *z, | |
172 Read *r, int *nbytes, char **mdata, | |
173 int *mdbytes) { | |
174 return ztr_encode_samples_common(z, "G\0\0", r, r->traceG, | |
175 nbytes, mdata, mdbytes); | |
176 } | |
177 | |
178 static char *ztr_encode_samples_T(ztr_t *z, | |
179 Read *r, int *nbytes, char **mdata, | |
180 int *mdbytes) { | |
181 return ztr_encode_samples_common(z, "T\0\0", r, r->traceT, | |
182 nbytes, mdata, mdbytes); | |
183 } | |
184 | |
185 /* ARGSUSED */ | |
186 static void ztr_decode_samples(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
187 int i, j; | |
188 int maxTraceVal = 0; | |
189 TRACE sample; | |
190 unsigned char *bytes = (unsigned char *)chunk->data; | |
191 int dlen = chunk->dlength; | |
192 TRACE **lane, *lanex; | |
193 char *type = ztr_lookup_mdata_value(z, chunk, "TYPE"); | |
194 | |
195 if (!type) | |
196 return; | |
197 | |
198 switch(type[0]) { | |
199 case 'A': | |
200 lane = &r->traceA; | |
201 break; | |
202 case 'C': | |
203 lane = &r->traceC; | |
204 break; | |
205 case 'G': | |
206 lane = &r->traceG; | |
207 break; | |
208 case 'T': | |
209 lane = &r->traceT; | |
210 break; | |
211 default: | |
212 return; | |
213 } | |
214 | |
215 bytes+=2; | |
216 dlen-=2; | |
217 | |
218 /* Store in the Read structure */ | |
219 r->NPoints = dlen/2; | |
220 if (*lane) | |
221 xfree(*lane); | |
222 lanex = *lane = (TRACE *)xmalloc(r->NPoints * sizeof(TRACE)); | |
223 | |
224 for (i = j = 0; i < r->NPoints; i++, j+=2) { | |
225 sample = (bytes[j] << 8) | bytes[j+1]; | |
226 lanex[i] = sample; | |
227 if (maxTraceVal < sample) | |
228 maxTraceVal = sample; | |
229 } | |
230 | |
231 if (r->maxTraceVal < maxTraceVal) | |
232 r->maxTraceVal = maxTraceVal; | |
233 } | |
234 | |
235 /* Encode the the base calls */ | |
236 static char *ztr_encode_bases(ztr_t *z, | |
237 Read *r, int *nbytes, char **mdata, | |
238 int *mdbytes) { | |
239 char *bytes; | |
240 | |
241 if (!r->NBases) | |
242 return NULL; | |
243 | |
244 *mdata = NULL; | |
245 *mdbytes = 0; | |
246 | |
247 bytes = (char *)xmalloc(r->NBases + 1); | |
248 memcpy(bytes+1, r->base, r->NBases); | |
249 *nbytes = r->NBases+1; | |
250 | |
251 bytes[0] = ZTR_FORM_RAW; | |
252 return bytes; | |
253 } | |
254 | |
255 static void ztr_decode_bases(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
256 char *bytes = chunk->data; | |
257 int nbytes = chunk->dlength; | |
258 | |
259 nbytes--; | |
260 bytes++; | |
261 | |
262 r->NBases = nbytes; | |
263 if (r->base) xfree(r->base); | |
264 r->base = (char *)xmalloc(r->NBases+1); | |
265 memcpy(r->base, bytes, r->NBases); | |
266 r->base[r->NBases] = 0; | |
267 | |
268 /* Incase there isn't a clip chunk */ | |
269 r->leftCutoff = 0; | |
270 r->rightCutoff = r->NBases+1; | |
271 } | |
272 | |
273 /* Encode the base positions as 4 byte values */ | |
274 static char *ztr_encode_positions(ztr_t *z, | |
275 Read *r, int *nbytes, char **mdata, | |
276 int *mdbytes) { | |
277 char *bytes; | |
278 int i, j; | |
279 | |
280 if ((!r->NPoints && !r->nflows) || !r->basePos || !r->NBases) | |
281 return NULL; | |
282 | |
283 *mdata = NULL; | |
284 *mdbytes = 0; | |
285 | |
286 bytes = (char *)xmalloc(r->NBases * 4 + 4); | |
287 for (j = 4, i = 0; i < r->NBases; i++) { | |
288 /* | |
289 * First 2 bytes are zero as currently r->basePos is 16-bit. | |
290 * | |
291 * bytes[j++] = (r->basePos[i] >> 24) & 0xff; | |
292 * bytes[j++] = (r->basePos[i] >> 16) & 0xff; | |
293 */ | |
294 bytes[j++] = 0; | |
295 bytes[j++] = 0; | |
296 bytes[j++] = (r->basePos[i] >> 8) & 0xff; | |
297 bytes[j++] = (r->basePos[i] >> 0) & 0xff; | |
298 } | |
299 | |
300 bytes[0] = ZTR_FORM_RAW; | |
301 bytes[1] = 0; /* Dummy */ | |
302 bytes[2] = 0; /* Dummy */ | |
303 bytes[3] = 0; /* Dummy */ | |
304 | |
305 *nbytes = j; | |
306 return (char *)bytes; | |
307 } | |
308 | |
309 static void ztr_decode_positions(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
310 int i, j; | |
311 unsigned char *bytes = (unsigned char *)chunk->data; | |
312 int nbytes = chunk->dlength; | |
313 | |
314 bytes+=4; | |
315 nbytes-=4; | |
316 | |
317 r->NBases = nbytes/4; | |
318 if (r->basePos) xfree(r->basePos); | |
319 r->basePos = (uint_2 *)xmalloc(r->NBases * sizeof(*r->basePos)); | |
320 | |
321 for (i = j = 0; j < nbytes; i++, j += 4) { | |
322 r->basePos[i] = | |
323 (bytes[j+0] << 24) + | |
324 (bytes[j+1] << 16) + | |
325 (bytes[j+2] << 8) + | |
326 (bytes[j+3] << 0); | |
327 } | |
328 } | |
329 | |
330 /* Encode the main base confidence (called base) */ | |
331 static char *ztr_encode_confidence_1(ztr_t *z, | |
332 Read *r, int *nbytes, char **mdata, | |
333 int *mdbytes) | |
334 { | |
335 char *bytes; | |
336 int i; | |
337 | |
338 /* Check that we have any confidence values first */ | |
339 if (!r->prob_A || !r->prob_C || !r->prob_G || !r->prob_T) | |
340 return NULL; | |
341 | |
342 *mdata = NULL; | |
343 *mdbytes = 0; | |
344 | |
345 /* Check that they're not all zero - will "normally" be quick */ | |
346 for (i = 0; i < r->NBases; i++) { | |
347 if (r->prob_A[i]) break; | |
348 if (r->prob_C[i]) break; | |
349 if (r->prob_G[i]) break; | |
350 if (r->prob_T[i]) break; | |
351 } | |
352 if (i == r->NBases) | |
353 return NULL; | |
354 | |
355 /* Memory allocation */ | |
356 if (NULL == (bytes = xmalloc(r->NBases * sizeof(*bytes) + 1))) | |
357 return NULL; | |
358 | |
359 /* | |
360 * Encode probs for called bases. | |
361 * Unknown base => average of prob_A, prob_C, prob_G and prob_T. | |
362 */ | |
363 bytes++; | |
364 for (i = 0; i < r->NBases; i++) { | |
365 switch (r->base[i]) { | |
366 case 'A': | |
367 case 'a': | |
368 bytes[i] = r->prob_A[i]; | |
369 break; | |
370 case 'C': | |
371 case 'c': | |
372 bytes[i] = r->prob_C[i]; | |
373 break; | |
374 case 'G': | |
375 case 'g': | |
376 bytes[i] = r->prob_G[i]; | |
377 break; | |
378 case 'T': | |
379 case 't': | |
380 bytes[i] = r->prob_T[i]; | |
381 break; | |
382 default: | |
383 bytes[i] = (r->prob_A[i] + r->prob_C[i] + | |
384 r->prob_G[i] + r->prob_T[i]) / 4; | |
385 break; | |
386 } | |
387 } | |
388 bytes--; | |
389 | |
390 *nbytes = r->NBases + 1; | |
391 | |
392 bytes[0] = ZTR_FORM_RAW; | |
393 return bytes; | |
394 } | |
395 | |
396 static int ztr_decode_confidence_1(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
397 char *bytes = chunk->data; | |
398 int nbytes = chunk->dlength; | |
399 int i; | |
400 | |
401 bytes++; | |
402 nbytes--; | |
403 | |
404 /* Unpack confidence values; depends on base calls */ | |
405 if (!r->base) | |
406 return -1; | |
407 | |
408 if (r->prob_A) xfree(r->prob_A); | |
409 if (r->prob_C) xfree(r->prob_C); | |
410 if (r->prob_G) xfree(r->prob_G); | |
411 if (r->prob_T) xfree(r->prob_T); | |
412 r->prob_A = (char *)xmalloc(r->NBases * sizeof(*r->prob_A)); | |
413 r->prob_C = (char *)xmalloc(r->NBases * sizeof(*r->prob_C)); | |
414 r->prob_G = (char *)xmalloc(r->NBases * sizeof(*r->prob_G)); | |
415 r->prob_T = (char *)xmalloc(r->NBases * sizeof(*r->prob_T)); | |
416 | |
417 for (i = 0; i < r->NBases; i++) { | |
418 switch (r->base[i]) { | |
419 case 'A': | |
420 case 'a': | |
421 r->prob_A[i] = bytes[i]; | |
422 r->prob_C[i] = 0; | |
423 r->prob_G[i] = 0; | |
424 r->prob_T[i] = 0; | |
425 break; | |
426 case 'C': | |
427 case 'c': | |
428 r->prob_A[i] = 0; | |
429 r->prob_C[i] = bytes[i]; | |
430 r->prob_G[i] = 0; | |
431 r->prob_T[i] = 0; | |
432 break; | |
433 case 'G': | |
434 case 'g': | |
435 r->prob_A[i] = 0; | |
436 r->prob_C[i] = 0; | |
437 r->prob_G[i] = bytes[i]; | |
438 r->prob_T[i] = 0; | |
439 break; | |
440 case 'T': | |
441 case 't': | |
442 r->prob_A[i] = 0; | |
443 r->prob_C[i] = 0; | |
444 r->prob_G[i] = 0; | |
445 r->prob_T[i] = bytes[i]; | |
446 break; | |
447 default: | |
448 r->prob_A[i] = bytes[i]; | |
449 r->prob_C[i] = bytes[i]; | |
450 r->prob_G[i] = bytes[i]; | |
451 r->prob_T[i] = bytes[i]; | |
452 } | |
453 } | |
454 | |
455 return 0; | |
456 } | |
457 | |
458 /* Encode the four main base confidences */ | |
459 static char *ztr_encode_confidence_4(ztr_t *z, | |
460 Read *r, int *nbytes, char **mdata, | |
461 int *mdbytes) | |
462 { | |
463 char *bytes; | |
464 int i, j; | |
465 | |
466 /* Check that we have any confidence values first */ | |
467 if (!r->prob_A || !r->prob_C || !r->prob_G || !r->prob_T) | |
468 return NULL; | |
469 | |
470 *mdata = NULL; | |
471 *mdbytes = 0; | |
472 | |
473 /* Check that they're not all zero - will "normally" be quick */ | |
474 for (i = 0; i < r->NBases; i++) { | |
475 if (r->prob_A[i]) break; | |
476 if (r->prob_C[i]) break; | |
477 if (r->prob_G[i]) break; | |
478 if (r->prob_T[i]) break; | |
479 } | |
480 if (i == r->NBases) | |
481 return NULL; | |
482 | |
483 /* Memory allocation */ | |
484 if (NULL == (bytes = xmalloc(4 * r->NBases * sizeof(*bytes) + 1))) | |
485 return NULL; | |
486 | |
487 /* | |
488 * Encode probs for called bases first | |
489 * Unknown base = 'T'. | |
490 */ | |
491 j = r->NBases; | |
492 bytes++; | |
493 for (i = 0; i < r->NBases; i++) { | |
494 switch (r->base[i]) { | |
495 case 'A': | |
496 case 'a': | |
497 bytes[i ] = r->prob_A[i]; | |
498 bytes[j++] = r->prob_C[i]; | |
499 bytes[j++] = r->prob_G[i]; | |
500 bytes[j++] = r->prob_T[i]; | |
501 break; | |
502 case 'C': | |
503 case 'c': | |
504 bytes[j++] = r->prob_A[i]; | |
505 bytes[i ] = r->prob_C[i]; | |
506 bytes[j++] = r->prob_G[i]; | |
507 bytes[j++] = r->prob_T[i]; | |
508 break; | |
509 case 'G': | |
510 case 'g': | |
511 bytes[j++] = r->prob_A[i]; | |
512 bytes[j++] = r->prob_C[i]; | |
513 bytes[i ] = r->prob_G[i]; | |
514 bytes[j++] = r->prob_T[i]; | |
515 break; | |
516 default: | |
517 bytes[j++] = r->prob_A[i]; | |
518 bytes[j++] = r->prob_C[i]; | |
519 bytes[j++] = r->prob_G[i]; | |
520 bytes[i ] = r->prob_T[i]; | |
521 break; | |
522 } | |
523 } | |
524 bytes--; | |
525 | |
526 *nbytes = r->NBases * 4 + 1; | |
527 | |
528 bytes[0] = ZTR_FORM_RAW; | |
529 return bytes; | |
530 } | |
531 | |
532 static int ztr_decode_confidence_4(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
533 char *bytes = chunk->data; | |
534 int nbytes = chunk->dlength; | |
535 int i, j; | |
536 | |
537 bytes++; | |
538 nbytes--; | |
539 | |
540 /* Unpack confidence values; depends on base calls */ | |
541 if (!r->base) | |
542 return -1; | |
543 | |
544 if (r->prob_A) xfree(r->prob_A); | |
545 if (r->prob_C) xfree(r->prob_C); | |
546 if (r->prob_G) xfree(r->prob_G); | |
547 if (r->prob_T) xfree(r->prob_T); | |
548 r->prob_A = (char *)xmalloc(r->NBases * sizeof(*r->prob_A)); | |
549 r->prob_C = (char *)xmalloc(r->NBases * sizeof(*r->prob_C)); | |
550 r->prob_G = (char *)xmalloc(r->NBases * sizeof(*r->prob_G)); | |
551 r->prob_T = (char *)xmalloc(r->NBases * sizeof(*r->prob_T)); | |
552 | |
553 j = r->NBases; | |
554 for (i = 0; i < r->NBases; i++) { | |
555 switch (r->base[i]) { | |
556 case 'A': | |
557 case 'a': | |
558 r->prob_A[i] = bytes[i]; | |
559 r->prob_C[i] = bytes[j++]; | |
560 r->prob_G[i] = bytes[j++]; | |
561 r->prob_T[i] = bytes[j++]; | |
562 break; | |
563 case 'C': | |
564 case 'c': | |
565 r->prob_A[i] = bytes[j++]; | |
566 r->prob_C[i] = bytes[i]; | |
567 r->prob_G[i] = bytes[j++]; | |
568 r->prob_T[i] = bytes[j++]; | |
569 break; | |
570 case 'G': | |
571 case 'g': | |
572 r->prob_A[i] = bytes[j++]; | |
573 r->prob_C[i] = bytes[j++]; | |
574 r->prob_G[i] = bytes[i]; | |
575 r->prob_T[i] = bytes[j++]; | |
576 break; | |
577 default: | |
578 r->prob_A[i] = bytes[j++]; | |
579 r->prob_C[i] = bytes[j++]; | |
580 r->prob_G[i] = bytes[j++]; | |
581 r->prob_T[i] = bytes[i]; | |
582 break; | |
583 } | |
584 } | |
585 | |
586 return 0; | |
587 } | |
588 | |
589 /* Encode the textual comments */ | |
590 static char *ztr_encode_text(ztr_t *z, | |
591 Read *r, int *nbytes, char **mdata, | |
592 int *mdbytes) { | |
593 char *bytes; | |
594 int len, alen; | |
595 int ident; | |
596 int i, j; | |
597 | |
598 if (!r->info) | |
599 return NULL; | |
600 | |
601 *mdata = NULL; | |
602 *mdbytes = 0; | |
603 | |
604 /* | |
605 * traditional Read comments are a single char * of ident=value lines. | |
606 * The length of ident=valueXident=valueX (X = newline) if the same | |
607 * as ident0value0ident0value0 (0 = \0), although ztr has | |
608 * a double \0 as terminator. | |
609 */ | |
610 len = strlen(r->info); | |
611 | |
612 /* Allocate */ | |
613 alen = len + 3; | |
614 bytes = xmalloc(alen); | |
615 | |
616 /* Copy */ | |
617 j = 0; | |
618 bytes[j++] = ZTR_FORM_RAW; | |
619 ident = 1; | |
620 for (i = 0; i < len; i++) { | |
621 switch (r->info[i]) { | |
622 case '=': | |
623 if (ident) { | |
624 ident = 0; | |
625 bytes[j++] = 0; | |
626 } else { | |
627 bytes[j++] = '='; | |
628 } | |
629 break; | |
630 | |
631 case '\n': | |
632 if (ident) { | |
633 /* Invalid Read info, but we'll carry on anyway. */ | |
634 if (j && bytes[j-1] != 0) | |
635 bytes[j++] = 0; | |
636 else | |
637 break; | |
638 } | |
639 bytes[j++] = 0; | |
640 ident = 1; | |
641 break; | |
642 | |
643 default: | |
644 bytes[j++] = r->info[i]; | |
645 } | |
646 | |
647 if (j + 3 > alen) { | |
648 /* This can happen if we have Read idents without values */ | |
649 alen += 100; | |
650 bytes = xrealloc(bytes, alen); | |
651 } | |
652 } | |
653 if (j && bytes[j-1] != 0) | |
654 bytes[j++] = 0; /* Must end in two nuls */ | |
655 bytes[j++] = 0; | |
656 *nbytes = j; | |
657 | |
658 return bytes; | |
659 } | |
660 | |
661 static void ztr_decode_text(Read *r, ztr_t *ztr) { | |
662 int i; | |
663 int nbytes = 0; | |
664 char *iptr; | |
665 | |
666 /* Find length */ | |
667 for (i = 0; i < ztr->ntext_segments; i++) { | |
668 if (ztr->text_segments[i].ident) | |
669 nbytes += strlen(ztr->text_segments[i].ident); | |
670 if (ztr->text_segments[i].value) | |
671 nbytes += strlen(ztr->text_segments[i].value); | |
672 nbytes += 2; | |
673 } | |
674 | |
675 /* Allocate */ | |
676 if (r->info) xfree(r->info); | |
677 r->info = (char *)xmalloc(nbytes+1); | |
678 | |
679 /* Convert */ | |
680 iptr = r->info; | |
681 for (i = 0; i < ztr->ntext_segments; i++) { | |
682 if (ztr->text_segments[i].ident && ztr->text_segments[i].value) { | |
683 int added = sprintf(iptr, "%s=%s\n", | |
684 ztr->text_segments[i].ident, | |
685 ztr->text_segments[i].value); | |
686 iptr += added; | |
687 } | |
688 } | |
689 *iptr = 0; | |
690 } | |
691 | |
692 /* Encode the clip points */ | |
693 static char *ztr_encode_clips(ztr_t *z, | |
694 Read *r, int *nbytes, char **mdata, | |
695 int *mdbytes) { | |
696 char *bytes; | |
697 | |
698 if (!r->NBases) | |
699 return NULL; | |
700 | |
701 if (r->leftCutoff == 0 && r->rightCutoff > r->NBases) | |
702 return NULL; | |
703 | |
704 *mdata = NULL; | |
705 *mdbytes = 0; | |
706 | |
707 /* Allocate */ | |
708 *nbytes = 9; | |
709 bytes = xmalloc(9); | |
710 | |
711 /* Store */ | |
712 bytes[1] = (r->leftCutoff >> 24) & 0xff; | |
713 bytes[2] = (r->leftCutoff >> 16) & 0xff; | |
714 bytes[3] = (r->leftCutoff >> 8) & 0xff; | |
715 bytes[4] = (r->leftCutoff >> 0) & 0xff; | |
716 bytes[5] = (r->rightCutoff >> 24) & 0xff; | |
717 bytes[6] = (r->rightCutoff >> 16) & 0xff; | |
718 bytes[7] = (r->rightCutoff >> 8) & 0xff; | |
719 bytes[8] = (r->rightCutoff >> 0) & 0xff; | |
720 | |
721 bytes[0] = ZTR_FORM_RAW; | |
722 return bytes; | |
723 } | |
724 | |
725 /* ARGSUSED */ | |
726 static void ztr_decode_clips(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
727 char *bytes = chunk->data; | |
728 | |
729 r->leftCutoff = | |
730 (((unsigned char)bytes[1]) << 24) + | |
731 (((unsigned char)bytes[2]) << 16) + | |
732 (((unsigned char)bytes[3]) << 8) + | |
733 (((unsigned char)bytes[4]) << 0); | |
734 | |
735 r->rightCutoff = | |
736 (((unsigned char)bytes[5]) << 24) + | |
737 (((unsigned char)bytes[6]) << 16) + | |
738 (((unsigned char)bytes[7]) << 8) + | |
739 (((unsigned char)bytes[8]) << 0); | |
740 } | |
741 | |
742 /* ARGSUSED */ | |
743 static char *ztr_encode_flow_order(ztr_t *z, | |
744 Read *r, int *nbytes, char **mdata, | |
745 int *mdbytes) { | |
746 char *bytes; | |
747 | |
748 if (!r->flow_order || !r->nflows) | |
749 return NULL; | |
750 | |
751 bytes = (char *)xmalloc(r->nflows+1); | |
752 *nbytes = r->nflows+1; | |
753 bytes[0] = ZTR_FORM_RAW; | |
754 | |
755 memcpy(bytes+1, r->flow_order, r->nflows); | |
756 | |
757 return bytes; | |
758 } | |
759 | |
760 static void ztr_decode_flow_order(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
761 char *bytes = chunk->data; | |
762 int nbytes = chunk->dlength; | |
763 | |
764 nbytes--; | |
765 bytes++; | |
766 | |
767 r->nflows = nbytes; | |
768 r->flow_order = (char *)xmalloc(r->nflows+1); | |
769 memcpy(r->flow_order, bytes, r->nflows); | |
770 r->flow_order[r->nflows] = 0; | |
771 } | |
772 | |
773 static char *ztr_encode_flow_proc(ztr_t *z, | |
774 Read *r, int *nbytes, char **mdata, | |
775 int *mdbytes) { | |
776 char *bytes; | |
777 int i, j; | |
778 float *data; | |
779 | |
780 if (!r->flow_order || !r->nflows) | |
781 return NULL; | |
782 | |
783 data = r->flow; | |
784 | |
785 /* Meta-data */ | |
786 if (z->header.version_major > 1 || | |
787 z->header.version_minor >= 2) { | |
788 /* 1.2 onwards */ | |
789 *mdata = (char *)malloc(10); | |
790 *mdbytes = 10; | |
791 sprintf(*mdata, "TYPE%cPYNO", 0); | |
792 } else { | |
793 *mdata = (char *)malloc(4); | |
794 *mdbytes = 4; | |
795 (*mdata)[0] = 'P'; | |
796 (*mdata)[1] = 'Y'; | |
797 (*mdata)[2] = 'N'; | |
798 (*mdata)[3] = 'O'; | |
799 } | |
800 | |
801 /* floats themselves, scaled */ | |
802 bytes = (char *)xmalloc(r->nflows*2+2); | |
803 *nbytes = r->nflows*2+2; | |
804 bytes[0] = ZTR_FORM_RAW; | |
805 bytes[1] = 0; | |
806 | |
807 for (i = 0, j = 2; i < r->nflows; i++) { | |
808 signed int t = data[i] * 100 + 0.49999; | |
809 bytes[j++] = (t >> 8) & 0xff; | |
810 bytes[j++] = (t >> 0) & 0xff; | |
811 } | |
812 | |
813 return bytes; | |
814 } | |
815 | |
816 /* ARGSUSED */ | |
817 static void ztr_decode_flow_proc(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
818 int i, j; | |
819 unsigned char *bytes = (unsigned char *)chunk->data; | |
820 int dlen = chunk->dlength; | |
821 | |
822 bytes+=2; | |
823 dlen-=2; | |
824 | |
825 /* Store in the Read structure */ | |
826 r->nflows = dlen/2; | |
827 r->flow = (float *)xcalloc(r->nflows, sizeof(float)); | |
828 for (i = j = 0; i < r->nflows; i++, j+=2) { | |
829 float sample = ((bytes[j] << 8) | bytes[j+1]) / 100.0; | |
830 r->flow[i] = sample; | |
831 } | |
832 } | |
833 | |
834 static char *ztr_encode_flow_raw(ztr_t *z, | |
835 Read *r, int *nbytes, char **mdata, | |
836 int *mdbytes) { | |
837 char *bytes; | |
838 int i, j; | |
839 unsigned int *data; | |
840 | |
841 if (!r->flow_raw || !r->nflows) | |
842 return NULL; | |
843 | |
844 data = r->flow_raw; | |
845 | |
846 /* Meta-data */ | |
847 if (z->header.version_major > 1 || | |
848 z->header.version_minor >= 2) { | |
849 /* 1.2 onwards */ | |
850 *mdata = (char *)malloc(10); | |
851 *mdbytes = 10; | |
852 sprintf(*mdata, "TYPE%cPYRW", 0); | |
853 } else { | |
854 *mdata = (char *)malloc(4); | |
855 *mdbytes = 4; | |
856 (*mdata)[0] = 'P'; | |
857 (*mdata)[1] = 'Y'; | |
858 (*mdata)[2] = 'R'; | |
859 (*mdata)[3] = 'W'; | |
860 } | |
861 | |
862 /* floats themselves, scaled */ | |
863 bytes = (char *)xmalloc(r->nflows*2+2); | |
864 *nbytes = r->nflows*2+2; | |
865 bytes[0] = ZTR_FORM_RAW; | |
866 bytes[1] = 0; | |
867 | |
868 for (i = 0, j = 2; i < r->nflows; i++) { | |
869 int t = data[i]; | |
870 bytes[j++] = (t >> 8) & 0xff; | |
871 bytes[j++] = (t >> 0) & 0xff; | |
872 } | |
873 | |
874 return bytes; | |
875 } | |
876 | |
877 /* ARGSUSED */ | |
878 static void ztr_decode_flow_raw(ztr_t *z, ztr_chunk_t *chunk, Read *r) { | |
879 int i, j; | |
880 unsigned char *bytes = (unsigned char *)chunk->data; | |
881 int dlen = chunk->dlength; | |
882 | |
883 bytes+=2; | |
884 dlen-=2; | |
885 | |
886 /* Store in the Read structure */ | |
887 r->nflows = dlen/2; | |
888 r->flow_raw = (unsigned int *)xcalloc(r->nflows, sizeof(*r->flow_raw)); | |
889 for (i = j = 0; i < r->nflows; i++, j+=2) { | |
890 unsigned int sample = (bytes[j] << 8) | bytes[j+1]; | |
891 r->flow_raw[i] = sample; | |
892 } | |
893 } | |
894 | |
895 /* | |
896 * read2ztr | |
897 * | |
898 * Converts an io_lib "Read" structure to a ztr_t structure. | |
899 * | |
900 * Arguments: | |
901 * r A pointer to the "Read" structure to convert from | |
902 * | |
903 * Returns: | |
904 * Success: A pointer to the ztr_t struct. | |
905 * Failure: NULL | |
906 */ | |
907 ztr_t *read2ztr(Read *r) { | |
908 ztr_t *ztr; | |
909 int i, j, nbytes, mdbytes; | |
910 char *bytes; | |
911 char *mdata; | |
912 | |
913 #define DO_SMP4 | |
914 int chunk_type[] = { | |
915 #ifdef DO_SMP4 | |
916 ZTR_TYPE_SMP4, | |
917 #else | |
918 ZTR_TYPE_SAMP, | |
919 ZTR_TYPE_SAMP, | |
920 ZTR_TYPE_SAMP, | |
921 ZTR_TYPE_SAMP, | |
922 #endif | |
923 ZTR_TYPE_BASE, | |
924 ZTR_TYPE_BPOS, | |
925 ZTR_TYPE_CNF4, | |
926 ZTR_TYPE_TEXT, | |
927 ZTR_TYPE_CLIP, | |
928 | |
929 ZTR_TYPE_FLWO, | |
930 ZTR_TYPE_SAMP, | |
931 ZTR_TYPE_SAMP, | |
932 }; | |
933 | |
934 char *(*chunk_func[])(ztr_t *z, Read *r, int *nbytes, char **mdata, int *mdbytes) = { | |
935 #ifdef DO_SMP4 | |
936 ztr_encode_samples_4, | |
937 #else | |
938 ztr_encode_samples_A, | |
939 ztr_encode_samples_C, | |
940 ztr_encode_samples_G, | |
941 ztr_encode_samples_T, | |
942 #endif | |
943 ztr_encode_bases, | |
944 ztr_encode_positions, | |
945 ztr_encode_confidence_4, | |
946 ztr_encode_text, | |
947 ztr_encode_clips, | |
948 | |
949 ztr_encode_flow_order, | |
950 ztr_encode_flow_proc, | |
951 ztr_encode_flow_raw, | |
952 }; | |
953 | |
954 if (NULL == (ztr = new_ztr())) | |
955 return NULL; | |
956 | |
957 /* Create a header record */ | |
958 memcpy(ztr->header.magic, ZTR_MAGIC, 8); | |
959 ztr->header.version_major = ZTR_VERSION_MAJOR; | |
960 ztr->header.version_minor = ZTR_VERSION_MINOR; | |
961 | |
962 /* Alloc chunks (max number) */ | |
963 ztr->nchunks = sizeof(chunk_type)/sizeof(*chunk_type); | |
964 ztr->chunk = (ztr_chunk_t *)xmalloc(ztr->nchunks * | |
965 sizeof(ztr_chunk_t)); | |
966 if (NULL == ztr->chunk) | |
967 return NULL; | |
968 | |
969 /* Create the chunks */ | |
970 for (j = i = 0; i < ztr->nchunks; i++) { | |
971 /* char str[5]; */ | |
972 | |
973 bytes = chunk_func[i](ztr, r, &nbytes, &mdata, &mdbytes); | |
974 if (!bytes) | |
975 continue; | |
976 | |
977 /* | |
978 fprintf(stderr, "block %.4s length %d\n", | |
979 ZTR_BE2STR(chunk_type[i], str), nbytes); | |
980 */ | |
981 ztr->chunk[j].type = chunk_type[i]; | |
982 ztr->chunk[j].mdlength = mdbytes; | |
983 ztr->chunk[j].mdata = mdata; | |
984 ztr->chunk[j].dlength = nbytes; | |
985 ztr->chunk[j].data = bytes; | |
986 ztr->chunk[j].ztr_owns = 1; | |
987 | |
988 j++; | |
989 } | |
990 ztr->nchunks = j; | |
991 | |
992 /* | |
993 * Experiments show that typically a double delta does | |
994 * better than a single delta for 8-bit data, and the other | |
995 * way around for 16-bit data | |
996 */ | |
997 ztr->delta_level = r->maxTraceVal < 256 ? 2 : 3; | |
998 | |
999 return ztr; | |
1000 } | |
1001 | |
1002 /* | |
1003 * ztr2read | |
1004 * | |
1005 * Converts an ztr_t structure to an io_lib "Read" structure. | |
1006 * | |
1007 * Arguments: | |
1008 * ztr A pointer to the ztr structure to convert from | |
1009 * | |
1010 * Returns: | |
1011 * Success: A pointer to the Read struct. | |
1012 * Failure: NULL | |
1013 */ | |
1014 Read *ztr2read(ztr_t *ztr) { | |
1015 Read *r; | |
1016 int i; | |
1017 int done_conf = 0, done_pos = 0; | |
1018 int sections = read_sections(0); | |
1019 | |
1020 /* Allocate */ | |
1021 r = read_allocate(0, 0); | |
1022 | |
1023 if (NULLRead == r) | |
1024 return NULLRead; | |
1025 | |
1026 /* Proces text chunks - makes conversion easier */ | |
1027 if (sections & READ_COMMENTS) { | |
1028 ztr_process_text(ztr); | |
1029 ztr_decode_text(r, ztr); | |
1030 } | |
1031 | |
1032 /* Iterate around each known chunk type turning into the Read elements */ | |
1033 for (i = 0; i < ztr->nchunks; i++) { | |
1034 switch (ztr->chunk[i].type) { | |
1035 case ZTR_TYPE_SMP4: | |
1036 if (sections & READ_SAMPLES) { | |
1037 char *offs = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "OFFS"); | |
1038 char *type = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE"); | |
1039 if (!type || 0 == strcmp(type, "PROC")) { | |
1040 //if (type && 0 == strcmp(type, "SLXI")) { | |
1041 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1042 ztr_decode_samples_4(ztr, &ztr->chunk[i], r); | |
1043 | |
1044 if (offs) | |
1045 r->baseline = atoi(offs); | |
1046 } | |
1047 } | |
1048 break; | |
1049 | |
1050 case ZTR_TYPE_SAMP: | |
1051 if (sections & READ_SAMPLES) { | |
1052 char *type = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE"); | |
1053 char *offs = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "OFFS"); | |
1054 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1055 if (type && 0 == strcmp(type, "PYRW")) | |
1056 ztr_decode_flow_raw(ztr, &ztr->chunk[i], r); | |
1057 else if (type && 0 == strcmp(type, "PYNO")) | |
1058 ztr_decode_flow_proc(ztr, &ztr->chunk[i], r); | |
1059 else if (type && | |
1060 (0 == strcmp(type, "A") || | |
1061 0 == strcmp(type, "C") || | |
1062 0 == strcmp(type, "G") || | |
1063 0 == strcmp(type, "T"))) { | |
1064 ztr_decode_samples(ztr, &ztr->chunk[i], r); | |
1065 | |
1066 if (offs) | |
1067 r->baseline = atoi(offs); | |
1068 } | |
1069 } | |
1070 break; | |
1071 | |
1072 case ZTR_TYPE_BASE: | |
1073 if (sections & READ_BASES) { | |
1074 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1075 ztr_decode_bases(ztr, &ztr->chunk[i], r); | |
1076 } | |
1077 break; | |
1078 | |
1079 case ZTR_TYPE_BPOS: | |
1080 if (sections & READ_BASES) { | |
1081 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1082 ztr_decode_positions(ztr, &ztr->chunk[i], r); | |
1083 done_pos++; | |
1084 } | |
1085 break; | |
1086 | |
1087 case ZTR_TYPE_CNF4: | |
1088 if (sections & READ_BASES) { | |
1089 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1090 ztr_decode_confidence_4(ztr, &ztr->chunk[i], r); | |
1091 done_conf++; | |
1092 } | |
1093 break; | |
1094 | |
1095 case ZTR_TYPE_CNF1: | |
1096 if (sections & READ_BASES) { | |
1097 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1098 ztr_decode_confidence_1(ztr, &ztr->chunk[i], r); | |
1099 done_conf++; | |
1100 } | |
1101 break; | |
1102 | |
1103 case ZTR_TYPE_TEXT: | |
1104 /* Skip - already did this; see ztr_process_text */ | |
1105 break; | |
1106 | |
1107 case ZTR_TYPE_CLIP: | |
1108 if (sections & READ_BASES) { | |
1109 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1110 ztr_decode_clips(ztr, &ztr->chunk[i], r); | |
1111 } | |
1112 break; | |
1113 | |
1114 case ZTR_TYPE_FLWO: | |
1115 if (sections & READ_SAMPLES) { | |
1116 uncompress_chunk(ztr, &ztr->chunk[i]); | |
1117 ztr_decode_flow_order(ztr, &ztr->chunk[i], r); | |
1118 } | |
1119 break; | |
1120 } | |
1121 } | |
1122 | |
1123 /* Handle the case when we have no confidence values */ | |
1124 if (!done_conf && r->NBases > 0) { | |
1125 r->prob_A = (char *)xrealloc(r->prob_A, r->NBases); | |
1126 r->prob_C = (char *)xrealloc(r->prob_C, r->NBases); | |
1127 r->prob_G = (char *)xrealloc(r->prob_G, r->NBases); | |
1128 r->prob_T = (char *)xrealloc(r->prob_T, r->NBases); | |
1129 memset(r->prob_A, 0, r->NBases); | |
1130 memset(r->prob_C, 0, r->NBases); | |
1131 memset(r->prob_G, 0, r->NBases); | |
1132 memset(r->prob_T, 0, r->NBases); | |
1133 } | |
1134 | |
1135 /* Handle the case when we have no BPOS chunk */ | |
1136 if (!done_pos && r->NBases > 0) { | |
1137 r->basePos = (uint_2 *)xrealloc(r->basePos, r->NBases * 2); | |
1138 for (i = 0; i < r->NBases; i++) | |
1139 r->basePos[i] = i; | |
1140 } | |
1141 | |
1142 r->format = TT_ZTR; | |
1143 | |
1144 return r; | |
1145 } |