Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/compression.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #include <stdio.h> | |
2 #include <unistd.h> | |
3 #include <stdlib.h> | |
4 #include <string.h> | |
5 #include <zlib.h> | |
6 #include <assert.h> | |
7 #include <math.h> | |
8 #include <ctype.h> | |
9 | |
10 #ifndef M_PI | |
11 # define M_PI 3.14159265358979323846 | |
12 #endif | |
13 | |
14 #include "io_lib/ztr.h" | |
15 #include "io_lib/os.h" | |
16 #include "io_lib/compression.h" | |
17 #include "io_lib/xalloc.h" | |
18 | |
19 #ifndef NDEBUG | |
20 # define NDEBUG | |
21 #endif | |
22 | |
23 /* | |
24 * --------------------------------------------------------------------------- | |
25 * ZTR_FORM_ZLIB | |
26 * --------------------------------------------------------------------------- | |
27 */ | |
28 | |
29 /* | |
30 * Some comments on zlib usage. | |
31 * | |
32 * - Ideally for trace data, after decorrelation, we should use Z_FILTERED. | |
33 * Empirical studies show that this gives the best compression ratio, but | |
34 * it is slow (about the same speed as normal gzip). MUCH faster is huffman | |
35 * only, and it doesn't give radically different compression ratios. | |
36 * | |
37 * - When compressing using Z_HUFFMAN_ONLY we used compression level | |
38 * '1' as this invokes the deflate_fast() algorithm. It makes no | |
39 * difference to the compression level, but it seems to be quicker still. | |
40 * | |
41 */ | |
42 | |
43 /* | |
44 * zlib_huff() | |
45 * | |
46 * Compresses data using huffman encoding, as implemented by zlib. | |
47 * | |
48 * Arguments: | |
49 * uncomp Uncompressed input data | |
50 * uncomp_len Length of uncomp data | |
51 * comp_len Output: length of compressed data | |
52 * | |
53 * Returns: | |
54 * Compressed data if successful | |
55 * NULL if not successful | |
56 */ | |
57 char *zlib_huff(char *uncomp, int uncomp_len, int strategy, int *comp_len) { | |
58 z_stream zstr; | |
59 int err; | |
60 int comp_len_tmp = (int)(uncomp_len * 1.001 + 12); /* Maximum expansion size */ | |
61 char *comp = (char *)xmalloc(comp_len_tmp+5); | |
62 int c_len; | |
63 | |
64 /* Initialise zlib */ | |
65 zstr.zalloc = (alloc_func)0; | |
66 zstr.zfree = (free_func)0; | |
67 zstr.opaque = (voidpf)0; | |
68 | |
69 if ((err = deflateInit2(&zstr, 1, Z_DEFLATED, 15, 8, strategy)) != Z_OK) { | |
70 fprintf(stderr, "zlib errror in deflateInit2(): %d\n", err); | |
71 return NULL; | |
72 } | |
73 | |
74 /* Set up input and output buffers */ | |
75 zstr.next_in = (unsigned char *)uncomp; | |
76 zstr.avail_in = uncomp_len; | |
77 zstr.next_out = (unsigned char *)comp+5; | |
78 zstr.avail_out = comp_len_tmp; | |
79 | |
80 /* Do the compression */ | |
81 if ((err = deflate(&zstr, Z_FINISH)) != Z_STREAM_END) { | |
82 fprintf(stderr, "zlib errror in deflate(): %d\n", err); | |
83 return NULL; | |
84 } | |
85 | |
86 /* Tidy up */ | |
87 deflateEnd(&zstr); | |
88 | |
89 c_len = zstr.total_out; | |
90 | |
91 /* Return */ | |
92 comp[0] = ZTR_FORM_ZLIB; | |
93 comp[1] = (uncomp_len >> 0) & 0xff; | |
94 comp[2] = (uncomp_len >> 8) & 0xff; | |
95 comp[3] = (uncomp_len >> 16) & 0xff; | |
96 comp[4] = (uncomp_len >> 24) & 0xff; | |
97 | |
98 if (comp_len) | |
99 *comp_len = c_len+5; | |
100 | |
101 return comp; | |
102 } | |
103 | |
104 /* | |
105 * zlib_dehuff() | |
106 * | |
107 * Uncompresses data using huffman encoding, as implemented by zlib. | |
108 * | |
109 * Arguments: | |
110 * comp Compressed input data | |
111 * comp_len Length of comp data | |
112 * uncomp_len Output: length of uncompressed data | |
113 * | |
114 * Returns: | |
115 * Uncompressed data if successful | |
116 * NULL if not successful | |
117 */ | |
118 char *zlib_dehuff(char *comp, int comp_len, int *uncomp_len) { | |
119 z_stream zstr; | |
120 int err; | |
121 char *uncomp; | |
122 int ulen; | |
123 | |
124 /* Allocate */ | |
125 ulen = | |
126 ((unsigned char)comp[1] << 0) + | |
127 ((unsigned char)comp[2] << 8) + | |
128 ((unsigned char)comp[3] << 16) + | |
129 ((unsigned char)comp[4] << 24); | |
130 uncomp = (char *)xmalloc(ulen); | |
131 | |
132 /* Initialise zlib */ | |
133 zstr.zalloc = (alloc_func)0; | |
134 zstr.zfree = (free_func)0; | |
135 zstr.opaque = (voidpf)0; | |
136 | |
137 if ((err = inflateInit(&zstr)) != Z_OK) { | |
138 fprintf(stderr, "zlib errror in inflateInit(): %d\n", err); | |
139 return NULL; | |
140 } | |
141 | |
142 /* Set up input and output buffers */ | |
143 zstr.next_in = (unsigned char *)comp+5; | |
144 zstr.avail_in = comp_len-5; | |
145 zstr.next_out = (unsigned char *)uncomp; | |
146 zstr.avail_out = ulen; | |
147 | |
148 /* Do the decompression */ | |
149 if ((err = inflate(&zstr, Z_FINISH)) != Z_STREAM_END) { | |
150 fprintf(stderr, "zlib errror in deflate(): %d\n", err); | |
151 return NULL; | |
152 } | |
153 | |
154 /* Tidy up */ | |
155 inflateEnd(&zstr); | |
156 | |
157 if (uncomp_len) | |
158 *uncomp_len = ulen; | |
159 | |
160 return uncomp; | |
161 } | |
162 | |
163 | |
164 /* | |
165 * --------------------------------------------------------------------------- | |
166 * ZTR_FORM_RLE | |
167 * --------------------------------------------------------------------------- | |
168 */ | |
169 | |
170 /* | |
171 * Run length encoding. | |
172 * | |
173 * Any run of 3 or more identical characters (up to 255 in a row) are replaced | |
174 * by a 'guard' byte followed by the number of characters followed by | |
175 * the character value itself. | |
176 * Any single guard value in the input is escaped using 'guard 0'. | |
177 * | |
178 * Specifying guard as -1 will automatically pick one of the least used | |
179 * characters in the input as the guard. | |
180 * | |
181 * Arguments: | |
182 * uncomp Input data | |
183 * uncomp_len Length of input data 'uncomp' | |
184 * guard Guard byte - used to encode "N" copies of data | |
185 * comp_len Output: length of compressed data | |
186 * | |
187 * Returns: | |
188 * Compressed data if successful | |
189 * NULL if not successful | |
190 */ | |
191 char *rle(char *uncomp, int uncomp_len, int guard, int *comp_len) { | |
192 int i, k, c_len = 0; | |
193 char *comp = xmalloc(2 * uncomp_len + 6); | |
194 char *out = comp + 6; | |
195 | |
196 /* A guard of -1 implies to search for the least frequent symbol */ | |
197 if (guard == -1) { | |
198 int cnt[256]; | |
199 int bestcnt = uncomp_len + 1; | |
200 | |
201 for (i = 0; i < 256; i++) | |
202 cnt[i] = 0; | |
203 | |
204 for (i = 0; i < uncomp_len; i++) { | |
205 cnt[(unsigned char)uncomp[i]]++; | |
206 } | |
207 | |
208 for (i = 0; i < 256; i++) { | |
209 if (cnt[i] < bestcnt) { | |
210 bestcnt = cnt[i]; | |
211 guard = i; | |
212 } | |
213 } | |
214 } | |
215 | |
216 for (i = 0; i < uncomp_len; i=k) { | |
217 /* | |
218 * Detect blocks of up identical bytes up to 255 bytes long. | |
219 */ | |
220 for (k = i; k < uncomp_len && uncomp[i] == uncomp[k]; k++) | |
221 if (k-i == 255) | |
222 break; | |
223 | |
224 /* 1, 2 or 3 bytes are best stored "as is" */ | |
225 if (k-i < 4) { | |
226 do { | |
227 /* | |
228 * If we find 'guard' in our sequence, escape it by | |
229 * outputting 'guard' <zero>. (We know that we'll never | |
230 * write out zero copies of a token in our rle compression | |
231 * algorithm.) | |
232 */ | |
233 if ((unsigned char)(uncomp[i]) == guard) { | |
234 out[c_len++] = guard; | |
235 out[c_len++] = 0; | |
236 } else { | |
237 out[c_len++] = uncomp[i]; | |
238 } | |
239 i++; | |
240 } while (k >= i+1); | |
241 } else { | |
242 /* More than 3 bytes: store as ('guard', length, byte value) */ | |
243 out[c_len++] = guard; | |
244 out[c_len++] = k-i; | |
245 out[c_len++] = uncomp[i]; | |
246 } | |
247 } | |
248 | |
249 /* Return */ | |
250 comp[0] = ZTR_FORM_RLE; | |
251 comp[1] = (uncomp_len >> 0) & 0xff; | |
252 comp[2] = (uncomp_len >> 8) & 0xff; | |
253 comp[3] = (uncomp_len >> 16) & 0xff; | |
254 comp[4] = (uncomp_len >> 24) & 0xff; | |
255 comp[5] = guard; | |
256 | |
257 if (comp_len) | |
258 *comp_len = c_len+6; | |
259 | |
260 return comp; | |
261 } | |
262 | |
/*
 * Reverses run length encoding.
 *
 * Arguments:
 *	comp		Compressed input data (6-byte header, see rle())
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data (may be NULL)
 *
 * Returns:
 *	Uncompressed data (malloced; caller frees) if successful
 *	NULL if not successful
 */
/* ARGSUSED */
char *unrle(char *comp, int comp_len, int *uncomp_len) {
    int in_i, out_i, i, val, count, out_len;
    char *uncomp;
    unsigned char *in = (unsigned char *)comp+6;
    char *out;
    int guard = (unsigned char)comp[5];

    /* Output length comes from the little-endian header */
    out_len =
        ((unsigned char)comp[1] << 0) +
        ((unsigned char)comp[2] << 8) +
        ((unsigned char)comp[3] << 16) +
        ((unsigned char)comp[4] << 24);
    out = uncomp = (char *)xmalloc(out_len);
    if (!uncomp) /* previously used unchecked */
        return NULL;

    for (in_i = out_i = 0; out_i < out_len; in_i++) {
        if (in[in_i] != guard) {
            /* When not 'guard' it's easy - just output this token */
            assert(out_i >= 0 && out_i < out_len);
            out[out_i++] = in[in_i];

        } else {
            /*
             * Found a 'guard' token. If next token is zero, then
             * we were simply escaping a real 'guard' token in the input
             * data, otherwise output a string of bytes.
             */
            count = in[++in_i];
            if (count != 0) {
                val = in[++in_i];
                /* Clamp malformed run counts so a corrupt stream cannot
                 * write past the buffer in NDEBUG builds */
                if (count > out_len - out_i)
                    count = out_len - out_i;
                for (i = 0; i < count; i++) {
                    assert(out_i >= 0 && out_i < out_len);
                    out[out_i++] = val;
                }
            } else {
                assert(out_i >= 0 && out_i < out_len);
                out[out_i++] = guard;
            }
        }
    }

    if (uncomp_len)
        *uncomp_len = out_len;

    return uncomp;
}
322 | |
323 /* | |
324 * --------------------------------------------------------------------------- | |
325 * ZTR_FORM_XRLE | |
326 * --------------------------------------------------------------------------- | |
327 */ | |
328 | |
329 /* | |
330 * Mutli-byte run length encoding. | |
331 * | |
332 * Any run of 3 or more identical characters (up to 255 in a row) are replaced | |
333 * by a 'guard' byte followed by the number of characters followed by | |
334 * the character value itself. | |
335 * Any single guard value in the input is escaped using 'guard 0'. | |
336 * | |
337 * Specifying guard as -1 will automatically pick one of the least used | |
338 * characters in the input as the guard. | |
339 * | |
340 * Arguments: | |
341 * uncomp Input data | |
342 * uncomp_len Length of input data 'uncomp' | |
343 * guard Guard byte - used to encode "N" copies of data | |
344 * rsz Size of blocks to compare for run checking. | |
345 * comp_len Output: length of compressed data | |
346 * | |
347 * Returns: | |
348 * Compressed data if successful | |
349 * NULL if not successful | |
350 */ | |
351 char *xrle(char *uncomp, int uncomp_len, int guard, int rsz, int *comp_len) { | |
352 char *comp = (char *)malloc(2 * uncomp_len + 3); | |
353 char *out = comp; | |
354 int i, j, k; | |
355 | |
356 /* A guard of -1 implies to search for the least frequent symbol */ | |
357 if (guard == -1) { | |
358 int cnt[256]; | |
359 int bestcnt = uncomp_len + 1; | |
360 | |
361 for (i = 0; i < 256; i++) | |
362 cnt[i] = 0; | |
363 | |
364 for (i = 0; i < uncomp_len; i++) { | |
365 cnt[(unsigned char)uncomp[i]]++; | |
366 } | |
367 | |
368 for (i = 0; i < 256; i++) { | |
369 if (cnt[i] < bestcnt) { | |
370 bestcnt = cnt[i]; | |
371 guard = i; | |
372 } | |
373 } | |
374 } | |
375 | |
376 *out++ = ZTR_FORM_XRLE; | |
377 *out++ = rsz; | |
378 *out++ = guard; | |
379 | |
380 for (i = 0; i < uncomp_len; i = k) { | |
381 /* Count repeats */ | |
382 k = i + rsz; | |
383 while (k <= uncomp_len - rsz && !memcmp(&uncomp[i], &uncomp[k], rsz)) { | |
384 k += rsz; | |
385 if ((k-i)/rsz == 255) | |
386 break; | |
387 } | |
388 | |
389 if (k-i > rsz) { | |
390 /* Duplicates, so RLE */ | |
391 *out++ = guard; | |
392 *out++ = (k-i)/rsz; | |
393 for (j = 0; j < rsz; j++) | |
394 *out++ = uncomp[i+j]; | |
395 } else { | |
396 /* No dups, store as is escaping guarding as appropriate */ | |
397 if ((unsigned char)(uncomp[i]) == guard) { | |
398 *out++ = guard; | |
399 *out++ = 0; | |
400 } else { | |
401 *out++ = uncomp[i]; | |
402 } | |
403 k = i+1; | |
404 } | |
405 } | |
406 | |
407 *comp_len = out-comp; | |
408 | |
409 return comp; | |
410 } | |
411 | |
412 | |
/*
 * Reverses multi-byte run length encoding (see xrle()).
 *
 * Arguments:
 *	comp		Compressed input data (3-byte header: format, rsz, guard)
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data (may be NULL)
 *
 * Returns:
 *	Uncompressed data (malloced, NUL terminated; caller frees) if successful
 *	NULL if not successful
 */
char *unxrle(char *comp, int comp_len, int *uncomp_len) {
    char *uncomp;
    char *out;
    int rsz = (unsigned char)comp[1];
    int guard = (unsigned char)comp[2];
    unsigned char *in;
    int unclen = 0, cpos, len;

    /* Pass 1: calculate the uncompressed length */
    for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len; unclen++) {
        if (in[cpos++] == guard) {
            if ((len = in[cpos++])) {
                cpos += rsz;
                unclen += len*rsz - 1;
            }
        }
    }
    if (uncomp_len)
        *uncomp_len = unclen;

    /* Pass 2: expand (the +1 is room for a NUL terminator) */
    uncomp = out = (char *)malloc(unclen+1);
    if (!uncomp) /* previously used unchecked */
        return NULL;
    for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len;) {
        /*
         * 'c' must be an int, not a char: with a signed char, guard
         * values >= 128 never compared equal (sign extension), so this
         * pass disagreed with pass 1's length calculation and overran
         * the buffer.
         */
        int c = in[cpos++];
        if (c != guard) {
            *out++ = c;
        } else {
            int rlen = in[cpos++];
            if (rlen) {
                int i, j;
                /* Duplicate the following rsz-byte block rlen times */
                for (i = 0; i < rlen; i++) {
                    for (j = 0; j < rsz; j++) {
                        *out++ = in[cpos+j];
                    }
                }
                cpos += rsz;
            } else {
                /* 'guard 0' is an escaped literal guard byte */
                *out++ = guard;
            }
        }
    }
    *out++ = 0;

    return uncomp;
}
469 | |
470 /* | |
471 * Mutli-byte run length encoding. | |
472 * | |
473 * Steps along in words of size 'rsz'. Unlike XRLE above this does run-length | |
474 * encoding by writing out an additional "length" word every time 2 or more | |
475 * words in a row are spotted. This removes the need for a guard byte. | |
476 * | |
477 * Additionally this method ensures that both input and output formats remain | |
478 * aligned on words of size 'rsz'. | |
479 * | |
480 * Arguments: | |
481 * uncomp Input data | |
482 * uncomp_len Length of input data 'uncomp' | |
483 * rsz Size of blocks to compare for run checking. | |
484 * comp_len Output: length of compressed data | |
485 * | |
486 * Returns: | |
487 * Compressed data if successful | |
488 * NULL if not successful | |
489 */ | |
490 char *xrle2(char *uncomp, int uncomp_len, int rsz, int *comp_len) { | |
491 char *comp = (char *)malloc(1.4*uncomp_len + rsz); | |
492 char *last = uncomp; | |
493 char *out = comp; | |
494 int i, j, k, run_len = 0; | |
495 | |
496 *out++ = ZTR_FORM_XRLE2; | |
497 *out++ = rsz; | |
498 for (i = 2; i < rsz; i++) | |
499 *out++ = -40; | |
500 | |
501 /* FIXME: how to deal with uncomp_len not being a multiple of rsz */ | |
502 for (i = 0; i < uncomp_len; i += rsz) { | |
503 /* FIXME: use inline #def versions of memcmp/memcpy for speed? */ | |
504 memcpy(out, &uncomp[i], rsz); | |
505 out += rsz; | |
506 | |
507 if (memcmp(last, &uncomp[i], rsz) == 0) { | |
508 run_len++; | |
509 } else { | |
510 run_len = 1; | |
511 last = &uncomp[i]; | |
512 } | |
513 | |
514 /* NB: >= 3 is more optimal in many cases */ | |
515 if (run_len >= 2) { | |
516 /* Count remaining copies */ | |
517 for (k = i+rsz; k < uncomp_len && run_len < 257; k += rsz) { | |
518 if (memcmp(last, &uncomp[k], rsz) != 0) | |
519 break; | |
520 run_len++; | |
521 } | |
522 run_len -= 2; | |
523 | |
524 *out++ = run_len; | |
525 for (j = 1; j < rsz; j++) { | |
526 /* Padding with last reduces entropy compared to padding | |
527 * with copies of run_len | |
528 */ | |
529 *out++ = last[j]; | |
530 } | |
531 i = k-rsz; | |
532 | |
533 last = out-rsz; | |
534 run_len = 0; | |
535 } | |
536 } | |
537 | |
538 *comp_len = out-comp; | |
539 | |
540 return comp; | |
541 } | |
542 | |
543 /* | |
544 * Reverses multi-byte run length encoding (xrle_new). | |
545 * | |
546 * Arguments: | |
547 * comp Compressed input data | |
548 * comp_len Length of comp data | |
549 * uncomp_len Output: length of uncompressed data | |
550 * | |
551 * Returns: | |
552 * Uncompressed data if successful | |
553 * NULL if not successful | |
554 */ | |
555 char *unxrle2(char *comp, int comp_len, int *uncomp_len) { | |
556 char *out, *last; | |
557 int out_len, out_alloc, rsz, i, j, run_len; | |
558 | |
559 out_alloc = comp_len*2; /* just an estimate */ | |
560 out_len = 0; | |
561 if (NULL == (out = (char *)malloc(out_alloc))) | |
562 return NULL; | |
563 | |
564 if (*comp++ != ZTR_FORM_XRLE2) | |
565 return NULL; | |
566 | |
567 /* Read rsz and swallow padding */ | |
568 rsz = *comp++; | |
569 comp_len -= 2; | |
570 for (i = 2; i < rsz; i++) { | |
571 comp++; | |
572 comp_len--; | |
573 } | |
574 | |
575 /* Uncompress */ | |
576 run_len = 0; | |
577 last = comp; | |
578 for (i = 0; i < comp_len;) { | |
579 while (out_len + rsz > out_alloc) { | |
580 out_alloc *= 2; | |
581 if (NULL == (out = (char *)realloc(out, out_alloc))) | |
582 return NULL; | |
583 } | |
584 memcpy(&out[out_len], &comp[i], rsz); | |
585 | |
586 if (memcmp(&out[out_len], last, rsz) == 0) { | |
587 run_len++; | |
588 } else { | |
589 run_len = 1; | |
590 } | |
591 | |
592 i += rsz; | |
593 out_len += rsz; | |
594 | |
595 /* NB: >= 3 is more optimal in many cases */ | |
596 if (run_len >= 2) { | |
597 /* Count remaining copies */ | |
598 run_len = (unsigned char)comp[i]; | |
599 i += rsz; | |
600 | |
601 while (out_len + run_len * rsz > out_alloc) { | |
602 out_alloc *= 2; | |
603 if (NULL == (out = (char *)realloc(out, out_alloc))) | |
604 return NULL; | |
605 } | |
606 | |
607 for (j = 0; j < run_len; j++) { | |
608 memcpy(&out[out_len], last, rsz); | |
609 out_len += rsz; | |
610 } | |
611 run_len = 0; | |
612 } | |
613 | |
614 last = &comp[i-rsz]; | |
615 } | |
616 | |
617 /* Shrink back down to avoid excessive memory usage */ | |
618 out = realloc(out, out_len); | |
619 *uncomp_len = out_len; | |
620 | |
621 return out; | |
622 } | |
623 | |
624 | |
625 /* | |
626 * --------------------------------------------------------------------------- | |
627 * ZTR_FORM_DELTA1 | |
628 * --------------------------------------------------------------------------- | |
629 */ | |
630 | |
631 /* | |
632 * This replaces 'samples' with successive differences between samples. | |
633 * These implementations support 'level's of 1, 2 and 3. | |
634 * | |
635 * NB: This is analogous to our SCF delta_samples1 (etc) function, except that | |
636 * this function about 40% faster. | |
637 * | |
638 * Implementation details taken from Jean Thierry-Mieg's CTF code. | |
639 */ | |
640 | |
641 /* | |
642 * decorrelate1() | |
643 * | |
644 * Produce successive deltas from a 1-byte array. | |
645 * | |
646 * Arguments: | |
647 * uncomp Uncompressed data | |
648 * uncomp_len Length of uncompressed data | |
649 * level Differencing level (must be 1, 2 or 3) | |
650 * comp_len Return: where to store new compressed length | |
651 * | |
652 * Returns: | |
653 * Success: A decorrelated buffer (malloced) | |
654 * Failure: NULL | |
655 */ | |
656 char *decorrelate1(char *x_uncomp, | |
657 int uncomp_len, | |
658 int level, | |
659 int *comp_len) { | |
660 int i, z; | |
661 int u1 = 0, u2 = 0, u3 = 0; | |
662 char *comp = (char *)xmalloc(uncomp_len + 2); | |
663 unsigned char *u_uncomp = (unsigned char *)x_uncomp; | |
664 | |
665 if (!comp) | |
666 return NULL; | |
667 | |
668 comp+=2; | |
669 switch (level) { | |
670 case 1: | |
671 for (i = 0; i < uncomp_len; i++) { | |
672 z = u1; | |
673 u1 = u_uncomp[i]; | |
674 comp[i] = u_uncomp[i] - z; | |
675 } | |
676 break; | |
677 | |
678 case 2: | |
679 for (i = 0; i < uncomp_len; i++) { | |
680 z = 2*u1 - u2; | |
681 u2 = u1; | |
682 u1 = u_uncomp[i]; | |
683 comp[i] = u_uncomp[i] - z; | |
684 } | |
685 break; | |
686 | |
687 case 3: | |
688 for (i = 0; i < uncomp_len; i++) { | |
689 z = 3*u1 - 3*u2 + u3; | |
690 u3 = u2; | |
691 u2 = u1; | |
692 u1 = u_uncomp[i]; | |
693 comp[i] = u_uncomp[i] - z; | |
694 } | |
695 break; | |
696 | |
697 default: | |
698 return NULL; | |
699 } | |
700 comp-=2; | |
701 comp[0] = ZTR_FORM_DELTA1; | |
702 comp[1] = level; | |
703 | |
704 *comp_len = uncomp_len+2; | |
705 | |
706 return comp; | |
707 } | |
708 | |
709 #define ABS(a) ((a) > 0 ? (a) : -(a)) | |
710 | |
/* ZTR_FORM_DDELTA1 - experimental */
/*
 * Adaptive variant of decorrelate1(): each sample is encoded against the
 * predictor order that best predicted the PREVIOUS sample, so no
 * per-sample side information needs to be stored.
 *
 * Arguments:
 *	x_uncomp	Uncompressed input data
 *	uncomp_len	Length of x_uncomp
 *	comp_len	Return: where to store the output length
 *
 * Returns:
 *	Success: buffer of uncomp_len+2 bytes (malloced)
 *	Failure: NULL
 *
 * NOTE(review): the header records level 3 and the first sample uses the
 * 3rd-order predictor, but the selection loop below only ever switches
 * between predictors 1 and 2 (j < 3) even though z[3] is computed —
 * possibly an off-by-one; confirm against the matching DDELTA1 decoder.
 */
char *decorrelate1dyn(char *x_uncomp,
                      int uncomp_len,
                      int *comp_len) {
    int i, j, z[4];
    int u1 = 0, u2 = 0, u3 = 0;
    char *comp = (char *)xmalloc(uncomp_len + 2);
    unsigned char *u_uncomp = (unsigned char *)x_uncomp;
    int level = 3; /* default level */
    int last_level = level;
    int best;

    if (!comp)
        return NULL;

    comp+=2;
    for (i = 0; i < uncomp_len; i++) {
        /* Candidate predictions from 1st, 2nd and 3rd order extrapolation */
        z[1] = u1;
        z[2] = 2*u1 - u2;
        z[3] = 3*u1 - 3*u2 + u3;
        /* Encode against the predictor chosen on the previous sample */
        comp[i] = u_uncomp[i] - z[last_level];
        /* Pick the predictor that fits this sample best, for next time */
        best = 10000;
        for (j = 1; j < 3; j++) {
            if (ABS(u_uncomp[i] - z[j]) < best) {
                best = ABS(u_uncomp[i] - z[j]);
                last_level = j;
            }
        }
        u3 = u2;
        u2 = u1;
        u1 = u_uncomp[i];
    }
    comp-=2;
    comp[0] = ZTR_FORM_DDELTA1;
    comp[1] = level;

    *comp_len = uncomp_len+2;

    return comp;
}
751 | |
/*
 * recorrelate1()
 *
 * The reverse of decorrelate1()
 *
 * Arguments:
 *	comp		Compressed input data (2-byte header + deltas)
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data
 *
 * Returns:
 *	Success: uncompressed data (malloced; caller frees)
 *	Failure: NULL
 */
char *recorrelate1(char *x_comp,
                   int comp_len,
                   int *uncomp_len) {
    int i, z;
    int u1 = 0, u2 = 0, u3 = 0;
    int level = x_comp[1];
    char *uncomp;

    /* Reject unknown levels; previously these fell through the switch
     * and returned a buffer full of uninitialised bytes */
    if (level < 1 || level > 3)
        return NULL;

    uncomp = (char *)xmalloc(comp_len-2);
    if (!uncomp)
        return NULL;

    x_comp += 2;
    comp_len -= 2;
    *uncomp_len = comp_len;

    switch (level) {
    case 1:
        for (i = 0; i < comp_len; i++) {
            z = u1;
            u1 = uncomp[i] = x_comp[i] + z;
        }
        break;

    case 2:
        for (i = 0; i < comp_len; i++) {
            z = 2*u1 - u2;
            u2 = u1;
            u1 = uncomp[i] = x_comp[i] + z;
        }
        break;

    case 3:
        for (i = 0; i < comp_len; i++) {
            z = 3*u1 - 3*u2 + u3;
            u3 = u2;
            u2 = u1;
            u1 = uncomp[i] = x_comp[i] + z;
        }
        break;
    }

    return uncomp;
}
810 | |
811 /* | |
812 * --------------------------------------------------------------------------- | |
813 * ZTR_FORM_DELTA2 | |
814 * --------------------------------------------------------------------------- | |
815 */ | |
816 | |
817 /* | |
818 * decorrelate2() | |
819 * | |
820 * Produce successive deltas from a 2-byte array (big endian) | |
821 * | |
822 * Arguments: | |
823 * uncomp Uncompressed data | |
824 * uncomp_len Length of uncompressed data | |
825 * level Differencing level (must be 1, 2 or 3) | |
826 * comp_len Return: where to store new compressed length | |
827 * | |
828 * Returns: | |
829 * Success: A decorrelated buffer (malloced) | |
830 * Failure: NULL | |
831 */ | |
832 char *decorrelate2(char *x_uncomp, | |
833 int uncomp_len, | |
834 int level, | |
835 int *comp_len) { | |
836 int i, z, delta; | |
837 int u1 = 0, u2 = 0, u3 = 0; | |
838 char *comp = (char *)xmalloc(uncomp_len + 2); | |
839 unsigned char *u_uncomp = (unsigned char *)x_uncomp; | |
840 | |
841 if (!comp) | |
842 return NULL; | |
843 | |
844 comp+=2; | |
845 switch (level) { | |
846 case 1: | |
847 for (i = 0; i < uncomp_len; i+=2) { | |
848 z = u1; | |
849 u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; | |
850 delta = u1 - z; | |
851 comp[i ] = (delta >> 8) & 0xff; | |
852 comp[i+1] = (delta >> 0) & 0xff; | |
853 } | |
854 break; | |
855 | |
856 case 2: | |
857 for (i = 0; i < uncomp_len; i+=2) { | |
858 z = 2*u1 - u2; | |
859 u2 = u1; | |
860 u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; | |
861 delta = u1 - z; | |
862 comp[i ] = (delta >> 8) & 0xff; | |
863 comp[i+1] = (delta >> 0) & 0xff; | |
864 } | |
865 break; | |
866 | |
867 case 3: | |
868 for (i = 0; i < uncomp_len; i+=2) { | |
869 z = 3*u1 - 3*u2 + u3; | |
870 u3 = u2; | |
871 u2 = u1; | |
872 u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; | |
873 delta = u1 - z; | |
874 comp[i ] = (delta >> 8) & 0xff; | |
875 comp[i+1] = (delta >> 0) & 0xff; | |
876 } | |
877 break; | |
878 | |
879 default: | |
880 return NULL; | |
881 } | |
882 comp-=2; | |
883 comp[0] = ZTR_FORM_DELTA2; | |
884 comp[1] = level; | |
885 | |
886 *comp_len = uncomp_len+2; | |
887 | |
888 return comp; | |
889 } | |
890 | |
/*
 * Adaptive variant of decorrelate2(): works on 16-bit big-endian samples
 * and encodes each one against whichever predictor (2nd or 3rd order)
 * best predicted the previous sample, so no per-sample side information
 * is stored.
 *
 * Arguments:
 *	x_uncomp	Uncompressed input data (16-bit big-endian samples)
 *	uncomp_len	Length of x_uncomp in bytes
 *	comp_len	Return: where to store the output length
 *
 * Returns:
 *	Success: buffer of uncomp_len+2 bytes (malloced)
 *	Failure: NULL
 *
 * NOTE(review): the header records level 2 while the predictor actually
 * adapts between orders 2 and 3 per sample; a matching adaptive decoder
 * is required to invert this — confirm before relying on it.
 */
char *decorrelate2dyn(char *x_uncomp,
                      int uncomp_len,
                      int *comp_len) {
    int i, j, z[4];
    int u1 = 0, u2 = 0, u3 = 0;
    char *comp = (char *)xmalloc(uncomp_len + 2);
    unsigned char *u_uncomp = (unsigned char *)x_uncomp;
    int level = 2; /* minimum level */
    int last_level = level;
    int best;

    if (!comp)
        return NULL;

    comp+=2;
    for (i = 0; i < uncomp_len; i+=2) {
        /* Candidate predictions (z[0]/z[1] are computed but never chosen
         * since the search below starts at 'level') */
        z[0] = 0;
        z[1] = u1;
        z[2] = 2*u1 - u2;
        z[3] = 3*u1 - 3*u2 + u3;
        u3 = u2;
        u2 = u1;
        u1 = (u_uncomp[i]<<8) + u_uncomp[i+1];
        /* Delta against the predictor chosen on the previous sample,
         * stored big-endian */
        comp[i  ] = ((u1 - z[last_level]) >> 8) & 0xff;
        comp[i+1] = ((u1 - z[last_level]) >> 0) & 0xff;
        /* Choose the best-fitting predictor for the next sample */
        best = 10000;
        for (j = level; j < 4; j++) {
            if (ABS(u1 - z[j]) < best) {
                best = ABS(u1 - z[j]);
                last_level = j;
            }
        }
    }
    comp-=2;
    comp[0] = ZTR_FORM_DDELTA2;
    comp[1] = level;

    *comp_len = uncomp_len+2;

    return comp;
}
932 | |
/*
 * recorrelate2()
 *
 * The reverse of decorrelate2()
 *
 * Arguments:
 *	comp		Compressed input data (2-byte header + 16-bit deltas)
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data
 *
 * Returns:
 *	Success: uncompressed data (malloced; caller frees)
 *	Failure: NULL
 */
char *recorrelate2(char *x_comp,
                   int comp_len,
                   int *uncomp_len) {
    int i, z;
    int u1 = 0, u2 = 0, u3 = 0;
    int level = x_comp[1];
    char *uncomp;
    unsigned char *u_comp = (unsigned char *)x_comp;

    /* Reject unknown levels; previously these fell through the switch
     * and returned a buffer full of uninitialised bytes */
    if (level < 1 || level > 3)
        return NULL;

    uncomp = (char *)xmalloc(comp_len-2);
    if (!uncomp)
        return NULL;

    u_comp += 2;
    comp_len -= 2;
    *uncomp_len = comp_len;

    switch (level) {
    case 1:
        for (i = 0; i < comp_len; i+=2) {
            z = u1;
            u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
            uncomp[i  ] = (u1 >> 8) & 0xff;
            uncomp[i+1] = (u1 >> 0) & 0xff;
        }
        break;

    case 2:
        for (i = 0; i < comp_len; i+=2) {
            z = 2*u1 - u2;
            u2 = u1;
            u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
            uncomp[i  ] = (u1 >> 8) & 0xff;
            uncomp[i+1] = (u1 >> 0) & 0xff;
        }
        break;

    case 3:
        for (i = 0; i < comp_len; i+=2) {
            z = 3*u1 - 3*u2 + u3;
            u3 = u2;
            u2 = u1;
            u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
            uncomp[i  ] = (u1 >> 8) & 0xff;
            uncomp[i+1] = (u1 >> 0) & 0xff;
        }
        break;
    }

    return uncomp;
}
998 | |
999 /* | |
1000 * --------------------------------------------------------------------------- | |
1001 * ZTR_FORM_DELTA4 | |
1002 * --------------------------------------------------------------------------- | |
1003 */ | |
1004 | |
1005 /* | |
1006 * decorrelate4() | |
1007 * | |
1008 * Produce successive deltas from a 4-byte array (big endian) | |
1009 * | |
1010 * Arguments: | |
1011 * uncomp Uncompressed data | |
1012 * uncomp_len Length of uncompressed data | |
1013 * level Differencing level (must be 1, 2 or 3) | |
1014 * comp_len Return: where to store new compressed length | |
1015 * | |
1016 * Returns: | |
1017 * Success: A decorrelated buffer (malloced) | |
1018 * Failure: NULL | |
1019 */ | |
1020 char *decorrelate4(char *x_uncomp, | |
1021 int uncomp_len, | |
1022 int level, | |
1023 int *comp_len) { | |
1024 int i, z, delta; | |
1025 int u1 = 0, u2 = 0, u3 = 0; | |
1026 char *comp = (char *)xmalloc(uncomp_len + 4); | |
1027 unsigned char *u_uncomp = (unsigned char *)x_uncomp; | |
1028 | |
1029 if (!comp) | |
1030 return NULL; | |
1031 | |
1032 comp+=4; | |
1033 switch (level) { | |
1034 case 1: | |
1035 for (i = 0; i < uncomp_len; i+=4) { | |
1036 z = u1; | |
1037 u1 =(u_uncomp[i ] << 24) + | |
1038 (u_uncomp[i+1] << 16) + | |
1039 (u_uncomp[i+2] << 8) + | |
1040 (u_uncomp[i+3] << 0); | |
1041 delta = u1 - z; | |
1042 comp[i ] = (delta >> 24) & 0xff; | |
1043 comp[i+1] = (delta >> 16) & 0xff; | |
1044 comp[i+2] = (delta >> 8) & 0xff; | |
1045 comp[i+3] = (delta >> 0) & 0xff; | |
1046 } | |
1047 break; | |
1048 | |
1049 case 2: | |
1050 for (i = 0; i < uncomp_len; i+=4) { | |
1051 z = 2*u1 - u2; | |
1052 u2 = u1; | |
1053 u1 =(u_uncomp[i ] << 24) + | |
1054 (u_uncomp[i+1] << 16) + | |
1055 (u_uncomp[i+2] << 8) + | |
1056 (u_uncomp[i+3] << 0); | |
1057 delta = u1 - z; | |
1058 comp[i ] = (delta >> 24) & 0xff; | |
1059 comp[i+1] = (delta >> 16) & 0xff; | |
1060 comp[i+2] = (delta >> 8) & 0xff; | |
1061 comp[i+3] = (delta >> 0) & 0xff; | |
1062 } | |
1063 break; | |
1064 | |
1065 case 3: | |
1066 for (i = 0; i < uncomp_len; i+=4) { | |
1067 z = 3*u1 - 3*u2 + u3; | |
1068 u3 = u2; | |
1069 u2 = u1; | |
1070 u1 =(u_uncomp[i ] << 24) + | |
1071 (u_uncomp[i+1] << 16) + | |
1072 (u_uncomp[i+2] << 8) + | |
1073 (u_uncomp[i+3] << 0); | |
1074 delta = u1 - z; | |
1075 comp[i ] = (delta >> 24) & 0xff; | |
1076 comp[i+1] = (delta >> 16) & 0xff; | |
1077 comp[i+2] = (delta >> 8) & 0xff; | |
1078 comp[i+3] = (delta >> 0) & 0xff; | |
1079 } | |
1080 break; | |
1081 | |
1082 default: | |
1083 return NULL; | |
1084 } | |
1085 comp-=4; | |
1086 comp[0] = ZTR_FORM_DELTA4; | |
1087 comp[1] = level; | |
1088 comp[2] = 0; /* dummy - to align on 4-byte boundary */ | |
1089 comp[3] = 0; /* dummy - to align on 4-byte boundary */ | |
1090 | |
1091 *comp_len = uncomp_len+4; | |
1092 | |
1093 return comp; | |
1094 } | |
1095 | |
/*
 * recorrelate4()
 *
 * The reverse of decorrelate4()
 *
 * Arguments:
 *	comp		Compressed input data (4-byte header + 32-bit deltas)
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data
 *
 * Returns:
 *	Success: uncompressed data (malloced; caller frees)
 *	Failure: NULL
 */
char *recorrelate4(char *x_comp,
                   int comp_len,
                   int *uncomp_len) {
    int i, z;
    int u1 = 0, u2 = 0, u3 = 0;
    int level = x_comp[1];
    char *uncomp;
    unsigned char *u_comp = (unsigned char *)x_comp;

    /* Reject unknown levels; previously these fell through the switch
     * and returned a buffer full of uninitialised bytes */
    if (level < 1 || level > 3)
        return NULL;

    uncomp = (char *)xmalloc(comp_len-4);
    if (!uncomp)
        return NULL;

    u_comp += 4;
    comp_len -= 4;
    *uncomp_len = comp_len;

    switch (level) {
    case 1:
        for (i = 0; i < comp_len; i+=4) {
            z = u1;
            u1 = z +
                ((u_comp[i  ] << 24) |
                 (u_comp[i+1] << 16) |
                 (u_comp[i+2] <<  8) |
                  u_comp[i+3]);
            uncomp[i  ] = (u1 >> 24) & 0xff;
            uncomp[i+1] = (u1 >> 16) & 0xff;
            uncomp[i+2] = (u1 >>  8) & 0xff;
            uncomp[i+3] = (u1 >>  0) & 0xff;
        }
        break;

    case 2:
        for (i = 0; i < comp_len; i+=4) {
            z = 2*u1 - u2;
            u2 = u1;
            u1 = z +
                ((u_comp[i  ] << 24) |
                 (u_comp[i+1] << 16) |
                 (u_comp[i+2] <<  8) |
                  u_comp[i+3]);
            uncomp[i  ] = (u1 >> 24) & 0xff;
            uncomp[i+1] = (u1 >> 16) & 0xff;
            uncomp[i+2] = (u1 >>  8) & 0xff;
            uncomp[i+3] = (u1 >>  0) & 0xff;
        }
        break;

    case 3:
        for (i = 0; i < comp_len; i+=4) {
            z = 3*u1 - 3*u2 + u3;
            u3 = u2;
            u2 = u1;
            u1 = z +
                ((u_comp[i  ] << 24) |
                 (u_comp[i+1] << 16) |
                 (u_comp[i+2] <<  8) |
                  u_comp[i+3]);
            uncomp[i  ] = (u1 >> 24) & 0xff;
            uncomp[i+1] = (u1 >> 16) & 0xff;
            uncomp[i+2] = (u1 >>  8) & 0xff;
            uncomp[i+3] = (u1 >>  0) & 0xff;
        }
        break;
    }

    return uncomp;
}
1179 | |
1180 | |
1181 /* | |
1182 * --------------------------------------------------------------------------- | |
1183 * ZTR_FORM_16TO8 | |
1184 * --------------------------------------------------------------------------- | |
1185 */ | |
1186 | |
1187 /* | |
1188 * shrink_16to8() | |
1189 * | |
1190 * Stores an array of 16-bit (big endian) array elements in an 8-bit array. | |
1191 * We assume that most 16-bit elements encode numbers that fit in an 8-bit | |
1192 * value. When not possible, we store a marker followed by the 16-bit value | |
1193 * stored as multiple 8-bit values. | |
1194 * | |
1195 * uncomp Uncompressed data | |
1196 * uncomp_len Length of uncompressed data (in bytes) | |
1197 * comp_len Return: where to store new compressed length | |
1198 * | |
1199 * Returns: | |
1200 * Success: An 8-bit array (malloced) | |
1201 * Failure: NULL | |
1202 */ | |
1203 char *shrink_16to8(char *x_uncomp, int uncomp_len, int *comp_len) { | |
1204 char *comp; | |
1205 int i, j, i16; | |
1206 signed char *s_uncomp = (signed char *)x_uncomp; | |
1207 | |
1208 /* Allocation - worst case is 3 * (uncomp_len/2) + 1 */ | |
1209 if (NULL == (comp = (char *)xmalloc(3 * (uncomp_len/2) + 1))) | |
1210 return NULL; | |
1211 | |
1212 comp[0] = ZTR_FORM_16TO8; | |
1213 for (i = 0, j = 1; i < uncomp_len; i+=2) { | |
1214 i16 = (s_uncomp[i] << 8) | (unsigned char)s_uncomp[i+1]; | |
1215 if (i16 >= -127 && i16 <= 127) { | |
1216 comp[j++] = i16; | |
1217 } else { | |
1218 comp[j++] = -128; | |
1219 comp[j++] = s_uncomp[i]; | |
1220 comp[j++] = s_uncomp[i+1]; | |
1221 } | |
1222 } | |
1223 | |
1224 /* Reclaim unneeded memory */ | |
1225 comp = xrealloc(comp, j); | |
1226 | |
1227 *comp_len = j; | |
1228 return comp; | |
1229 } | |
1230 | |
1231 | |
1232 /* | |
1233 * expand_8to16() | |
1234 * | |
1235 * The opposite of the shrink_16to8() function. | |
1236 * | |
1237 * comp Compressed input data | |
1238 * comp_len Length of comp data (in bytes) | |
1239 * uncomp_len Output: length of uncompressed data (in bytes) | |
1240 * | |
1241 * Returns: | |
1242 * Success: Uncompressed data (char *) | |
1243 * Failure: NULL | |
1244 */ | |
1245 char *expand_8to16(char *x_comp, int comp_len, int *uncomp_len) { | |
1246 int i, j; | |
1247 char *uncomp; | |
1248 signed char *s_comp = (signed char *)x_comp; | |
1249 | |
1250 /* Allocation - worst case is twice comp_len */ | |
1251 if (NULL == (uncomp = (char *)xmalloc(comp_len*2))) | |
1252 return NULL; | |
1253 | |
1254 #if 0 | |
1255 for (i = 0, j = 1; j < comp_len; i+=2) { | |
1256 if (s_comp[j] != -128) { | |
1257 uncomp[i ] = s_comp[j] < 0 ? -1 : 0; | |
1258 uncomp[i+1] = s_comp[j++]; | |
1259 } else { | |
1260 j++; | |
1261 uncomp[i ] = s_comp[j++]; | |
1262 uncomp[i+1] = s_comp[j++]; | |
1263 } | |
1264 } | |
1265 #endif | |
1266 | |
1267 for (i = 0, j = 1; j < comp_len; i+=2) { | |
1268 if (s_comp[j] >= 0) { | |
1269 uncomp[i ] = 0; | |
1270 uncomp[i+1] = s_comp[j++]; | |
1271 } else { | |
1272 if (s_comp[j] != -128) { | |
1273 uncomp[i+1] = s_comp[j++]; | |
1274 uncomp[i ] = -1; | |
1275 } else { | |
1276 j++; | |
1277 uncomp[i ] = s_comp[j++]; | |
1278 uncomp[i+1] = s_comp[j++]; | |
1279 } | |
1280 } | |
1281 } | |
1282 | |
1283 /* Reclaim unneeded memory */ | |
1284 uncomp = xrealloc(uncomp, i); | |
1285 | |
1286 *uncomp_len = i; | |
1287 return uncomp; | |
1288 } | |
1289 | |
1290 /* | |
1291 * --------------------------------------------------------------------------- | |
1292 * ZTR_FORM_32TO8 | |
1293 * --------------------------------------------------------------------------- | |
1294 */ | |
1295 | |
1296 /* | |
1297 * shrink_32to8() | |
1298 * | |
1299 * Stores an array of 32-bit (big endian) array elements in an 8-bit array. | |
1300 * We assume that most 32-bit elements encode numbers that fit in an 8-bit | |
1301 * value. When not possible, we store a marker followed by the 32-bit value | |
1302 * stored as multiple 8-bit values. | |
1303 * | |
1304 * uncomp Uncompressed data | |
1305 * uncomp_len Length of uncompressed data (in bytes) | |
1306 * comp_len Return: where to store new compressed length | |
1307 * | |
1308 * Returns: | |
1309 * Success: An 8-bit array (malloced) | |
1310 * Failure: NULL | |
1311 */ | |
1312 char *shrink_32to8(char *x_uncomp, int uncomp_len, int *comp_len) { | |
1313 char *comp; | |
1314 int i, j, i32; | |
1315 signed char *s_uncomp = (signed char *)x_uncomp; | |
1316 | |
1317 /* Allocation - worst case is 5 * (uncomp_len/4) + 1 */ | |
1318 if (NULL == (comp = (char *)xmalloc(5 * (uncomp_len/4) + 1))) | |
1319 return NULL; | |
1320 | |
1321 comp[0] = ZTR_FORM_32TO8; | |
1322 for (i = 0, j = 1; i < uncomp_len; i+=4) { | |
1323 i32 = (s_uncomp[i] << 24) | | |
1324 (s_uncomp[i+1] << 16) | | |
1325 (s_uncomp[i+2] << 8) | | |
1326 (unsigned char)s_uncomp[i+3]; | |
1327 if (i32 >= -127 && i32 <= 127) { | |
1328 comp[j++] = i32; | |
1329 } else { | |
1330 comp[j++] = -128; | |
1331 comp[j++] = s_uncomp[i]; | |
1332 comp[j++] = s_uncomp[i+1]; | |
1333 comp[j++] = s_uncomp[i+2]; | |
1334 comp[j++] = s_uncomp[i+3]; | |
1335 } | |
1336 } | |
1337 | |
1338 /* Reclaim unneeded memory */ | |
1339 comp = xrealloc(comp, j); | |
1340 | |
1341 *comp_len = j; | |
1342 return comp; | |
1343 } | |
1344 | |
1345 /* | |
1346 * expand_8to32() | |
1347 * | |
1348 * The opposite of the shrink_32to8() function. | |
1349 * | |
1350 * comp Compressed input data | |
1351 * comp_len Length of comp data (in bytes) | |
1352 * uncomp_len Output: length of uncompressed data (in bytes) | |
1353 * | |
1354 * Returns: | |
1355 * Success: Uncompressed data (char *) | |
1356 * Failure: NULL | |
1357 */ | |
1358 char *expand_8to32(char *comp, int comp_len, int *uncomp_len) { | |
1359 int i, j; | |
1360 char *uncomp; | |
1361 signed char *s_comp = (signed char *)comp; | |
1362 | |
1363 /* Allocation - worst case is four times comp_len */ | |
1364 if (NULL == (uncomp = (char *)xmalloc(comp_len*4))) | |
1365 return NULL; | |
1366 | |
1367 for (i = 0, j = 1; j < comp_len; i+=4) { | |
1368 if (s_comp[j] != -128) { | |
1369 uncomp[i ] = s_comp[j] < 0 ? -1 : 0; | |
1370 uncomp[i+1] = s_comp[j] < 0 ? -1 : 0; | |
1371 uncomp[i+2] = s_comp[j] < 0 ? -1 : 0; | |
1372 uncomp[i+3] = s_comp[j++]; | |
1373 } else { | |
1374 j++; | |
1375 uncomp[i ] = s_comp[j++]; | |
1376 uncomp[i+1] = s_comp[j++]; | |
1377 uncomp[i+2] = s_comp[j++]; | |
1378 uncomp[i+3] = s_comp[j++]; | |
1379 } | |
1380 } | |
1381 | |
1382 /* Reclaim unneeded memory */ | |
1383 uncomp = xrealloc(uncomp, i); | |
1384 | |
1385 *uncomp_len = i; | |
1386 return uncomp; | |
1387 } | |
1388 | |
1389 /* | |
1390 * --------------------------------------------------------------------------- | |
1391 * ZTR_FORM_FOLLOW1 | |
1392 * --------------------------------------------------------------------------- | |
1393 */ | |
/* Di-gram frequency table used by follow1(); re-zeroed on every call,
 * so follow1() is not reentrant/thread-safe. Kept at file scope as it
 * is 256KB - too large for the stack. */
static int follow_tab[256][256];
/*
 * follow1()
 *
 * For each byte value, finds the byte most frequently observed to follow
 * it, then encodes each byte as the difference between that prediction and
 * the actual value.  Output layout: 1 format byte, the 256-entry 'next'
 * table, the first input byte verbatim, then the per-byte deltas.
 *
 * NOTE(review): the delta arithmetic mixes (possibly signed) char 'next'
 * with signed char input and relies on 8-bit wraparound on decode;
 * unfollow1() applies the same expression, so round-trips are exact.
 *
 * Arguments:
 *	x_uncomp	Uncompressed input data
 *	uncomp_len	Length of uncomp data (assumed >= 1; an empty input
 *			would read u_uncomp[0] out of bounds - TODO confirm
 *			callers guarantee this)
 *	comp_len	Output: length of compressed data
 *
 * Returns:
 *	Success: compressed data (malloced)
 *	Failure: NULL
 */
char *follow1(char *x_uncomp,
	      int uncomp_len,
	      int *comp_len) {
    char *comp = (char *)xmalloc(uncomp_len + 256 + 1);
    unsigned char *u_uncomp = (unsigned char *)x_uncomp;
    signed char *s_uncomp = ((signed char *)x_uncomp);
    int i, j;
    char next[256];
    int count[256];

    if (!comp)
	return NULL;

    /* Count di-freqs */
    memset(follow_tab, 0, 256*256*sizeof(int));
#if 0
    /* Original two-pass version: count all di-grams first, then scan the
     * whole 256x256 table for the winner of each row. */
    for (i = 0; i < uncomp_len-1; i++)
	follow_tab[u_uncomp[i]][u_uncomp[i+1]]++;

    /* Pick the most frequent next byte from the preceeding byte */
    for (i = 0; i < 256; i++) {
	int bestval, bestind;

	bestval = bestind = 0;
	for (j = 0; j < 256; j++) {
	    if (follow_tab[i][j] > bestval) {
		bestval = follow_tab[i][j];
		bestind = j;
	    }
	}
	next[i] = bestind;
    }
#endif
    memset(next, 0, 256*sizeof(*next));
    memset(count, 0, 256*sizeof(*count));
    /* Pick the most frequent next byte from the preceeding byte */
    /* Single pass: track the running maximum of each row as counts grow */
    for (i = 0; i < uncomp_len-1; ) {
	int cur = u_uncomp[i];
	int nxt = u_uncomp[++i];
	int folcnt = ++follow_tab[cur][nxt];
	if (folcnt > count[cur]) {
	    count[cur] = folcnt;
	    next[cur] = nxt;
	}
    }

    j = 0;
    comp[j++] = ZTR_FORM_FOLLOW1;

    /* Output 'next' array */
    for (i = 0; i < 256; i++, j++)
	comp[j] = next[i];

    /* Output new 'uncomp' as next['uncomp'] */
    /* First byte has no predecessor, so it is stored verbatim */
    comp[j++] = u_uncomp[0];
    for (i = 1; i < uncomp_len; i++, j++) {
	comp[j] = next[u_uncomp[i-1]] - s_uncomp[i];
    }
    *comp_len = j;

    return comp;
}
1457 | |
/*
 * unfollow1()
 *
 * The reverse of follow1(): reads the 256-entry 'next' prediction table
 * from the stream, copies the first byte verbatim, then reconstructs each
 * subsequent byte as next[previous_byte] - stored_delta (the same
 * expression follow1() used to produce the delta, so 8-bit wraparound
 * cancels out).
 *
 * Arguments:
 *	x_comp		Compressed input data (format byte + 256-byte table
 *			+ first byte + deltas; assumed comp_len >= 258)
 *	comp_len	Length of comp data
 *	uncomp_len	Output: length of uncompressed data
 *
 * Returns:
 *	Success: uncompressed data (malloced)
 *	Failure: NULL
 */
char *unfollow1(char *x_comp,
		int comp_len,
		int *uncomp_len) {
    unsigned char *u_uncomp;
    int i, j;
    char next[256];
    unsigned char *u_comp = (unsigned char *)x_comp;
    signed char *s_comp = (signed char *)x_comp;

    /* Output is input minus format byte and 256-byte table */
    u_uncomp = (unsigned char *)xmalloc(comp_len-256-1);
    if (!u_uncomp)
	return NULL;

    /* Load next[] array */
    j = 1;
    for (i = 0; i < 256; i++, j++)
	next[i] = u_comp[j];

    /* Replace comp[x] with next[comp[x-1]] - comp[x]*/
    u_uncomp[0] = u_comp[j++];

    /* Skip header so s_comp[i] lines up with output index i */
    comp_len -= 257;
    s_comp += 257;
    for (i = 1; i < comp_len; i++) {
	u_uncomp[i] = next[u_uncomp[i-1]] - s_comp[i];
    }

    *uncomp_len = i;

    return (char *)u_uncomp;
}
1489 | |
1490 | |
1491 /* | |
1492 * --------------------------------------------------------------------------- | |
1493 * ZTR_FORM_CHEB445 | |
1494 * --------------------------------------------------------------------------- | |
1495 */ | |
1496 | |
#if 0
/* NOTE(review): this whole region is compiled out (#if 0); kept only as
 * reference for the integer ICHEB implementation below. */
/*
 * Compresses using chebyshev polynomials to predict the next peak.
 * Based on around 96 modern ABI files it compresses by around 5% (varied
 * between 3.9 and 6.6).
 */

/*
 * For now this has been disabled in favour of the integer version below
 * as we cannot guarantee all systems to have the same floating point
 * roundings, especially with the final conversion to integer.
 * (Also, for some unknown reason, the integer version produces smaller
 * files.)
 */

char *cheb445comp(char *uncomp, int uncomp_len, int *data_len)
{
    int i, k, l, z;
    int datap;
    float frac[5];
    float fz[25];
    signed short *d16 = (signed short *)uncomp;
    int nwords = uncomp_len / 2;
    short *dptr = d16;
    signed short *data = (signed short *)malloc((nwords+1)*sizeof(short));

    data[0] = le_int2(ZTR_FORM_CHEB445);
    /* Check for boundary cases */
    if (nwords <= 4) {
	switch (nwords) {
	case 4:
	    data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
	    /* fallthrough */
	case 3:
	    data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
	    /* fallthrough */
	case 2:
	    data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
	    /* fallthrough */
	case 1:
	    data[1] = be_int2(d16[0]);
	}
	*data_len = nwords*2;
	return (char *)data;
    }

    /* First 4 values are just direct deltas */
    data[1] = be_int2(d16[0]);
    data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
    data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
    data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
    datap = 5;

    /* Initialise - speeds up loop */
    for (k = 0; k < 5; k++) {
	float kx = cos(M_PI*(k+0.5)/5)*1.5;
	frac[k] = (kx + 1.5) - (int)(kx + 1.5);
    }
    for (z = l = 0; l < 5; l++) {
	for (k = 0; k < 5; k++, z++) {
	    fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5);
	}
    }

    /* Loop */
    for (i = 0; i < nwords-4; i++) {
	float dd, y = 10/3.0;
	float f[5], coef[5];
	signed short diff;
	int p;

	f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0];
	f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1];
	f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2];
	f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3];
	f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4];

	for (z = l = 0; l < 5; l++, z+=5)
	    coef[l] = f[0] * fz[z+0] +
		      f[1] * fz[z+1] +
		      f[2] * fz[z+2] +
		      f[3] * fz[z+3] +
		      f[4] * fz[z+4];

	dd = y*coef[3]+coef[2];
	p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0;

	if (p < 0) p = 0;
	/* Store the prediction error as the compressed value */
	diff = be_int2(dptr[4]) - p;
	data[datap++] = be_int2(diff);

	dptr++;
    }

    *data_len = datap*2;

    return (char *)data;
}

char *cheb445uncomp(char *comp, int comp_len, int *uncomp_len)
{
    int i, k, l, z;
    float frac[5];
    float fz[25];
    signed short *d16 = (signed short *)comp;
    int nwords = comp_len / 2;
    signed short *data = (signed short *)xmalloc(comp_len);
    short *dptr = data, *dptr2 = d16;

    /* Check for boundary cases */
    if (nwords <= 3) {
	switch (nwords) {
	case 3:
	    data[0] = be_int2(d16[1]);
	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
	    data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
	    break;
	case 2:
	    data[0] = be_int2(d16[1]);
	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
	    break;
	case 1:
	    data[0] = be_int2(d16[1]);
	    break;
	}
	*uncomp_len = (nwords-1)*2;
	return (char *)data;
    }

    /* First 3 values are just direct deltas */
    data[0] = be_int2(d16[1]);
    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
    data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
    data[3] = be_int2(be_int2(d16[4])+be_int2(data[2]));
    dptr2 += 5;

    /* Initialise - speeds up loop */
    for (k = 0; k < 5; k++) {
	float kx = cos(M_PI*(k+0.5)/5)*1.5;
	frac[k] = (kx + 1.5) - (int)(kx + 1.5);
    }
    for (z = l = 0; l < 5; l++) {
	for (k = 0; k < 5; k++, z++) {
	    fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5);
	}
    }

    /* Loop */
    for (i = 0; i < nwords-3; i++) {
	float dd, y = 10/3.0;
	float f[5], coef[5];
	signed short diff;
	int p;

	f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0];
	f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1];
	f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2];
	f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3];
	f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4];

	for (z = l = 0; l < 5; l++, z+=5)
	    coef[l] = f[0] * fz[z+0] +
		      f[1] * fz[z+1] +
		      f[2] * fz[z+2] +
		      f[3] * fz[z+3] +
		      f[4] * fz[z+4];

	dd = y*coef[3]+coef[2];
	p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0;

	if (p < 0) p = 0;
	/* Add the prediction back onto the stored error */
	diff = be_int2(*dptr2) + p;
	dptr[4] = be_int2(diff);

	dptr++;
	dptr2++;
    }

    *uncomp_len = (nwords-1)*2;
    return (char *)data;
}
#endif
1676 | |
1677 /* | |
1678 * --------------------------------------------------------------------------- | |
1679 * ZTR_FORM_ICHEB | |
1680 * --------------------------------------------------------------------------- | |
1681 */ | |
1682 | |
1683 /* | |
1684 * Integer versions of the chebyshev polynomial compressor. This uses | |
1685 * the polynomials to predict the next peak from the preceeding 3. | |
1686 * Tested on 100 ABI-3700, Megabace and Licor files it compressed by | |
1687 * around 7-8%. (Oddly this is slightly more than the floating point | |
1688 * version.) | |
1689 * | |
1690 * These require 32-bit integers and have code to make sure that arithmetic | |
1691 * does not overflow this. | |
1692 */ | |
1693 #define CH1 150 | |
1694 #define CH2 105 | |
1695 char *ichebcomp(char *uncomp, int uncomp_len, int *data_len) | |
1696 { | |
1697 int i, l, z; | |
1698 int datap; | |
1699 int frac[5] = {139,57,75,93,11}; | |
1700 int fz[20] = {42, 42, 42, 42, 42, | |
1701 39, 24, 0,-24,-39, | |
1702 33,-12,-42,-12, 33, | |
1703 24,-39, 0, 39,-24}; | |
1704 int dfac; | |
1705 signed short *d16 = (signed short *)uncomp; | |
1706 int nwords = uncomp_len / 2; | |
1707 short *dptr = d16; | |
1708 signed short *data = (signed short *)malloc((nwords+1)*sizeof(short)); | |
1709 | |
1710 data[0] = le_int2(ZTR_FORM_ICHEB); | |
1711 /* Check for boundary cases */ | |
1712 if (nwords <= 4) { | |
1713 switch (nwords) { | |
1714 case 4: | |
1715 data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); | |
1716 case 3: | |
1717 data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); | |
1718 case 2: | |
1719 data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); | |
1720 case 1: | |
1721 data[1] = be_int2(d16[0]); | |
1722 } | |
1723 *data_len = nwords*2; | |
1724 return (char *)data; | |
1725 } | |
1726 | |
1727 /* First 4 values are just direct deltas */ | |
1728 data[1] = be_int2(d16[0]); | |
1729 data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); | |
1730 data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); | |
1731 data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); | |
1732 datap = 5; | |
1733 | |
1734 /* Loop */ | |
1735 for (i = 4; i < nwords; i++) { | |
1736 int dd, f[5]; | |
1737 signed int coef[4]; | |
1738 signed short diff; | |
1739 int p; | |
1740 | |
1741 /* | |
1742 * FIXME: As an alternative to the range checking below, if we | |
1743 * scale dptr[X] such that it's never higher than 2800 then | |
1744 * the 32-bit arithmetic will never overflow. Practically speaking, | |
1745 * all observed ABI and Megabace files have vales up to 1600 only. | |
1746 */ | |
1747 | |
1748 /* | |
1749 * frac[N] is always paired with frac[4-N], summing to 1.0 - or | |
1750 * 150 when scaled. | |
1751 * Hence f[0] has range 0 to 65536*150. | |
1752 */ | |
1753 f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] + | |
1754 ((unsigned short)be_int2(dptr[3]))*frac[0]; | |
1755 f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] + | |
1756 ((unsigned short)be_int2(dptr[3]))*frac[1]; | |
1757 f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] + | |
1758 ((unsigned short)be_int2(dptr[2]))*frac[2]; | |
1759 f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] + | |
1760 ((unsigned short)be_int2(dptr[1]))*frac[3]; | |
1761 f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] + | |
1762 ((unsigned short)be_int2(dptr[1]))*frac[4]; | |
1763 | |
1764 /* | |
1765 * fz[z+0..5] sums to no more than 210 (5*42) and no less than 0. | |
1766 * Therefore coef[l] has range 0 to 65536*150*210, which (just) | |
1767 * fits in 31-bits, plus 1 for the sign. | |
1768 */ | |
1769 for (z = l = 0; l < 4; l++, z+=5) | |
1770 coef[l] = (f[0] * fz[z+0] + | |
1771 f[1] * fz[z+1] + | |
1772 f[2] * fz[z+2] + | |
1773 f[3] * fz[z+3] + | |
1774 f[4] * fz[z+4]); | |
1775 | |
1776 /* | |
1777 * computing p requires at most a temporary variable of | |
1778 * 24.1 * coef, but coef may be a full 32-bit integer. | |
1779 * If coef is sufficiently close to cause an integer overflow then | |
1780 * we scale it down. | |
1781 */ | |
1782 { | |
1783 int max = 0; | |
1784 | |
1785 for (l = 0; l < 4; l++) { | |
1786 if (max < ABS(coef[l])) | |
1787 max = ABS(coef[l]); | |
1788 } | |
1789 | |
1790 | |
1791 if (max > 1<<26) { | |
1792 dfac = max / (1<<26) + 1; | |
1793 for (l = 0; l < 4; l++) | |
1794 coef[l] /= dfac; | |
1795 } else { | |
1796 dfac = 1; | |
1797 } | |
1798 } | |
1799 | |
1800 dd = (coef[3]/3)*10+coef[2]; | |
1801 p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2); | |
1802 p *= dfac; | |
1803 | |
1804 if (p < 0) p = 0; | |
1805 diff = be_int2(dptr[4]) - p; | |
1806 data[datap++] = be_int2(diff); | |
1807 | |
1808 dptr++; | |
1809 } | |
1810 | |
1811 *data_len = datap*2; | |
1812 | |
1813 return (char *)data; | |
1814 } | |
1815 | |
1816 char *ichebuncomp(char *comp, int comp_len, int *uncomp_len) | |
1817 { | |
1818 int i, l, z; | |
1819 int frac[5] = {139,57,75,93,11}; | |
1820 int fz[20] = {42, 42, 42, 42, 42, | |
1821 39, 24, 0,-24,-39, | |
1822 33,-12,-42,-12, 33, | |
1823 24,-39, 0, 39,-24}; | |
1824 signed short *d16 = (signed short *)comp; | |
1825 int nwords = comp_len / 2 - 1; | |
1826 signed short *data = (signed short *)xmalloc(comp_len); | |
1827 short *dptr = data, *dptr2 = d16; | |
1828 int dfac; | |
1829 | |
1830 /* Check for boundary cases */ | |
1831 if (nwords <= 4) { | |
1832 switch (nwords) { | |
1833 case 4: | |
1834 data[0] = be_int2(d16[1]); | |
1835 data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); | |
1836 data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); | |
1837 data[3] = be_int2(be_int2(d16[4])+be_int2(data[2])); | |
1838 break; | |
1839 case 3: | |
1840 data[0] = be_int2(d16[1]); | |
1841 data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); | |
1842 data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); | |
1843 break; | |
1844 case 2: | |
1845 data[0] = be_int2(d16[1]); | |
1846 data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); | |
1847 break; | |
1848 case 1: | |
1849 data[0] = be_int2(d16[1]); | |
1850 break; | |
1851 } | |
1852 *uncomp_len = nwords*2; | |
1853 return (char *)data; | |
1854 } | |
1855 | |
1856 /* First 4 values are just direct deltas */ | |
1857 data[0] = be_int2(d16[1]); | |
1858 data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); | |
1859 data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); | |
1860 data[3] = be_int2(be_int2(d16[4])+be_int2(data[2])); | |
1861 dptr2 += 5; | |
1862 | |
1863 /* Loop */ | |
1864 for (i = 4; i < nwords; i++) { | |
1865 int dd, coef[5], f[5]; | |
1866 signed short diff; | |
1867 int p; | |
1868 | |
1869 f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] + | |
1870 ((unsigned short)be_int2(dptr[3]))*frac[0]; | |
1871 f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] + | |
1872 ((unsigned short)be_int2(dptr[3]))*frac[1]; | |
1873 f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] + | |
1874 ((unsigned short)be_int2(dptr[2]))*frac[2]; | |
1875 f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] + | |
1876 ((unsigned short)be_int2(dptr[1]))*frac[3]; | |
1877 f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] + | |
1878 ((unsigned short)be_int2(dptr[1]))*frac[4]; | |
1879 | |
1880 for (z = l = 0; l < 4; l++, z+=5) | |
1881 coef[l] = f[0] * fz[z+0] + | |
1882 f[1] * fz[z+1] + | |
1883 f[2] * fz[z+2] + | |
1884 f[3] * fz[z+3] + | |
1885 f[4] * fz[z+4]; | |
1886 | |
1887 /* | |
1888 * computing p requires at most a temporary variable of | |
1889 * 24.1 * coef, but coef may be a full 32-bit integer. | |
1890 * If coef is sufficiently close to cause an integer overflow then | |
1891 * we scale it down. | |
1892 */ | |
1893 { | |
1894 int max = 0; | |
1895 | |
1896 for (l = 0; l < 4; l++) { | |
1897 if (max < ABS(coef[l])) | |
1898 max = ABS(coef[l]); | |
1899 } | |
1900 | |
1901 | |
1902 if (max > 1<<26) { | |
1903 dfac = max / (1<<26) + 1; | |
1904 for (l = 0; l < 4; l++) | |
1905 coef[l] /= dfac; | |
1906 } else { | |
1907 dfac = 1; | |
1908 } | |
1909 } | |
1910 | |
1911 dd = (coef[3]/3)*10+coef[2]; | |
1912 p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2); | |
1913 p *= dfac; | |
1914 | |
1915 if (p < 0) p = 0; | |
1916 diff = be_int2(*dptr2) + p; | |
1917 dptr[4] = be_int2(diff); | |
1918 | |
1919 dptr++; | |
1920 dptr2++; | |
1921 } | |
1922 | |
1923 *uncomp_len = nwords*2; | |
1924 return (char *)data; | |
1925 } | |
1926 | |
1927 /* | |
1928 * --------------------------------------------------------------------------- | |
1929 * ZTR_FORM_LOG | |
1930 * --------------------------------------------------------------------------- | |
1931 */ | |
1932 | |
1933 /* | |
1934 * This is a LOSSY compression. It replaces N with 10 * log2(N). | |
1935 * (Just an idea, and not a great one it seems.) | |
1936 */ | |
1937 char *log2_data(char *x_uncomp, | |
1938 int uncomp_len, | |
1939 int *comp_len) { | |
1940 int i, u1, l1; | |
1941 char *comp = (char *)xmalloc(uncomp_len + 2); | |
1942 unsigned char *u_uncomp = (unsigned char *)x_uncomp; | |
1943 | |
1944 if (!comp) | |
1945 return NULL; | |
1946 | |
1947 comp+=2; | |
1948 for (i = 0; i < uncomp_len; i+=2) { | |
1949 u1 =(u_uncomp[i ] << 8) + | |
1950 (u_uncomp[i+1] << 0); | |
1951 l1 = (int)(10 * log(u1+1) / log(2)); | |
1952 comp[i ] = (l1 >> 8) & 0xff; | |
1953 comp[i+1] = (l1 >> 0) & 0xff; | |
1954 } | |
1955 | |
1956 comp-=2; | |
1957 comp[0] = ZTR_FORM_LOG2; | |
1958 comp[1] = 0; /* dummy - to align on 2-byte boundary */ | |
1959 | |
1960 *comp_len = uncomp_len+2; | |
1961 | |
1962 return comp; | |
1963 } | |
1964 | |
1965 char *unlog2_data(char *x_comp, | |
1966 int comp_len, | |
1967 int *uncomp_len) { | |
1968 int i, u1, l1; | |
1969 char *uncomp; | |
1970 unsigned char *u_comp = (unsigned char *)x_comp; | |
1971 | |
1972 uncomp = (char *)xmalloc(comp_len-2); | |
1973 if (!uncomp) | |
1974 return NULL; | |
1975 | |
1976 u_comp+=2; | |
1977 comp_len-=2; | |
1978 *uncomp_len = comp_len; | |
1979 | |
1980 for (i = 0; i < comp_len; i+=2) { | |
1981 l1 = ((u_comp[i ] << 8) | | |
1982 (u_comp[i+1] << 0)); | |
1983 u1 = (int)pow(2.0, l1/10.0)-1; | |
1984 uncomp[i ] = (u1 >> 8) & 0xff; | |
1985 uncomp[i+1] = (u1 >> 0) & 0xff; | |
1986 } | |
1987 | |
1988 return uncomp; | |
1989 } | |
1990 | |
1991 /* | |
1992 * --------------------------------------------------------------------------- | |
1993 * ZTR_FORM_STHUFF | |
1994 * --------------------------------------------------------------------------- | |
1995 */ | |
1996 | |
1997 /* | |
1998 * Implements compression using a set of static huffman codes stored using | |
1999 * the Deflate algorithm (and so in this respect it's similar to zlib). | |
2000 * | |
2001 * The huffman codes though can be previously stored in the ztr object | |
2002 * using ztr_add_hcode(). "cset" indicates which numbered stored huffman | |
2003 * code set is to be used, or passing zero will use inline codes (ie they | |
2004 * are stored in the data stream itself, just as in standard deflate). | |
2005 * | |
2006 * Arguments: | |
2007 * ztr ztr_t pointer; used to find stored code-sets | |
2008 * uncomp The uncompressed input data | |
2009 * uncomp_len Length of uncomp | |
2010 * cset Stored code-set number, zero for inline | |
2011 * recsz Record size - only used when cset == 0. | |
2012 * comp_len Output: length of compressed data | |
2013 * | |
2014 * Returns: | |
2015 * Compressed data stream if successful + comp_len | |
2016 * NULL on failure | |
2017 */ | |
2018 char *sthuff(ztr_t *ztr, char *uncomp, int uncomp_len, | |
2019 int cset, int recsz, int *comp_len) { | |
2020 block_t *blk = block_create(NULL, 2); | |
2021 unsigned char bytes[2]; | |
2022 huffman_codeset_t *c = NULL; | |
2023 unsigned char *comp = NULL; | |
2024 ztr_hcode_t *hc = NULL; | |
2025 | |
2026 if (cset >= CODE_USER) { | |
2027 if (NULL == (hc = ztr_find_hcode(ztr, cset))) | |
2028 return NULL; | |
2029 c = hc->codes; | |
2030 } else if (cset != CODE_INLINE) { | |
2031 c = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0); | |
2032 } | |
2033 | |
2034 if (!c) { | |
2035 /* No cached ones found, so inline some instead */ | |
2036 cset = 0; | |
2037 c = generate_code_set(0, recsz, (unsigned char *)uncomp, uncomp_len, 1, | |
2038 MAX_CODE_LEN, 0); | |
2039 } | |
2040 | |
2041 bytes[0] = ZTR_FORM_STHUFF; | |
2042 bytes[1] = cset; | |
2043 store_bytes(blk, bytes, 2); | |
2044 | |
2045 if (hc) { | |
2046 if (!c->blk) { | |
2047 c->blk = block_create(NULL, 2); | |
2048 store_codes(c->blk, c, 1); | |
2049 } | |
2050 blk->bit = c->blk->bit; | |
2051 } else { | |
2052 store_codes(blk, c, 1); | |
2053 } | |
2054 | |
2055 /* | |
2056 {int i; | |
2057 for (i = 0; i < c->ncodes; i++) { | |
2058 output_code_set(stderr, c->codes[i]); | |
2059 }} | |
2060 */ | |
2061 | |
2062 | |
2063 /* | |
2064 * Unless CODE_INLINE, all we wanted to know is what bit number | |
2065 * to start on. The above is therefore somewhat inefficient. | |
2066 */ | |
2067 if (cset != 0) { | |
2068 blk->byte = 2; | |
2069 memset(&blk->data[2], 0, blk->alloc - 2); | |
2070 } | |
2071 | |
2072 if (0 == huffman_multi_encode(blk, c, cset, | |
2073 (unsigned char *)uncomp, uncomp_len)) { | |
2074 comp = blk->data; | |
2075 *comp_len = blk->byte + (blk->bit != 0); | |
2076 block_destroy(blk, 1); | |
2077 } | |
2078 | |
2079 if (cset == 0) | |
2080 huffman_codeset_destroy(c); | |
2081 | |
2082 return (char *)comp; | |
2083 } | |
2084 | |
2085 char *unsthuff(ztr_t *ztr, char *comp, int comp_len, int *uncomp_len) { | |
2086 int cset = (unsigned char)(comp[1]); | |
2087 huffman_codeset_t *cs = NULL, *cs_free = NULL; | |
2088 block_t *blk_in = block_create(NULL, comp_len), | |
2089 *blk_out = block_create(NULL, 1000); | |
2090 int bfinal = 1, bit_num = 0; | |
2091 char *uncomp; | |
2092 | |
2093 if (cset >= CODE_USER) { | |
2094 /* Scans through HUFF chunks */ | |
2095 ztr_hcode_t *hc = ztr_find_hcode(ztr, cset); | |
2096 if (!hc) | |
2097 return NULL; | |
2098 | |
2099 cs = hc->codes; | |
2100 bit_num = cs->bit_num; | |
2101 blk_in->bit = 0; | |
2102 } else if (cset > 0) { | |
2103 /* Create some temporary huffman_codes to stringify */ | |
2104 cs_free = cs = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0); | |
2105 if (!cs) | |
2106 return NULL; | |
2107 | |
2108 bit_num = cs->bit_num; | |
2109 blk_in->bit = 0; | |
2110 } /* else inline codes */ | |
2111 | |
2112 /* | |
2113 * We need to know at what bit the huffman codes would have ended on | |
2114 * so we can store our huffman encoded symbols immediately following it. | |
2115 * For speed though this bit-number is cached. | |
2116 */ | |
2117 blk_in->data[blk_in->byte++] |= *(comp+2); | |
2118 store_bytes(blk_in, (unsigned char *)comp+3, comp_len-3); | |
2119 | |
2120 | |
2121 /* Rewind */ | |
2122 blk_in->byte = 0; | |
2123 blk_in->bit = bit_num; | |
2124 | |
2125 do { | |
2126 block_t *out; | |
2127 | |
2128 /* | |
2129 * We're either at the start of a block with codes to restore | |
2130 * (cset == INLINE or the 2nd onwards block) or we've already | |
2131 * got some codes in cs and we're at the position where huffman | |
2132 * encoded symbols are stored. | |
2133 */ | |
2134 if (!cs) | |
2135 if (NULL == (cs = cs_free = restore_codes(blk_in, &bfinal))) | |
2136 return NULL; | |
2137 | |
2138 /* | |
2139 {int i; | |
2140 for (i = 0; i < cs->ncodes; i++) { | |
2141 output_code_set(stderr, cs->codes[i]); | |
2142 }} | |
2143 */ | |
2144 | |
2145 if (NULL == (out = huffman_multi_decode(blk_in, cs))) { | |
2146 huffman_codeset_destroy(cs); | |
2147 return NULL; | |
2148 } | |
2149 | |
2150 /* Could optimise this for the common case of only 1 block */ | |
2151 store_bytes(blk_out, out->data, out->byte); | |
2152 block_destroy(out, 0); | |
2153 if (cs_free) | |
2154 huffman_codeset_destroy(cs_free); | |
2155 cs = cs_free = NULL; | |
2156 } while (!bfinal); | |
2157 | |
2158 *uncomp_len = blk_out->byte; | |
2159 uncomp = (char *)blk_out->data; | |
2160 | |
2161 block_destroy(blk_in, 0); | |
2162 block_destroy(blk_out, 1); | |
2163 | |
2164 return uncomp; | |
2165 } | |
2166 | |
2167 | |
#ifndef NDEBUG
#define SYM_EOF 256
/*
 * Debug helper: prints a huffman code set to 'fp' formatted as a C
 * initialiser table ("static huffman_code_t codes_FIXME[] = {...};"),
 * followed by the expected compression ratio computed from the symbol
 * frequencies.
 *
 * NOTE(review): the top of this file force-defines NDEBUG, so this
 * function is normally compiled out; it exists for generating the
 * built-in code tables used elsewhere in the library.
 */
static void output_code_set(FILE *fp, huffman_codes_t *cds) {
    int i, j;
    int nbits_in = 0, nbits_out = 0;	/* bit totals for the ratio line */
    huffman_code_t *codes = cds->codes;
    int ncodes = cds->ncodes;

    fprintf(fp, "static huffman_code_t codes_FIXME[] = {\n");
    for (i = j = 0; i < ncodes; i++) {
	/* Accumulate compressed vs raw (8 bits/symbol) sizes */
	nbits_out += codes[i].nbits * codes[i].freq;
	nbits_in += 8*codes[i].freq;
	if (j == 0)
	    fprintf(fp, " ");
	if (codes[i].symbol == SYM_EOF) {
	    fprintf(fp, "{SYM_EOF,%3d}, ", codes[i].nbits);
	    j = 10;	/* force a line break right after the EOF entry */
	} else {
	    /* Printable symbols are emitted as character literals */
	    if (isalnum(codes[i].symbol)) {
		fprintf(fp, "{'%c',%3d}, ", codes[i].symbol, codes[i].nbits);
	    } else {
		fprintf(fp, "{%3d,%3d}, ", codes[i].symbol, codes[i].nbits);
	    }
	}
	j++;

	/* Wrap output at 6 entries per line */
	if (j >= 6) {
	    fputc('\n', fp);
	    j = 0;
	}
    }
    if (j)
	fputc('\n', fp);
    fprintf(fp, "};\n");
    /* NOTE(review): division by zero if all freq counts are 0 — debug only */
    fprintf(fp, "/* Expected compression to %f of input */\n",
	    (double)nbits_out/nbits_in);
}
#endif
2206 | |
2207 /* | |
2208 * Reorders quality data from its RAW format to an interleaved 4-byte | |
2209 * aligned format. | |
2210 * | |
2211 * Starting with sequence A1 C2 G3 the raw format is quality of called | |
2212 * bases followed by quality of remaining bases in triplets per base: | |
2213 * 0 (RAW format) | |
2214 * Q(A1) Q(C2) Q(G3) | |
2215 * Q(C1) Q(G1) Q(T1) | |
2216 * Q(A2) Q(G2) Q(T2) | |
2217 * Q(A3) Q(C3) Q(T3) | |
2218 * | |
2219 * We reorder it to: | |
2220 * ZTR_FORM_QSHIFT <any> <any> 0(raw) | |
2221 * Q(A1) Q(C1) Q(G1) Q(T1) | |
2222 * Q(C2) Q(A2) Q(G2) Q(T2) | |
2223 * Q(G3) Q(A3) Q(C3) Q(T3) | |
2224 * | |
2225 * Returns shifted data on success | |
2226 * NULL on failure | |
2227 */ | |
2228 char *qshift(char *qold, int qlen, int *new_len) { | |
2229 int i, j, k; | |
2230 char *qnew; | |
2231 int nbases; | |
2232 | |
2233 /* | |
2234 * Correct input is raw encoding + 4x nbases bytes | |
2235 */ | |
2236 if ((qlen-1)%4 != 0 || *qold != 0) | |
2237 return NULL; | |
2238 | |
2239 nbases = (qlen-1)/4; | |
2240 qnew = (char *)malloc((nbases+1)*4); | |
2241 qnew[0] = ZTR_FORM_QSHIFT; /* reorder code */ | |
2242 qnew[1] = -40; /* pad */ | |
2243 qnew[2] = -40; /* pad */ | |
2244 qnew[3] = qold[0]; | |
2245 | |
2246 for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) { | |
2247 qnew[j ] = qold[1+i ]; | |
2248 qnew[j+1] = qold[1+k ]; | |
2249 qnew[j+2] = qold[1+k+1]; | |
2250 qnew[j+3] = qold[1+k+2]; | |
2251 } | |
2252 | |
2253 *new_len = (nbases+1)*4; | |
2254 return qnew; | |
2255 } | |
2256 | |
2257 /* | |
2258 * The opposite transform from qshift() above. | |
2259 * | |
2260 * Returns unshifted data on success | |
2261 * NULL on failure. | |
2262 */ | |
2263 char *unqshift(char *qold, int qlen, int *new_len) { | |
2264 int i, j, k; | |
2265 char *qnew; | |
2266 int nbases; | |
2267 | |
2268 /* | |
2269 * Correct input is 4x (nbases+1) bytes | |
2270 */ | |
2271 if (qlen%4 != 0 || *qold != ZTR_FORM_QSHIFT) | |
2272 return NULL; | |
2273 | |
2274 nbases = qlen/4-1; | |
2275 qnew = (char *)malloc(nbases*4+1); | |
2276 qnew[0] = 0; /* raw byte */ | |
2277 | |
2278 for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) { | |
2279 qnew[1+i ] = qold[j ]; | |
2280 qnew[1+k ] = qold[j+1]; | |
2281 qnew[1+k+1] = qold[j+2]; | |
2282 qnew[1+k+2] = qold[j+3]; | |
2283 } | |
2284 | |
2285 *new_len = nbases*4+1; | |
2286 return qnew; | |
2287 } | |
2288 | |
2289 /* | |
2290 * Given a sequence ACTG this shifts trace data from the order: | |
2291 * | |
2292 * A1A2A3A4 C1C2C3C4 G1G2G3G4 T1T2T3T4 | |
2293 * | |
2294 * to | |
2295 * | |
2296 * A1C1G1T1 C2A2G2T2 T3A3C3G3 G4C4C4T4 | |
2297 * | |
2298 * Ie for each base it ouputs the signal for the called base first | |
2299 * followed by the remaining 3 signals in A,C,G,T order (minus the | |
2300 * called signal already output). | |
2301 */ | |
2302 | |
2303 /* | |
2304 * NCBI uses a rotation mechanism. Thus instead of converting acGt (G | |
2305 * called) to Gact they produce Gtac. | |
2306 * | |
2307 * Given 4 columns of data the two schemes produce value orderings as: | |
2308 * | |
2309 * Rotate: Acgt Shift: Acgt | |
2310 * Cgta Cagt | |
2311 * Gtac Gact | |
2312 * Tacg Tacg | |
2313 * | |
2314 * As a consequence any channel bias for a/c/g/t is better preserved | |
2315 * by shifting than it is by rotating as each column (except #1) are | |
2316 * only populated by two base types and 2 of those columns are on a | |
2317 * 3:1 ratio. | |
2318 */ | |
2319 /* #define ROTATE_INSTEAD */ | |
2320 char *tshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) { | |
2321 int nc, i; | |
2322 ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc); | |
2323 char *bases; | |
2324 int nbases; | |
2325 unsigned short *tnew, *told; | |
2326 | |
2327 if (nc == 0) | |
2328 return NULL; | |
2329 | |
2330 if (*told_c != 0) | |
2331 return NULL; /* assume RAW format trace input */ | |
2332 | |
2333 /* Use last BASE chunk if multiple are present */ | |
2334 /* FIXME: ensure uncompressed first */ | |
2335 bases = base[nc-1]->data+1; | |
2336 nbases = base[nc-1]->dlength-1; | |
2337 | |
2338 if (nbases != (tlen-2)/8) { | |
2339 fprintf(stderr, "Mismatch in number of base calls to samples\n"); | |
2340 return NULL; | |
2341 } | |
2342 | |
2343 /* Allocate and initialise header */ | |
2344 told = ((unsigned short *)told_c) + 1; | |
2345 *new_len = (nbases*4+4) * sizeof(*tnew); | |
2346 tnew = (unsigned short *)malloc(*new_len); | |
2347 for (i = 0; i < 4; i++) { | |
2348 tnew[i] = 0; | |
2349 } | |
2350 ((char *)tnew)[0] = ZTR_FORM_TSHIFT; | |
2351 | |
2352 #ifdef ROTATE_INSTEAD | |
2353 /* Reorder */ | |
2354 for (i = 0; i < nbases; i++) { | |
2355 switch(bases[i]) { | |
2356 case 'T': | |
2357 tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */ | |
2358 tnew[4+i*4+1] = told[0*nbases+i]; | |
2359 tnew[4+i*4+2] = told[1*nbases+i]; | |
2360 tnew[4+i*4+3] = told[2*nbases+i]; | |
2361 break; | |
2362 case 'G': | |
2363 tnew[4+i*4+0] = told[2*nbases+i]; /* GTAC */ | |
2364 tnew[4+i*4+1] = told[3*nbases+i]; | |
2365 tnew[4+i*4+2] = told[0*nbases+i]; | |
2366 tnew[4+i*4+3] = told[1*nbases+i]; | |
2367 break; | |
2368 case 'C': | |
2369 tnew[4+i*4+0] = told[1*nbases+i]; /* CGTA */ | |
2370 tnew[4+i*4+1] = told[2*nbases+i]; | |
2371 tnew[4+i*4+2] = told[3*nbases+i]; | |
2372 tnew[4+i*4+3] = told[0*nbases+i]; | |
2373 break; | |
2374 default: | |
2375 tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */ | |
2376 tnew[4+i*4+1] = told[1*nbases+i]; | |
2377 tnew[4+i*4+2] = told[2*nbases+i]; | |
2378 tnew[4+i*4+3] = told[3*nbases+i]; | |
2379 break; | |
2380 } | |
2381 } | |
2382 #else | |
2383 /* Reorder */ | |
2384 for (i = 0; i < nbases; i++) { | |
2385 switch(bases[i]) { | |
2386 case 'T': | |
2387 tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */ | |
2388 tnew[4+i*4+1] = told[0*nbases+i]; | |
2389 tnew[4+i*4+2] = told[1*nbases+i]; | |
2390 tnew[4+i*4+3] = told[2*nbases+i]; | |
2391 break; | |
2392 case 'G': | |
2393 tnew[4+i*4+0] = told[2*nbases+i]; /* GACT */ | |
2394 tnew[4+i*4+1] = told[0*nbases+i]; | |
2395 tnew[4+i*4+2] = told[1*nbases+i]; | |
2396 tnew[4+i*4+3] = told[3*nbases+i]; | |
2397 break; | |
2398 case 'C': | |
2399 tnew[4+i*4+0] = told[1*nbases+i]; /* CAGT */ | |
2400 tnew[4+i*4+1] = told[0*nbases+i]; | |
2401 tnew[4+i*4+2] = told[2*nbases+i]; | |
2402 tnew[4+i*4+3] = told[3*nbases+i]; | |
2403 break; | |
2404 default: | |
2405 tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */ | |
2406 tnew[4+i*4+1] = told[1*nbases+i]; | |
2407 tnew[4+i*4+2] = told[2*nbases+i]; | |
2408 tnew[4+i*4+3] = told[3*nbases+i]; | |
2409 break; | |
2410 } | |
2411 } | |
2412 #endif | |
2413 | |
2414 xfree(base); | |
2415 | |
2416 return (char *)tnew; | |
2417 } | |
2418 | |
2419 char *untshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) { | |
2420 unsigned short *tnew, *told = (unsigned short *)told_c; | |
2421 int nc, nbases, i; | |
2422 char *bases; | |
2423 ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc); | |
2424 | |
2425 if (nc == 0) | |
2426 return NULL; | |
2427 | |
2428 /* Use last BASE chunk if multiple are present */ | |
2429 uncompress_chunk(ztr, base[nc-1]); | |
2430 bases = base[nc-1]->data+1; | |
2431 nbases = base[nc-1]->dlength-1; | |
2432 | |
2433 *new_len = 2 + nbases*4 * sizeof(*tnew); | |
2434 tnew = (unsigned short *)malloc(*new_len); | |
2435 tnew[0] = 0; | |
2436 told += 4; | |
2437 | |
2438 #ifdef ROTATE_INSTEAD | |
2439 /* Reorder */ | |
2440 for (i = 0; i < nbases; i++) { | |
2441 switch(bases[i]) { | |
2442 case 'T': | |
2443 tnew[1+3*nbases+i] = *told++; /* TACG */ | |
2444 tnew[1+0*nbases+i] = *told++; | |
2445 tnew[1+1*nbases+i] = *told++; | |
2446 tnew[1+2*nbases+i] = *told++; | |
2447 break; | |
2448 case 'G': | |
2449 tnew[1+2*nbases+i] = *told++; /* GTAC */ | |
2450 tnew[1+3*nbases+i] = *told++; | |
2451 tnew[1+0*nbases+i] = *told++; | |
2452 tnew[1+1*nbases+i] = *told++; | |
2453 break; | |
2454 case 'C': | |
2455 tnew[1+1*nbases+i] = *told++; /* CGTA */ | |
2456 tnew[1+2*nbases+i] = *told++; | |
2457 tnew[1+3*nbases+i] = *told++; | |
2458 tnew[1+0*nbases+i] = *told++; | |
2459 break; | |
2460 default: | |
2461 tnew[1+0*nbases+i] = *told++; /* ACGT */ | |
2462 tnew[1+1*nbases+i] = *told++; | |
2463 tnew[1+2*nbases+i] = *told++; | |
2464 tnew[1+3*nbases+i] = *told++; | |
2465 break; | |
2466 } | |
2467 } | |
2468 #else | |
2469 /* Reorder */ | |
2470 for (i = 0; i < nbases; i++) { | |
2471 switch(bases[i]) { | |
2472 case 'T': | |
2473 tnew[1+3*nbases+i] = *told++; /* TACG */ | |
2474 tnew[1+0*nbases+i] = *told++; | |
2475 tnew[1+1*nbases+i] = *told++; | |
2476 tnew[1+2*nbases+i] = *told++; | |
2477 break; | |
2478 case 'G': | |
2479 tnew[1+2*nbases+i] = *told++; /* GACT */ | |
2480 tnew[1+0*nbases+i] = *told++; | |
2481 tnew[1+1*nbases+i] = *told++; | |
2482 tnew[1+3*nbases+i] = *told++; | |
2483 break; | |
2484 case 'C': | |
2485 tnew[1+1*nbases+i] = *told++; /* CAGT */ | |
2486 tnew[1+0*nbases+i] = *told++; | |
2487 tnew[1+2*nbases+i] = *told++; | |
2488 tnew[1+3*nbases+i] = *told++; | |
2489 break; | |
2490 default: | |
2491 tnew[1+0*nbases+i] = *told++; /* ACGT */ | |
2492 tnew[1+1*nbases+i] = *told++; | |
2493 tnew[1+2*nbases+i] = *told++; | |
2494 tnew[1+3*nbases+i] = *told++; | |
2495 break; | |
2496 } | |
2497 } | |
2498 #endif | |
2499 | |
2500 xfree(base); | |
2501 | |
2502 return (char *)tnew; | |
2503 } |