Mercurial > repos > dawe > srf2fastq
diff srf2fastq/io_lib-1.12.2/io_lib/compression.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/srf2fastq/io_lib-1.12.2/io_lib/compression.c Tue Jun 07 17:48:05 2011 -0400 @@ -0,0 +1,2503 @@ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <zlib.h> +#include <assert.h> +#include <math.h> +#include <ctype.h> + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +#include "io_lib/ztr.h" +#include "io_lib/os.h" +#include "io_lib/compression.h" +#include "io_lib/xalloc.h" + +#ifndef NDEBUG +# define NDEBUG +#endif + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_ZLIB + * --------------------------------------------------------------------------- + */ + +/* + * Some comments on zlib usage. + * + * - Ideally for trace data, after decorrelation, we should use Z_FILTERED. + * Empirical studies show that this gives the best compression ratio, but + * it is slow (about the same speed as normal gzip). MUCH faster is huffman + * only, and it doesn't give radically different compression ratios. + * + * - When compressing using Z_HUFFMAN_ONLY we used compression level + * '1' as this invokes the deflate_fast() algorithm. It makes no + * difference to the compression level, but it seems to be quicker still. + * + */ + +/* + * zlib_huff() + * + * Compresses data using huffman encoding, as implemented by zlib. + * + * Arguments: + * uncomp Uncompressed input data + * uncomp_len Length of uncomp data + * comp_len Output: length of compressed data + * + * Returns: + * Compressed data if successful + * NULL if not successful + */ +char *zlib_huff(char *uncomp, int uncomp_len, int strategy, int *comp_len) { + z_stream zstr; + int err; + int comp_len_tmp = (int)(uncomp_len * 1.001 + 12); /* Maximum expansion size */ + char *comp = (char *)xmalloc(comp_len_tmp+5); + int c_len; + + /* Initialise zlib */ + zstr.zalloc = (alloc_func)0; + zstr.zfree = (free_func)0; + zstr.opaque = (voidpf)0; + + if ((err = deflateInit2(&zstr, 1, Z_DEFLATED, 15, 8, strategy)) != Z_OK) { + fprintf(stderr, "zlib errror in deflateInit2(): %d\n", err); + return NULL; + } + + /* Set up input and output buffers */ + zstr.next_in = (unsigned char *)uncomp; + zstr.avail_in = uncomp_len; + zstr.next_out = (unsigned char *)comp+5; + zstr.avail_out = comp_len_tmp; + + /* Do the compression */ + if ((err = deflate(&zstr, Z_FINISH)) != Z_STREAM_END) { + fprintf(stderr, "zlib errror in deflate(): %d\n", err); + return NULL; + } + + /* Tidy up */ + deflateEnd(&zstr); + + c_len = zstr.total_out; + + /* Return */ + comp[0] = ZTR_FORM_ZLIB; + comp[1] = (uncomp_len >> 0) & 0xff; + comp[2] = (uncomp_len >> 8) & 0xff; + comp[3] = (uncomp_len >> 16) & 0xff; + comp[4] = (uncomp_len >> 24) & 0xff; + + if (comp_len) + *comp_len = c_len+5; + + return comp; +} + +/* + * zlib_dehuff() + * + * Uncompresses data using huffman encoding, as implemented by zlib. + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Uncompressed data if successful + * NULL if not successful + */ +char *zlib_dehuff(char *comp, int comp_len, int *uncomp_len) { + z_stream zstr; + int err; + char *uncomp; + int ulen; + + /* Allocate */ + ulen = + ((unsigned char)comp[1] << 0) + + ((unsigned char)comp[2] << 8) + + ((unsigned char)comp[3] << 16) + + ((unsigned char)comp[4] << 24); + uncomp = (char *)xmalloc(ulen); + + /* Initialise zlib */ + zstr.zalloc = (alloc_func)0; + zstr.zfree = (free_func)0; + zstr.opaque = (voidpf)0; + + if ((err = inflateInit(&zstr)) != Z_OK) { + fprintf(stderr, "zlib errror in inflateInit(): %d\n", err); + return NULL; + } + + /* Set up input and output buffers */ + zstr.next_in = (unsigned char *)comp+5; + zstr.avail_in = comp_len-5; + zstr.next_out = (unsigned char *)uncomp; + zstr.avail_out = ulen; + + /* Do the decompression */ + if ((err = inflate(&zstr, Z_FINISH)) != Z_STREAM_END) { + fprintf(stderr, "zlib errror in deflate(): %d\n", err); + return NULL; + } + + /* Tidy up */ + inflateEnd(&zstr); + + if (uncomp_len) + *uncomp_len = ulen; + + return uncomp; +} + + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_RLE + * --------------------------------------------------------------------------- + */ + +/* + * Run length encoding. + * + * Any run of 3 or more identical characters (up to 255 in a row) are replaced + * by a 'guard' byte followed by the number of characters followed by + * the character value itself. + * Any single guard value in the input is escaped using 'guard 0'. + * + * Specifying guard as -1 will automatically pick one of the least used + * characters in the input as the guard. + * + * Arguments: + * uncomp Input data + * uncomp_len Length of input data 'uncomp' + * guard Guard byte - used to encode "N" copies of data + * comp_len Output: length of compressed data + * + * Returns: + * Compressed data if successful + * NULL if not successful + */ +char *rle(char *uncomp, int uncomp_len, int guard, int *comp_len) { + int i, k, c_len = 0; + char *comp = xmalloc(2 * uncomp_len + 6); + char *out = comp + 6; + + /* A guard of -1 implies to search for the least frequent symbol */ + if (guard == -1) { + int cnt[256]; + int bestcnt = uncomp_len + 1; + + for (i = 0; i < 256; i++) + cnt[i] = 0; + + for (i = 0; i < uncomp_len; i++) { + cnt[(unsigned char)uncomp[i]]++; + } + + for (i = 0; i < 256; i++) { + if (cnt[i] < bestcnt) { + bestcnt = cnt[i]; + guard = i; + } + } + } + + for (i = 0; i < uncomp_len; i=k) { + /* + * Detect blocks of up identical bytes up to 255 bytes long. + */ + for (k = i; k < uncomp_len && uncomp[i] == uncomp[k]; k++) + if (k-i == 255) + break; + + /* 1, 2 or 3 bytes are best stored "as is" */ + if (k-i < 4) { + do { + /* + * If we find 'guard' in our sequence, escape it by + * outputting 'guard' <zero>. (We know that we'll never + * write out zero copies of a token in our rle compression + * algorithm.) + */ + if ((unsigned char)(uncomp[i]) == guard) { + out[c_len++] = guard; + out[c_len++] = 0; + } else { + out[c_len++] = uncomp[i]; + } + i++; + } while (k >= i+1); + } else { + /* More than 3 bytes: store as ('guard', length, byte value) */ + out[c_len++] = guard; + out[c_len++] = k-i; + out[c_len++] = uncomp[i]; + } + } + + /* Return */ + comp[0] = ZTR_FORM_RLE; + comp[1] = (uncomp_len >> 0) & 0xff; + comp[2] = (uncomp_len >> 8) & 0xff; + comp[3] = (uncomp_len >> 16) & 0xff; + comp[4] = (uncomp_len >> 24) & 0xff; + comp[5] = guard; + + if (comp_len) + *comp_len = c_len+6; + + return comp; +} + +/* + * Reverses run length encoding. + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Uncompressed data if successful + * NULL if not successful + */ +/* ARGSUSED */ +char *unrle(char *comp, int comp_len, int *uncomp_len) { + int in_i, out_i, i, val, count, out_len; + char *uncomp; + unsigned char *in = (unsigned char *)comp+6; + char *out; + int guard = (unsigned char)comp[5]; + + /* Allocate */ + out_len = + ((unsigned char)comp[1] << 0) + + ((unsigned char)comp[2] << 8) + + ((unsigned char)comp[3] << 16) + + ((unsigned char)comp[4] << 24); + out = uncomp = (char *)xmalloc(out_len); + + for (in_i = out_i = 0; out_i < out_len; in_i++) { + if (in[in_i] != guard) { + /* When not 'guard' it's easy - just output this token */ + assert(out_i >= 0 && out_i < out_len); + out[out_i++] = in[in_i]; + + } else { + /* + * Found an 'guard' token. If next token is zero, then + * we were simply escaping a real 'guard' token in the input + * data, otherwise output a string of bytes. + */ + count = in[++in_i]; + if (count != 0) { + val = in[++in_i]; + for (i = 0; i < count; i++) { + assert(out_i >= 0 && out_i < out_len); + out[out_i++] = val; + } + } else { + assert(out_i >= 0 && out_i < out_len); + out[out_i++] = guard; + } + } + } + + if (uncomp_len) + *uncomp_len = out_len; + + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_XRLE + * --------------------------------------------------------------------------- + */ + +/* + * Mutli-byte run length encoding. + * + * Any run of 3 or more identical characters (up to 255 in a row) are replaced + * by a 'guard' byte followed by the number of characters followed by + * the character value itself. + * Any single guard value in the input is escaped using 'guard 0'. + * + * Specifying guard as -1 will automatically pick one of the least used + * characters in the input as the guard. + * + * Arguments: + * uncomp Input data + * uncomp_len Length of input data 'uncomp' + * guard Guard byte - used to encode "N" copies of data + * rsz Size of blocks to compare for run checking. + * comp_len Output: length of compressed data + * + * Returns: + * Compressed data if successful + * NULL if not successful + */ +char *xrle(char *uncomp, int uncomp_len, int guard, int rsz, int *comp_len) { + char *comp = (char *)malloc(2 * uncomp_len + 3); + char *out = comp; + int i, j, k; + + /* A guard of -1 implies to search for the least frequent symbol */ + if (guard == -1) { + int cnt[256]; + int bestcnt = uncomp_len + 1; + + for (i = 0; i < 256; i++) + cnt[i] = 0; + + for (i = 0; i < uncomp_len; i++) { + cnt[(unsigned char)uncomp[i]]++; + } + + for (i = 0; i < 256; i++) { + if (cnt[i] < bestcnt) { + bestcnt = cnt[i]; + guard = i; + } + } + } + + *out++ = ZTR_FORM_XRLE; + *out++ = rsz; + *out++ = guard; + + for (i = 0; i < uncomp_len; i = k) { + /* Count repeats */ + k = i + rsz; + while (k <= uncomp_len - rsz && !memcmp(&uncomp[i], &uncomp[k], rsz)) { + k += rsz; + if ((k-i)/rsz == 255) + break; + } + + if (k-i > rsz) { + /* Duplicates, so RLE */ + *out++ = guard; + *out++ = (k-i)/rsz; + for (j = 0; j < rsz; j++) + *out++ = uncomp[i+j]; + } else { + /* No dups, store as is escaping guarding as appropriate */ + if ((unsigned char)(uncomp[i]) == guard) { + *out++ = guard; + *out++ = 0; + } else { + *out++ = uncomp[i]; + } + k = i+1; + } + } + + *comp_len = out-comp; + + return comp; +} + + +/* + * Reverses multi-byte run length encoding. + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Uncompressed data if successful + * NULL if not successful + */ +char *unxrle(char *comp, int comp_len, int *uncomp_len) { + char *uncomp; + char *out; + int rsz = (unsigned char)comp[1]; + int guard = (unsigned char)comp[2]; + unsigned char *in; + int unclen = 0, cpos, len; + + /* Calculate uncompressed length */ + for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len; unclen++) { + if (in[cpos++] == guard) { + if ((len = in[cpos++])) { + cpos += rsz; + unclen += len*rsz -1; + } + } + } + *uncomp_len = unclen; + + /* Expand */ + uncomp = out = (char *)malloc(unclen+1); + for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len;) { + char c; + if ((c = in[cpos++]) != guard) { + *out++ = c; + } else { + int len = in[cpos++]; + if (len) { + int i, j; + for (i = 0; i < len; i++) { + for (j = 0; j < rsz; j++) { + *out++ = in[cpos+j]; + } + } + cpos += rsz; + } else { + *out++ = guard; + } + } + } + *out++ = 0; + + return uncomp; +} + +/* + * Mutli-byte run length encoding. + * + * Steps along in words of size 'rsz'. Unlike XRLE above this does run-length + * encoding by writing out an additional "length" word every time 2 or more + * words in a row are spotted. This removes the need for a guard byte. + * + * Additionally this method ensures that both input and output formats remain + * aligned on words of size 'rsz'. + * + * Arguments: + * uncomp Input data + * uncomp_len Length of input data 'uncomp' + * rsz Size of blocks to compare for run checking. + * comp_len Output: length of compressed data + * + * Returns: + * Compressed data if successful + * NULL if not successful + */ +char *xrle2(char *uncomp, int uncomp_len, int rsz, int *comp_len) { + char *comp = (char *)malloc(1.4*uncomp_len + rsz); + char *last = uncomp; + char *out = comp; + int i, j, k, run_len = 0; + + *out++ = ZTR_FORM_XRLE2; + *out++ = rsz; + for (i = 2; i < rsz; i++) + *out++ = -40; + + /* FIXME: how to deal with uncomp_len not being a multiple of rsz */ + for (i = 0; i < uncomp_len; i += rsz) { + /* FIXME: use inline #def versions of memcmp/memcpy for speed? */ + memcpy(out, &uncomp[i], rsz); + out += rsz; + + if (memcmp(last, &uncomp[i], rsz) == 0) { + run_len++; + } else { + run_len = 1; + last = &uncomp[i]; + } + + /* NB: >= 3 is more optimal in many cases */ + if (run_len >= 2) { + /* Count remaining copies */ + for (k = i+rsz; k < uncomp_len && run_len < 257; k += rsz) { + if (memcmp(last, &uncomp[k], rsz) != 0) + break; + run_len++; + } + run_len -= 2; + + *out++ = run_len; + for (j = 1; j < rsz; j++) { + /* Padding with last reduces entropy compared to padding + * with copies of run_len + */ + *out++ = last[j]; + } + i = k-rsz; + + last = out-rsz; + run_len = 0; + } + } + + *comp_len = out-comp; + + return comp; +} + +/* + * Reverses multi-byte run length encoding (xrle_new). + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Uncompressed data if successful + * NULL if not successful + */ +char *unxrle2(char *comp, int comp_len, int *uncomp_len) { + char *out, *last; + int out_len, out_alloc, rsz, i, j, run_len; + + out_alloc = comp_len*2; /* just an estimate */ + out_len = 0; + if (NULL == (out = (char *)malloc(out_alloc))) + return NULL; + + if (*comp++ != ZTR_FORM_XRLE2) + return NULL; + + /* Read rsz and swallow padding */ + rsz = *comp++; + comp_len -= 2; + for (i = 2; i < rsz; i++) { + comp++; + comp_len--; + } + + /* Uncompress */ + run_len = 0; + last = comp; + for (i = 0; i < comp_len;) { + while (out_len + rsz > out_alloc) { + out_alloc *= 2; + if (NULL == (out = (char *)realloc(out, out_alloc))) + return NULL; + } + memcpy(&out[out_len], &comp[i], rsz); + + if (memcmp(&out[out_len], last, rsz) == 0) { + run_len++; + } else { + run_len = 1; + } + + i += rsz; + out_len += rsz; + + /* NB: >= 3 is more optimal in many cases */ + if (run_len >= 2) { + /* Count remaining copies */ + run_len = (unsigned char)comp[i]; + i += rsz; + + while (out_len + run_len * rsz > out_alloc) { + out_alloc *= 2; + if (NULL == (out = (char *)realloc(out, out_alloc))) + return NULL; + } + + for (j = 0; j < run_len; j++) { + memcpy(&out[out_len], last, rsz); + out_len += rsz; + } + run_len = 0; + } + + last = &comp[i-rsz]; + } + + /* Shrink back down to avoid excessive memory usage */ + out = realloc(out, out_len); + *uncomp_len = out_len; + + return out; +} + + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_DELTA1 + * --------------------------------------------------------------------------- + */ + +/* + * This replaces 'samples' with successive differences between samples. + * These implementations support 'level's of 1, 2 and 3. + * + * NB: This is analogous to our SCF delta_samples1 (etc) function, except that + * this function about 40% faster. + * + * Implementation details taken from Jean Thierry-Mieg's CTF code. + */ + +/* + * decorrelate1() + * + * Produce successive deltas from a 1-byte array. + * + * Arguments: + * uncomp Uncompressed data + * uncomp_len Length of uncompressed data + * level Differencing level (must be 1, 2 or 3) + * comp_len Return: where to store new compressed length + * + * Returns: + * Success: A decorrelated buffer (malloced) + * Failure: NULL + */ +char *decorrelate1(char *x_uncomp, + int uncomp_len, + int level, + int *comp_len) { + int i, z; + int u1 = 0, u2 = 0, u3 = 0; + char *comp = (char *)xmalloc(uncomp_len + 2); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + + if (!comp) + return NULL; + + comp+=2; + switch (level) { + case 1: + for (i = 0; i < uncomp_len; i++) { + z = u1; + u1 = u_uncomp[i]; + comp[i] = u_uncomp[i] - z; + } + break; + + case 2: + for (i = 0; i < uncomp_len; i++) { + z = 2*u1 - u2; + u2 = u1; + u1 = u_uncomp[i]; + comp[i] = u_uncomp[i] - z; + } + break; + + case 3: + for (i = 0; i < uncomp_len; i++) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = u_uncomp[i]; + comp[i] = u_uncomp[i] - z; + } + break; + + default: + return NULL; + } + comp-=2; + comp[0] = ZTR_FORM_DELTA1; + comp[1] = level; + + *comp_len = uncomp_len+2; + + return comp; +} + +#define ABS(a) ((a) > 0 ? (a) : -(a)) + +/* ZTR_FORM_DDELTA1 - experimental */ +char *decorrelate1dyn(char *x_uncomp, + int uncomp_len, + int *comp_len) { + int i, j, z[4]; + int u1 = 0, u2 = 0, u3 = 0; + char *comp = (char *)xmalloc(uncomp_len + 2); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + int level = 3; /* default level */ + int last_level = level; + int best; + + if (!comp) + return NULL; + + comp+=2; + for (i = 0; i < uncomp_len; i++) { + z[1] = u1; + z[2] = 2*u1 - u2; + z[3] = 3*u1 - 3*u2 + u3; + comp[i] = u_uncomp[i] - z[last_level]; + best = 10000; + for (j = 1; j < 3; j++) { + if (ABS(u_uncomp[i] - z[j]) < best) { + best = ABS(u_uncomp[i] - z[j]); + last_level = j; + } + } + u3 = u2; + u2 = u1; + u1 = u_uncomp[i]; + } + comp-=2; + comp[0] = ZTR_FORM_DDELTA1; + comp[1] = level; + + *comp_len = uncomp_len+2; + + return comp; +} + +/* + * recorrelate1() + * + * The reverse of decorrelate1() + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Success: uncompressed data + * Failure: NULL + */ +char *recorrelate1(char *x_comp, + int comp_len, + int *uncomp_len) { + int i, z; + int u1 = 0, u2 = 0, u3 = 0; + int level = x_comp[1]; + char *uncomp; + + uncomp = (char *)xmalloc(comp_len-2); + if (!uncomp) + return NULL; + + x_comp+=2; + comp_len-=2; + *uncomp_len = comp_len; + + switch (level) { + case 1: + for (i = 0; i < comp_len; i++) { + z = u1; + u1 = uncomp[i] = x_comp[i] + z; + } + break; + + case 2: + for (i = 0; i < comp_len; i++) { + z = 2*u1 - u2; + u2 = u1; + u1 = uncomp[i] = x_comp[i] + z; + } + break; + + case 3: + for (i = 0; i < comp_len; i++) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = uncomp[i] = x_comp[i] + z; + } + break; + } + + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_DELTA2 + * --------------------------------------------------------------------------- + */ + +/* + * decorrelate2() + * + * Produce successive deltas from a 2-byte array (big endian) + * + * Arguments: + * uncomp Uncompressed data + * uncomp_len Length of uncompressed data + * level Differencing level (must be 1, 2 or 3) + * comp_len Return: where to store new compressed length + * + * Returns: + * Success: A decorrelated buffer (malloced) + * Failure: NULL + */ +char *decorrelate2(char *x_uncomp, + int uncomp_len, + int level, + int *comp_len) { + int i, z, delta; + int u1 = 0, u2 = 0, u3 = 0; + char *comp = (char *)xmalloc(uncomp_len + 2); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + + if (!comp) + return NULL; + + comp+=2; + switch (level) { + case 1: + for (i = 0; i < uncomp_len; i+=2) { + z = u1; + u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; + delta = u1 - z; + comp[i ] = (delta >> 8) & 0xff; + comp[i+1] = (delta >> 0) & 0xff; + } + break; + + case 2: + for (i = 0; i < uncomp_len; i+=2) { + z = 2*u1 - u2; + u2 = u1; + u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; + delta = u1 - z; + comp[i ] = (delta >> 8) & 0xff; + comp[i+1] = (delta >> 0) & 0xff; + } + break; + + case 3: + for (i = 0; i < uncomp_len; i+=2) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = (u_uncomp[i] << 8) + u_uncomp[i+1]; + delta = u1 - z; + comp[i ] = (delta >> 8) & 0xff; + comp[i+1] = (delta >> 0) & 0xff; + } + break; + + default: + return NULL; + } + comp-=2; + comp[0] = ZTR_FORM_DELTA2; + comp[1] = level; + + *comp_len = uncomp_len+2; + + return comp; +} + +char *decorrelate2dyn(char *x_uncomp, + int uncomp_len, + int *comp_len) { + int i, j, z[4]; + int u1 = 0, u2 = 0, u3 = 0; + char *comp = (char *)xmalloc(uncomp_len + 2); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + int level = 2; /* minimum level */ + int last_level = level; + int best; + + if (!comp) + return NULL; + + comp+=2; + for (i = 0; i < uncomp_len; i+=2) { + z[0] = 0; + z[1] = u1; + z[2] = 2*u1 - u2; + z[3] = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = (u_uncomp[i]<<8) + u_uncomp[i+1]; + comp[i ] = ((u1 - z[last_level]) >> 8) & 0xff; + comp[i+1] = ((u1 - z[last_level]) >> 0) & 0xff; + best = 10000; + for (j = level; j < 4; j++) { + if (ABS(u1 - z[j]) < best) { + best = ABS(u1 - z[j]); + last_level = j; + } + } + } + comp-=2; + comp[0] = ZTR_FORM_DDELTA2; + comp[1] = level; + + *comp_len = uncomp_len+2; + + return comp; +} + +/* + * recorrelate2() + * + * The reverse of decorrelate2() + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Success: uncompressed data + * Failure: NULL + */ +char *recorrelate2(char *x_comp, + int comp_len, + int *uncomp_len) { + int i, z; + int u1 = 0, u2 = 0, u3 = 0; + int level = x_comp[1]; + char *uncomp; + unsigned char *u_comp = (unsigned char *)x_comp; + + uncomp = (char *)xmalloc(comp_len-2); + if (!uncomp) + return NULL; + + u_comp+=2; + comp_len-=2; + *uncomp_len = comp_len; + + switch (level) { + case 1: + for (i = 0; i < comp_len; i+=2) { + z = u1; + u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z; + uncomp[i ] = (u1 >> 8) & 0xff; + uncomp[i+1] = (u1 >> 0) & 0xff; + } + break; + + case 2: + for (i = 0; i < comp_len; i+=2) { + z = 2*u1 - u2; + u2 = u1; + u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z; + uncomp[i ] = (u1 >> 8) & 0xff; + uncomp[i+1] = (u1 >> 0) & 0xff; + } + break; + + case 3: + for (i = 0; i < comp_len; i+=2) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z; + uncomp[i ] = (u1 >> 8) & 0xff; + uncomp[i+1] = (u1 >> 0) & 0xff; + } + break; + } + + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_DELTA4 + * --------------------------------------------------------------------------- + */ + +/* + * decorrelate4() + * + * Produce successive deltas from a 4-byte array (big endian) + * + * Arguments: + * uncomp Uncompressed data + * uncomp_len Length of uncompressed data + * level Differencing level (must be 1, 2 or 3) + * comp_len Return: where to store new compressed length + * + * Returns: + * Success: A decorrelated buffer (malloced) + * Failure: NULL + */ +char *decorrelate4(char *x_uncomp, + int uncomp_len, + int level, + int *comp_len) { + int i, z, delta; + int u1 = 0, u2 = 0, u3 = 0; + char *comp = (char *)xmalloc(uncomp_len + 4); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + + if (!comp) + return NULL; + + comp+=4; + switch (level) { + case 1: + for (i = 0; i < uncomp_len; i+=4) { + z = u1; + u1 =(u_uncomp[i ] << 24) + + (u_uncomp[i+1] << 16) + + (u_uncomp[i+2] << 8) + + (u_uncomp[i+3] << 0); + delta = u1 - z; + comp[i ] = (delta >> 24) & 0xff; + comp[i+1] = (delta >> 16) & 0xff; + comp[i+2] = (delta >> 8) & 0xff; + comp[i+3] = (delta >> 0) & 0xff; + } + break; + + case 2: + for (i = 0; i < uncomp_len; i+=4) { + z = 2*u1 - u2; + u2 = u1; + u1 =(u_uncomp[i ] << 24) + + (u_uncomp[i+1] << 16) + + (u_uncomp[i+2] << 8) + + (u_uncomp[i+3] << 0); + delta = u1 - z; + comp[i ] = (delta >> 24) & 0xff; + comp[i+1] = (delta >> 16) & 0xff; + comp[i+2] = (delta >> 8) & 0xff; + comp[i+3] = (delta >> 0) & 0xff; + } + break; + + case 3: + for (i = 0; i < uncomp_len; i+=4) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 =(u_uncomp[i ] << 24) + + (u_uncomp[i+1] << 16) + + (u_uncomp[i+2] << 8) + + (u_uncomp[i+3] << 0); + delta = u1 - z; + comp[i ] = (delta >> 24) & 0xff; + comp[i+1] = (delta >> 16) & 0xff; + comp[i+2] = (delta >> 8) & 0xff; + comp[i+3] = (delta >> 0) & 0xff; + } + break; + + default: + return NULL; + } + comp-=4; + comp[0] = ZTR_FORM_DELTA4; + comp[1] = level; + comp[2] = 0; /* dummy - to align on 4-byte boundary */ + comp[3] = 0; /* dummy - to align on 4-byte boundary */ + + *comp_len = uncomp_len+4; + + return comp; +} + +/* + * recorrelate4() + * + * The reverse of decorrelate4() + * + * Arguments: + * comp Compressed input data + * comp_len Length of comp data + * uncomp_len Output: length of uncompressed data + * + * Returns: + * Success: uncompressed data + * Failure: NULL + */ +char *recorrelate4(char *x_comp, + int comp_len, + int *uncomp_len) { + int i, z; + int u1 = 0, u2 = 0, u3 = 0; + int level = x_comp[1]; + char *uncomp; + unsigned char *u_comp = (unsigned char *)x_comp; + + uncomp = (char *)xmalloc(comp_len-4); + if (!uncomp) + return NULL; + + u_comp+=4; + comp_len-=4; + *uncomp_len = comp_len; + + switch (level) { + case 1: + for (i = 0; i < comp_len; i+=4) { + z = u1; + u1 = z + + ((u_comp[i ] << 24) | + (u_comp[i+1] << 16) | + (u_comp[i+2] << 8) | + u_comp[i+3]); + uncomp[i ] = (u1 >> 24) & 0xff; + uncomp[i+1] = (u1 >> 16) & 0xff; + uncomp[i+2] = (u1 >> 8) & 0xff; + uncomp[i+3] = (u1 >> 0) & 0xff; + } + break; + + case 2: + for (i = 0; i < comp_len; i+=4) { + z = 2*u1 - u2; + u2 = u1; + u1 = z + + ((u_comp[i ] << 24) | + (u_comp[i+1] << 16) | + (u_comp[i+2] << 8) | + u_comp[i+3]); + uncomp[i ] = (u1 >> 24) & 0xff; + uncomp[i+1] = (u1 >> 16) & 0xff; + uncomp[i+2] = (u1 >> 8) & 0xff; + uncomp[i+3] = (u1 >> 0) & 0xff; + } + break; + + case 3: + for (i = 0; i < comp_len; i+=4) { + z = 3*u1 - 3*u2 + u3; + u3 = u2; + u2 = u1; + u1 = z + + ((u_comp[i ] << 24) | + (u_comp[i+1] << 16) | + (u_comp[i+2] << 8) | + u_comp[i+3]); + uncomp[i ] = (u1 >> 24) & 0xff; + uncomp[i+1] = (u1 >> 16) & 0xff; + uncomp[i+2] = (u1 >> 8) & 0xff; + uncomp[i+3] = (u1 >> 0) & 0xff; + } + break; + } + + return uncomp; +} + + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_16TO8 + * --------------------------------------------------------------------------- + */ + +/* + * shrink_16to8() + * + * Stores an array of 16-bit (big endian) array elements in an 8-bit array. + * We assume that most 16-bit elements encode numbers that fit in an 8-bit + * value. When not possible, we store a marker followed by the 16-bit value + * stored as multiple 8-bit values. + * + * uncomp Uncompressed data + * uncomp_len Length of uncompressed data (in bytes) + * comp_len Return: where to store new compressed length + * + * Returns: + * Success: An 8-bit array (malloced) + * Failure: NULL + */ +char *shrink_16to8(char *x_uncomp, int uncomp_len, int *comp_len) { + char *comp; + int i, j, i16; + signed char *s_uncomp = (signed char *)x_uncomp; + + /* Allocation - worst case is 3 * (uncomp_len/2) + 1 */ + if (NULL == (comp = (char *)xmalloc(3 * (uncomp_len/2) + 1))) + return NULL; + + comp[0] = ZTR_FORM_16TO8; + for (i = 0, j = 1; i < uncomp_len; i+=2) { + i16 = (s_uncomp[i] << 8) | (unsigned char)s_uncomp[i+1]; + if (i16 >= -127 && i16 <= 127) { + comp[j++] = i16; + } else { + comp[j++] = -128; + comp[j++] = s_uncomp[i]; + comp[j++] = s_uncomp[i+1]; + } + } + + /* Reclaim unneeded memory */ + comp = xrealloc(comp, j); + + *comp_len = j; + return comp; +} + + +/* + * expand_8to16() + * + * The opposite of the shrink_16to8() function. + * + * comp Compressed input data + * comp_len Length of comp data (in bytes) + * uncomp_len Output: length of uncompressed data (in bytes) + * + * Returns: + * Success: Uncompressed data (char *) + * Failure: NULL + */ +char *expand_8to16(char *x_comp, int comp_len, int *uncomp_len) { + int i, j; + char *uncomp; + signed char *s_comp = (signed char *)x_comp; + + /* Allocation - worst case is twice comp_len */ + if (NULL == (uncomp = (char *)xmalloc(comp_len*2))) + return NULL; + +#if 0 + for (i = 0, j = 1; j < comp_len; i+=2) { + if (s_comp[j] != -128) { + uncomp[i ] = s_comp[j] < 0 ? -1 : 0; + uncomp[i+1] = s_comp[j++]; + } else { + j++; + uncomp[i ] = s_comp[j++]; + uncomp[i+1] = s_comp[j++]; + } + } +#endif + + for (i = 0, j = 1; j < comp_len; i+=2) { + if (s_comp[j] >= 0) { + uncomp[i ] = 0; + uncomp[i+1] = s_comp[j++]; + } else { + if (s_comp[j] != -128) { + uncomp[i+1] = s_comp[j++]; + uncomp[i ] = -1; + } else { + j++; + uncomp[i ] = s_comp[j++]; + uncomp[i+1] = s_comp[j++]; + } + } + } + + /* Reclaim unneeded memory */ + uncomp = xrealloc(uncomp, i); + + *uncomp_len = i; + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_32TO8 + * --------------------------------------------------------------------------- + */ + +/* + * shrink_32to8() + * + * Stores an array of 32-bit (big endian) array elements in an 8-bit array. + * We assume that most 32-bit elements encode numbers that fit in an 8-bit + * value. When not possible, we store a marker followed by the 32-bit value + * stored as multiple 8-bit values. + * + * uncomp Uncompressed data + * uncomp_len Length of uncompressed data (in bytes) + * comp_len Return: where to store new compressed length + * + * Returns: + * Success: An 8-bit array (malloced) + * Failure: NULL + */ +char *shrink_32to8(char *x_uncomp, int uncomp_len, int *comp_len) { + char *comp; + int i, j, i32; + signed char *s_uncomp = (signed char *)x_uncomp; + + /* Allocation - worst case is 5 * (uncomp_len/4) + 1 */ + if (NULL == (comp = (char *)xmalloc(5 * (uncomp_len/4) + 1))) + return NULL; + + comp[0] = ZTR_FORM_32TO8; + for (i = 0, j = 1; i < uncomp_len; i+=4) { + i32 = (s_uncomp[i] << 24) | + (s_uncomp[i+1] << 16) | + (s_uncomp[i+2] << 8) | + (unsigned char)s_uncomp[i+3]; + if (i32 >= -127 && i32 <= 127) { + comp[j++] = i32; + } else { + comp[j++] = -128; + comp[j++] = s_uncomp[i]; + comp[j++] = s_uncomp[i+1]; + comp[j++] = s_uncomp[i+2]; + comp[j++] = s_uncomp[i+3]; + } + } + + /* Reclaim unneeded memory */ + comp = xrealloc(comp, j); + + *comp_len = j; + return comp; +} + +/* + * expand_8to32() + * + * The opposite of the shrink_32to8() function. + * + * comp Compressed input data + * comp_len Length of comp data (in bytes) + * uncomp_len Output: length of uncompressed data (in bytes) + * + * Returns: + * Success: Uncompressed data (char *) + * Failure: NULL + */ +char *expand_8to32(char *comp, int comp_len, int *uncomp_len) { + int i, j; + char *uncomp; + signed char *s_comp = (signed char *)comp; + + /* Allocation - worst case is four times comp_len */ + if (NULL == (uncomp = (char *)xmalloc(comp_len*4))) + return NULL; + + for (i = 0, j = 1; j < comp_len; i+=4) { + if (s_comp[j] != -128) { + uncomp[i ] = s_comp[j] < 0 ? -1 : 0; + uncomp[i+1] = s_comp[j] < 0 ? -1 : 0; + uncomp[i+2] = s_comp[j] < 0 ? -1 : 0; + uncomp[i+3] = s_comp[j++]; + } else { + j++; + uncomp[i ] = s_comp[j++]; + uncomp[i+1] = s_comp[j++]; + uncomp[i+2] = s_comp[j++]; + uncomp[i+3] = s_comp[j++]; + } + } + + /* Reclaim unneeded memory */ + uncomp = xrealloc(uncomp, i); + + *uncomp_len = i; + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_FOLLOW1 + * --------------------------------------------------------------------------- + */ +static int follow_tab[256][256]; +char *follow1(char *x_uncomp, + int uncomp_len, + int *comp_len) { + char *comp = (char *)xmalloc(uncomp_len + 256 + 1); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + signed char *s_uncomp = ((signed char *)x_uncomp); + int i, j; + char next[256]; + int count[256]; + + if (!comp) + return NULL; + + /* Count di-freqs */ + memset(follow_tab, 0, 256*256*sizeof(int)); +#if 0 + for (i = 0; i < uncomp_len-1; i++) + follow_tab[u_uncomp[i]][u_uncomp[i+1]]++; + + /* Pick the most frequent next byte from the preceeding byte */ + for (i = 0; i < 256; i++) { + int bestval, bestind; + + bestval = bestind = 0; + for (j = 0; j < 256; j++) { + if (follow_tab[i][j] > bestval) { + bestval = follow_tab[i][j]; + bestind = j; + } + } + next[i] = bestind; + } +#endif + memset(next, 0, 256*sizeof(*next)); + memset(count, 0, 256*sizeof(*count)); + /* Pick the most frequent next byte from the preceeding byte */ + for (i = 0; i < uncomp_len-1; ) { + int cur = u_uncomp[i]; + int nxt = u_uncomp[++i]; + int folcnt = ++follow_tab[cur][nxt]; + if (folcnt > count[cur]) { + count[cur] = folcnt; + next[cur] = nxt; + } + } + + j = 0; + comp[j++] = ZTR_FORM_FOLLOW1; + + /* Output 'next' array */ + for (i = 0; i < 256; i++, j++) + comp[j] = next[i]; + + /* Output new 'uncomp' as next['uncomp'] */ + comp[j++] = u_uncomp[0]; + for (i = 1; i < uncomp_len; i++, j++) { + comp[j] = next[u_uncomp[i-1]] - s_uncomp[i]; + } + *comp_len = j; + + return comp; +} + +char *unfollow1(char *x_comp, + int comp_len, + int *uncomp_len) { + unsigned char *u_uncomp; + int i, j; + char next[256]; + unsigned char *u_comp = (unsigned char *)x_comp; + signed char *s_comp = (signed char *)x_comp; + + u_uncomp = (unsigned char *)xmalloc(comp_len-256-1); + if (!u_uncomp) + return NULL; + + /* Load next[] array */ + j = 1; + for (i = 0; i < 256; i++, j++) + next[i] = u_comp[j]; + + /* Replace comp[x] with next[comp[x-1]] - comp[x]*/ + u_uncomp[0] = u_comp[j++]; + + comp_len -= 257; + s_comp += 257; + for (i = 1; i < comp_len; i++) { + u_uncomp[i] = next[u_uncomp[i-1]] - s_comp[i]; + } + + *uncomp_len = i; + + return (char *)u_uncomp; +} + + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_CHEB445 + * --------------------------------------------------------------------------- + */ + +#if 0 +/* + * Compresses using chebyshev polynomials to predict the next peak. + * Based on around 96 modern ABI files it compresses by around 5% (varied + * between 3.9 and 6.6). + */ + +/* + * For now this has been disabled in favour of the integer version below + * as we cannot guarantee all systems to have the same floating point + * roundings, especially with the final conversion to integer. + * (Also, for some unknown reason, the integer version produces smaller + * files.) + */ + +char *cheb445comp(char *uncomp, int uncomp_len, int *data_len) +{ + int i, k, l, z; + int datap; + float frac[5]; + float fz[25]; + signed short *d16 = (signed short *)uncomp; + int nwords = uncomp_len / 2; + short *dptr = d16; + signed short *data = (signed short *)malloc((nwords+1)*sizeof(short)); + + data[0] = le_int2(ZTR_FORM_CHEB445); + /* Check for boundary cases */ + if (nwords <= 4) { + switch (nwords) { + case 4: + data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); + case 3: + data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); + case 2: + data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); + case 1: + data[1] = be_int2(d16[0]); + } + *data_len = nwords*2; + return (char *)data; + } + + /* First 4 values are just direct deltas */ + data[1] = be_int2(d16[0]); + data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); + data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); + data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); + datap = 5; + + /* Initialise - speeds up loop */ + for (k = 0; k < 5; k++) { + float kx = cos(M_PI*(k+0.5)/5)*1.5; + frac[k] = (kx + 1.5) - (int)(kx + 1.5); + } + for (z = l = 0; l < 5; l++) { + for (k = 0; k < 5; k++, z++) { + fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5); + } + } + + /* Loop */ + for (i = 0; i < nwords-4; i++) { + float dd, y = 10/3.0; + float f[5], coef[5]; + signed short diff; + int p; + + f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0]; + f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1]; + f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2]; + f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3]; + f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4]; + + for (z = l = 0; l < 5; l++, z+=5) + coef[l] = f[0] * fz[z+0] + + f[1] * fz[z+1] + + f[2] * fz[z+2] + + f[3] * fz[z+3] + + f[4] * fz[z+4]; + + dd = y*coef[3]+coef[2]; + p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0; + + if (p < 0) p = 0; + diff = be_int2(dptr[4]) - p; + data[datap++] = be_int2(diff); + + dptr++; + } + + *data_len = datap*2; + + return (char *)data; +} + +char *cheb445uncomp(char *comp, int comp_len, int *uncomp_len) +{ + int i, k, l, z; + float frac[5]; + float fz[25]; + signed short *d16 = (signed short *)comp; + int nwords = comp_len / 2; + signed short *data = (signed short *)xmalloc(comp_len); + short *dptr = data, *dptr2 = d16; + + /* Check for boundary cases */ + if (nwords <= 3) { + switch (nwords) { + case 3: + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); + break; + case 2: + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + break; + case 1: + data[0] = be_int2(d16[1]); + break; + } + *uncomp_len = (nwords-1)*2; + return (char *)data; + } + + /* First 3 values are just direct deltas */ + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); + data[3] = be_int2(be_int2(d16[4])+be_int2(data[2])); + dptr2 += 5; + + /* Initialise - speeds up loop */ + for (k = 0; k < 5; k++) { + float kx = cos(M_PI*(k+0.5)/5)*1.5; + frac[k] = (kx + 1.5) - (int)(kx + 1.5); + } + for (z = l = 0; l < 5; l++) { + for (k = 0; k < 5; k++, z++) { + fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5); + } + } + + /* Loop */ + for (i = 0; i < nwords-3; i++) { + float dd, y = 10/3.0; + float f[5], coef[5]; + signed short diff; + int p; + + f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0]; + f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1]; + f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2]; + f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3]; + f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4]; + + for (z = l = 0; l < 5; l++, z+=5) + coef[l] = f[0] * fz[z+0] + + f[1] * fz[z+1] + + f[2] * fz[z+2] + + f[3] * fz[z+3] + + f[4] * fz[z+4]; + + dd = y*coef[3]+coef[2]; + p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0; + + if (p < 0) p = 0; + diff = be_int2(*dptr2) + p; + dptr[4] = be_int2(diff); + + dptr++; + dptr2++; + } + + *uncomp_len = (nwords-1)*2; + return (char *)data; +} +#endif + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_ICHEB + * --------------------------------------------------------------------------- + */ + +/* + * Integer versions of the chebyshev polynomial compressor. This uses + * the polynomials to predict the next peak from the preceeding 3. + * Tested on 100 ABI-3700, Megabace and Licor files it compressed by + * around 7-8%. (Oddly this is slightly more than the floating point + * version.) + * + * These require 32-bit integers and have code to make sure that arithmetic + * does not overflow this. + */ +#define CH1 150 +#define CH2 105 +char *ichebcomp(char *uncomp, int uncomp_len, int *data_len) +{ + int i, l, z; + int datap; + int frac[5] = {139,57,75,93,11}; + int fz[20] = {42, 42, 42, 42, 42, + 39, 24, 0,-24,-39, + 33,-12,-42,-12, 33, + 24,-39, 0, 39,-24}; + int dfac; + signed short *d16 = (signed short *)uncomp; + int nwords = uncomp_len / 2; + short *dptr = d16; + signed short *data = (signed short *)malloc((nwords+1)*sizeof(short)); + + data[0] = le_int2(ZTR_FORM_ICHEB); + /* Check for boundary cases */ + if (nwords <= 4) { + switch (nwords) { + case 4: + data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); + case 3: + data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); + case 2: + data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); + case 1: + data[1] = be_int2(d16[0]); + } + *data_len = nwords*2; + return (char *)data; + } + + /* First 4 values are just direct deltas */ + data[1] = be_int2(d16[0]); + data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0])); + data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1])); + data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2])); + datap = 5; + + /* Loop */ + for (i = 4; i < nwords; i++) { + int dd, f[5]; + signed int coef[4]; + signed short diff; + int p; + + /* + * FIXME: As an alternative to the range checking below, if we + * scale dptr[X] such that it's never higher than 2800 then + * the 32-bit arithmetic will never overflow. Practically speaking, + * all observed ABI and Megabace files have vales up to 1600 only. + */ + + /* + * frac[N] is always paired with frac[4-N], summing to 1.0 - or + * 150 when scaled. + * Hence f[0] has range 0 to 65536*150. + */ + f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] + + ((unsigned short)be_int2(dptr[3]))*frac[0]; + f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] + + ((unsigned short)be_int2(dptr[3]))*frac[1]; + f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] + + ((unsigned short)be_int2(dptr[2]))*frac[2]; + f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] + + ((unsigned short)be_int2(dptr[1]))*frac[3]; + f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] + + ((unsigned short)be_int2(dptr[1]))*frac[4]; + + /* + * fz[z+0..5] sums to no more than 210 (5*42) and no less than 0. + * Therefore coef[l] has range 0 to 65536*150*210, which (just) + * fits in 31-bits, plus 1 for the sign. + */ + for (z = l = 0; l < 4; l++, z+=5) + coef[l] = (f[0] * fz[z+0] + + f[1] * fz[z+1] + + f[2] * fz[z+2] + + f[3] * fz[z+3] + + f[4] * fz[z+4]); + + /* + * computing p requires at most a temporary variable of + * 24.1 * coef, but coef may be a full 32-bit integer. + * If coef is sufficiently close to cause an integer overflow then + * we scale it down. + */ + { + int max = 0; + + for (l = 0; l < 4; l++) { + if (max < ABS(coef[l])) + max = ABS(coef[l]); + } + + + if (max > 1<<26) { + dfac = max / (1<<26) + 1; + for (l = 0; l < 4; l++) + coef[l] /= dfac; + } else { + dfac = 1; + } + } + + dd = (coef[3]/3)*10+coef[2]; + p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2); + p *= dfac; + + if (p < 0) p = 0; + diff = be_int2(dptr[4]) - p; + data[datap++] = be_int2(diff); + + dptr++; + } + + *data_len = datap*2; + + return (char *)data; +} + +char *ichebuncomp(char *comp, int comp_len, int *uncomp_len) +{ + int i, l, z; + int frac[5] = {139,57,75,93,11}; + int fz[20] = {42, 42, 42, 42, 42, + 39, 24, 0,-24,-39, + 33,-12,-42,-12, 33, + 24,-39, 0, 39,-24}; + signed short *d16 = (signed short *)comp; + int nwords = comp_len / 2 - 1; + signed short *data = (signed short *)xmalloc(comp_len); + short *dptr = data, *dptr2 = d16; + int dfac; + + /* Check for boundary cases */ + if (nwords <= 4) { + switch (nwords) { + case 4: + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); + data[3] = be_int2(be_int2(d16[4])+be_int2(data[2])); + break; + case 3: + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); + break; + case 2: + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + break; + case 1: + data[0] = be_int2(d16[1]); + break; + } + *uncomp_len = nwords*2; + return (char *)data; + } + + /* First 4 values are just direct deltas */ + data[0] = be_int2(d16[1]); + data[1] = be_int2(be_int2(d16[2])+be_int2(data[0])); + data[2] = be_int2(be_int2(d16[3])+be_int2(data[1])); + data[3] = be_int2(be_int2(d16[4])+be_int2(data[2])); + dptr2 += 5; + + /* Loop */ + for (i = 4; i < nwords; i++) { + int dd, coef[5], f[5]; + signed short diff; + int p; + + f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] + + ((unsigned short)be_int2(dptr[3]))*frac[0]; + f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] + + ((unsigned short)be_int2(dptr[3]))*frac[1]; + f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] + + ((unsigned short)be_int2(dptr[2]))*frac[2]; + f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] + + ((unsigned short)be_int2(dptr[1]))*frac[3]; + f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] + + ((unsigned short)be_int2(dptr[1]))*frac[4]; + + for (z = l = 0; l < 4; l++, z+=5) + coef[l] = f[0] * fz[z+0] + + f[1] * fz[z+1] + + f[2] * fz[z+2] + + f[3] * fz[z+3] + + f[4] * fz[z+4]; + + /* + * computing p requires at most a temporary variable of + * 24.1 * coef, but coef may be a full 32-bit integer. + * If coef is sufficiently close to cause an integer overflow then + * we scale it down. + */ + { + int max = 0; + + for (l = 0; l < 4; l++) { + if (max < ABS(coef[l])) + max = ABS(coef[l]); + } + + + if (max > 1<<26) { + dfac = max / (1<<26) + 1; + for (l = 0; l < 4; l++) + coef[l] /= dfac; + } else { + dfac = 1; + } + } + + dd = (coef[3]/3)*10+coef[2]; + p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2); + p *= dfac; + + if (p < 0) p = 0; + diff = be_int2(*dptr2) + p; + dptr[4] = be_int2(diff); + + dptr++; + dptr2++; + } + + *uncomp_len = nwords*2; + return (char *)data; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_LOG + * --------------------------------------------------------------------------- + */ + +/* + * This is a LOSSY compression. It replaces N with 10 * log2(N). + * (Just an idea, and not a great one it seems.) + */ +char *log2_data(char *x_uncomp, + int uncomp_len, + int *comp_len) { + int i, u1, l1; + char *comp = (char *)xmalloc(uncomp_len + 2); + unsigned char *u_uncomp = (unsigned char *)x_uncomp; + + if (!comp) + return NULL; + + comp+=2; + for (i = 0; i < uncomp_len; i+=2) { + u1 =(u_uncomp[i ] << 8) + + (u_uncomp[i+1] << 0); + l1 = (int)(10 * log(u1+1) / log(2)); + comp[i ] = (l1 >> 8) & 0xff; + comp[i+1] = (l1 >> 0) & 0xff; + } + + comp-=2; + comp[0] = ZTR_FORM_LOG2; + comp[1] = 0; /* dummy - to align on 2-byte boundary */ + + *comp_len = uncomp_len+2; + + return comp; +} + +char *unlog2_data(char *x_comp, + int comp_len, + int *uncomp_len) { + int i, u1, l1; + char *uncomp; + unsigned char *u_comp = (unsigned char *)x_comp; + + uncomp = (char *)xmalloc(comp_len-2); + if (!uncomp) + return NULL; + + u_comp+=2; + comp_len-=2; + *uncomp_len = comp_len; + + for (i = 0; i < comp_len; i+=2) { + l1 = ((u_comp[i ] << 8) | + (u_comp[i+1] << 0)); + u1 = (int)pow(2.0, l1/10.0)-1; + uncomp[i ] = (u1 >> 8) & 0xff; + uncomp[i+1] = (u1 >> 0) & 0xff; + } + + return uncomp; +} + +/* + * --------------------------------------------------------------------------- + * ZTR_FORM_STHUFF + * --------------------------------------------------------------------------- + */ + +/* + * Implements compression using a set of static huffman codes stored using + * the Deflate algorithm (and so in this respect it's similar to zlib). + * + * The huffman codes though can be previously stored in the ztr object + * using ztr_add_hcode(). "cset" indicates which numbered stored huffman + * code set is to be used, or passing zero will use inline codes (ie they + * are stored in the data stream itself, just as in standard deflate). + * + * Arguments: + * ztr ztr_t pointer; used to find stored code-sets + * uncomp The uncompressed input data + * uncomp_len Length of uncomp + * cset Stored code-set number, zero for inline + * recsz Record size - only used when cset == 0. + * comp_len Output: length of compressed data + * + * Returns: + * Compressed data stream if successful + comp_len + * NULL on failure + */ +char *sthuff(ztr_t *ztr, char *uncomp, int uncomp_len, + int cset, int recsz, int *comp_len) { + block_t *blk = block_create(NULL, 2); + unsigned char bytes[2]; + huffman_codeset_t *c = NULL; + unsigned char *comp = NULL; + ztr_hcode_t *hc = NULL; + + if (cset >= CODE_USER) { + if (NULL == (hc = ztr_find_hcode(ztr, cset))) + return NULL; + c = hc->codes; + } else if (cset != CODE_INLINE) { + c = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0); + } + + if (!c) { + /* No cached ones found, so inline some instead */ + cset = 0; + c = generate_code_set(0, recsz, (unsigned char *)uncomp, uncomp_len, 1, + MAX_CODE_LEN, 0); + } + + bytes[0] = ZTR_FORM_STHUFF; + bytes[1] = cset; + store_bytes(blk, bytes, 2); + + if (hc) { + if (!c->blk) { + c->blk = block_create(NULL, 2); + store_codes(c->blk, c, 1); + } + blk->bit = c->blk->bit; + } else { + store_codes(blk, c, 1); + } + + /* + {int i; + for (i = 0; i < c->ncodes; i++) { + output_code_set(stderr, c->codes[i]); + }} + */ + + + /* + * Unless CODE_INLINE, all we wanted to know is what bit number + * to start on. The above is therefore somewhat inefficient. + */ + if (cset != 0) { + blk->byte = 2; + memset(&blk->data[2], 0, blk->alloc - 2); + } + + if (0 == huffman_multi_encode(blk, c, cset, + (unsigned char *)uncomp, uncomp_len)) { + comp = blk->data; + *comp_len = blk->byte + (blk->bit != 0); + block_destroy(blk, 1); + } + + if (cset == 0) + huffman_codeset_destroy(c); + + return (char *)comp; +} + +char *unsthuff(ztr_t *ztr, char *comp, int comp_len, int *uncomp_len) { + int cset = (unsigned char)(comp[1]); + huffman_codeset_t *cs = NULL, *cs_free = NULL; + block_t *blk_in = block_create(NULL, comp_len), + *blk_out = block_create(NULL, 1000); + int bfinal = 1, bit_num = 0; + char *uncomp; + + if (cset >= CODE_USER) { + /* Scans through HUFF chunks */ + ztr_hcode_t *hc = ztr_find_hcode(ztr, cset); + if (!hc) + return NULL; + + cs = hc->codes; + bit_num = cs->bit_num; + blk_in->bit = 0; + } else if (cset > 0) { + /* Create some temporary huffman_codes to stringify */ + cs_free = cs = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0); + if (!cs) + return NULL; + + bit_num = cs->bit_num; + blk_in->bit = 0; + } /* else inline codes */ + + /* + * We need to know at what bit the huffman codes would have ended on + * so we can store our huffman encoded symbols immediately following it. + * For speed though this bit-number is cached. + */ + blk_in->data[blk_in->byte++] |= *(comp+2); + store_bytes(blk_in, (unsigned char *)comp+3, comp_len-3); + + + /* Rewind */ + blk_in->byte = 0; + blk_in->bit = bit_num; + + do { + block_t *out; + + /* + * We're either at the start of a block with codes to restore + * (cset == INLINE or the 2nd onwards block) or we've already + * got some codes in cs and we're at the position where huffman + * encoded symbols are stored. + */ + if (!cs) + if (NULL == (cs = cs_free = restore_codes(blk_in, &bfinal))) + return NULL; + + /* + {int i; + for (i = 0; i < cs->ncodes; i++) { + output_code_set(stderr, cs->codes[i]); + }} + */ + + if (NULL == (out = huffman_multi_decode(blk_in, cs))) { + huffman_codeset_destroy(cs); + return NULL; + } + + /* Could optimise this for the common case of only 1 block */ + store_bytes(blk_out, out->data, out->byte); + block_destroy(out, 0); + if (cs_free) + huffman_codeset_destroy(cs_free); + cs = cs_free = NULL; + } while (!bfinal); + + *uncomp_len = blk_out->byte; + uncomp = (char *)blk_out->data; + + block_destroy(blk_in, 0); + block_destroy(blk_out, 1); + + return uncomp; +} + + +#ifndef NDEBUG +#define SYM_EOF 256 +static void output_code_set(FILE *fp, huffman_codes_t *cds) { + int i, j; + int nbits_in = 0, nbits_out = 0; + huffman_code_t *codes = cds->codes; + int ncodes = cds->ncodes; + + fprintf(fp, "static huffman_code_t codes_FIXME[] = {\n"); + for (i = j = 0; i < ncodes; i++) { + nbits_out += codes[i].nbits * codes[i].freq; + nbits_in += 8*codes[i].freq; + if (j == 0) + fprintf(fp, " "); + if (codes[i].symbol == SYM_EOF) { + fprintf(fp, "{SYM_EOF,%3d}, ", codes[i].nbits); + j = 10; + } else { + if (isalnum(codes[i].symbol)) { + fprintf(fp, "{'%c',%3d}, ", codes[i].symbol, codes[i].nbits); + } else { + fprintf(fp, "{%3d,%3d}, ", codes[i].symbol, codes[i].nbits); + } + } + j++; + + if (j >= 6) { + fputc('\n', fp); + j = 0; + } + } + if (j) + fputc('\n', fp); + fprintf(fp, "};\n"); + fprintf(fp, "/* Expected compression to %f of input */\n", + (double)nbits_out/nbits_in); +} +#endif + +/* + * Reorders quality data from its RAW format to an interleaved 4-byte + * aligned format. + * + * Starting with sequence A1 C2 G3 the raw format is quality of called + * bases followed by quality of remaining bases in triplets per base: + * 0 (RAW format) + * Q(A1) Q(C2) Q(G3) + * Q(C1) Q(G1) Q(T1) + * Q(A2) Q(G2) Q(T2) + * Q(A3) Q(C3) Q(T3) + * + * We reorder it to: + * ZTR_FORM_QSHIFT <any> <any> 0(raw) + * Q(A1) Q(C1) Q(G1) Q(T1) + * Q(C2) Q(A2) Q(G2) Q(T2) + * Q(G3) Q(A3) Q(C3) Q(T3) + * + * Returns shifted data on success + * NULL on failure + */ +char *qshift(char *qold, int qlen, int *new_len) { + int i, j, k; + char *qnew; + int nbases; + + /* + * Correct input is raw encoding + 4x nbases bytes + */ + if ((qlen-1)%4 != 0 || *qold != 0) + return NULL; + + nbases = (qlen-1)/4; + qnew = (char *)malloc((nbases+1)*4); + qnew[0] = ZTR_FORM_QSHIFT; /* reorder code */ + qnew[1] = -40; /* pad */ + qnew[2] = -40; /* pad */ + qnew[3] = qold[0]; + + for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) { + qnew[j ] = qold[1+i ]; + qnew[j+1] = qold[1+k ]; + qnew[j+2] = qold[1+k+1]; + qnew[j+3] = qold[1+k+2]; + } + + *new_len = (nbases+1)*4; + return qnew; +} + +/* + * The opposite transform from qshift() above. + * + * Returns unshifted data on success + * NULL on failure. + */ +char *unqshift(char *qold, int qlen, int *new_len) { + int i, j, k; + char *qnew; + int nbases; + + /* + * Correct input is 4x (nbases+1) bytes + */ + if (qlen%4 != 0 || *qold != ZTR_FORM_QSHIFT) + return NULL; + + nbases = qlen/4-1; + qnew = (char *)malloc(nbases*4+1); + qnew[0] = 0; /* raw byte */ + + for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) { + qnew[1+i ] = qold[j ]; + qnew[1+k ] = qold[j+1]; + qnew[1+k+1] = qold[j+2]; + qnew[1+k+2] = qold[j+3]; + } + + *new_len = nbases*4+1; + return qnew; +} + +/* + * Given a sequence ACTG this shifts trace data from the order: + * + * A1A2A3A4 C1C2C3C4 G1G2G3G4 T1T2T3T4 + * + * to + * + * A1C1G1T1 C2A2G2T2 T3A3C3G3 G4C4C4T4 + * + * Ie for each base it ouputs the signal for the called base first + * followed by the remaining 3 signals in A,C,G,T order (minus the + * called signal already output). + */ + +/* + * NCBI uses a rotation mechanism. Thus instead of converting acGt (G + * called) to Gact they produce Gtac. + * + * Given 4 columns of data the two schemes produce value orderings as: + * + * Rotate: Acgt Shift: Acgt + * Cgta Cagt + * Gtac Gact + * Tacg Tacg + * + * As a consequence any channel bias for a/c/g/t is better preserved + * by shifting than it is by rotating as each column (except #1) are + * only populated by two base types and 2 of those columns are on a + * 3:1 ratio. + */ +/* #define ROTATE_INSTEAD */ +char *tshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) { + int nc, i; + ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc); + char *bases; + int nbases; + unsigned short *tnew, *told; + + if (nc == 0) + return NULL; + + if (*told_c != 0) + return NULL; /* assume RAW format trace input */ + + /* Use last BASE chunk if multiple are present */ + /* FIXME: ensure uncompressed first */ + bases = base[nc-1]->data+1; + nbases = base[nc-1]->dlength-1; + + if (nbases != (tlen-2)/8) { + fprintf(stderr, "Mismatch in number of base calls to samples\n"); + return NULL; + } + + /* Allocate and initialise header */ + told = ((unsigned short *)told_c) + 1; + *new_len = (nbases*4+4) * sizeof(*tnew); + tnew = (unsigned short *)malloc(*new_len); + for (i = 0; i < 4; i++) { + tnew[i] = 0; + } + ((char *)tnew)[0] = ZTR_FORM_TSHIFT; + +#ifdef ROTATE_INSTEAD + /* Reorder */ + for (i = 0; i < nbases; i++) { + switch(bases[i]) { + case 'T': + tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */ + tnew[4+i*4+1] = told[0*nbases+i]; + tnew[4+i*4+2] = told[1*nbases+i]; + tnew[4+i*4+3] = told[2*nbases+i]; + break; + case 'G': + tnew[4+i*4+0] = told[2*nbases+i]; /* GTAC */ + tnew[4+i*4+1] = told[3*nbases+i]; + tnew[4+i*4+2] = told[0*nbases+i]; + tnew[4+i*4+3] = told[1*nbases+i]; + break; + case 'C': + tnew[4+i*4+0] = told[1*nbases+i]; /* CGTA */ + tnew[4+i*4+1] = told[2*nbases+i]; + tnew[4+i*4+2] = told[3*nbases+i]; + tnew[4+i*4+3] = told[0*nbases+i]; + break; + default: + tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */ + tnew[4+i*4+1] = told[1*nbases+i]; + tnew[4+i*4+2] = told[2*nbases+i]; + tnew[4+i*4+3] = told[3*nbases+i]; + break; + } + } +#else + /* Reorder */ + for (i = 0; i < nbases; i++) { + switch(bases[i]) { + case 'T': + tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */ + tnew[4+i*4+1] = told[0*nbases+i]; + tnew[4+i*4+2] = told[1*nbases+i]; + tnew[4+i*4+3] = told[2*nbases+i]; + break; + case 'G': + tnew[4+i*4+0] = told[2*nbases+i]; /* GACT */ + tnew[4+i*4+1] = told[0*nbases+i]; + tnew[4+i*4+2] = told[1*nbases+i]; + tnew[4+i*4+3] = told[3*nbases+i]; + break; + case 'C': + tnew[4+i*4+0] = told[1*nbases+i]; /* CAGT */ + tnew[4+i*4+1] = told[0*nbases+i]; + tnew[4+i*4+2] = told[2*nbases+i]; + tnew[4+i*4+3] = told[3*nbases+i]; + break; + default: + tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */ + tnew[4+i*4+1] = told[1*nbases+i]; + tnew[4+i*4+2] = told[2*nbases+i]; + tnew[4+i*4+3] = told[3*nbases+i]; + break; + } + } +#endif + + xfree(base); + + return (char *)tnew; +} + +char *untshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) { + unsigned short *tnew, *told = (unsigned short *)told_c; + int nc, nbases, i; + char *bases; + ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc); + + if (nc == 0) + return NULL; + + /* Use last BASE chunk if multiple are present */ + uncompress_chunk(ztr, base[nc-1]); + bases = base[nc-1]->data+1; + nbases = base[nc-1]->dlength-1; + + *new_len = 2 + nbases*4 * sizeof(*tnew); + tnew = (unsigned short *)malloc(*new_len); + tnew[0] = 0; + told += 4; + +#ifdef ROTATE_INSTEAD + /* Reorder */ + for (i = 0; i < nbases; i++) { + switch(bases[i]) { + case 'T': + tnew[1+3*nbases+i] = *told++; /* TACG */ + tnew[1+0*nbases+i] = *told++; + tnew[1+1*nbases+i] = *told++; + tnew[1+2*nbases+i] = *told++; + break; + case 'G': + tnew[1+2*nbases+i] = *told++; /* GTAC */ + tnew[1+3*nbases+i] = *told++; + tnew[1+0*nbases+i] = *told++; + tnew[1+1*nbases+i] = *told++; + break; + case 'C': + tnew[1+1*nbases+i] = *told++; /* CGTA */ + tnew[1+2*nbases+i] = *told++; + tnew[1+3*nbases+i] = *told++; + tnew[1+0*nbases+i] = *told++; + break; + default: + tnew[1+0*nbases+i] = *told++; /* ACGT */ + tnew[1+1*nbases+i] = *told++; + tnew[1+2*nbases+i] = *told++; + tnew[1+3*nbases+i] = *told++; + break; + } + } +#else + /* Reorder */ + for (i = 0; i < nbases; i++) { + switch(bases[i]) { + case 'T': + tnew[1+3*nbases+i] = *told++; /* TACG */ + tnew[1+0*nbases+i] = *told++; + tnew[1+1*nbases+i] = *told++; + tnew[1+2*nbases+i] = *told++; + break; + case 'G': + tnew[1+2*nbases+i] = *told++; /* GACT */ + tnew[1+0*nbases+i] = *told++; + tnew[1+1*nbases+i] = *told++; + tnew[1+3*nbases+i] = *told++; + break; + case 'C': + tnew[1+1*nbases+i] = *told++; /* CAGT */ + tnew[1+0*nbases+i] = *told++; + tnew[1+2*nbases+i] = *told++; + tnew[1+3*nbases+i] = *told++; + break; + default: + tnew[1+0*nbases+i] = *told++; /* ACGT */ + tnew[1+1*nbases+i] = *told++; + tnew[1+2*nbases+i] = *told++; + tnew[1+3*nbases+i] = *told++; + break; + } + } +#endif + + xfree(base); + + return (char *)tnew; +}