srf2fastq: srf2fastq/io_lib-1.12.2/io_lib/compression.c comparison

comparison srf2fastq/io_lib-1.12.2/io_lib/compression.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository

author	dawe
date	Tue, 07 Jun 2011 17:48:05 -0400
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:d901c9f41a6a
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <zlib.h>
+#include <assert.h>
+#include <math.h>
+#include <ctype.h>
+#ifndef M_PI
+#  define M_PI 3.14159265358979323846
+#endif
+#include "io_lib/ztr.h"
+#include "io_lib/os.h"
+#include "io_lib/compression.h"
+#include "io_lib/xalloc.h"
+#ifndef NDEBUG
+#  define NDEBUG
+#endif
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_ZLIB
+* ---------------------------------------------------------------------------
+*/
+/*
+* Some comments on zlib usage.
+*
+* - Ideally for trace data, after decorrelation, we should use Z_FILTERED.
+*   Empirical studies show that this gives the best compression ratio, but
+*   it is slow (about the same speed as normal gzip). MUCH faster is huffman
+*   only, and it doesn't give radically different compression ratios.
+*
+* - When compressing using Z_HUFFMAN_ONLY we used compression level
+*   '1' as this invokes the deflate_fast() algorithm. It makes no
+*   difference to the compression level, but it seems to be quicker still.
+*
+*/
+/*
+* zlib_huff()
+*
+* Compresses data using huffman encoding, as implemented by zlib.
+*
+* Arguments:
+*	uncomp		Uncompressed input data
+*	uncomp_len	Length of uncomp data
+*	comp_len	Output: length of compressed data
+*
+* Returns:
+*	Compressed data if successful
+*	NULL if not successful
+*/
+char *zlib_huff(char *uncomp, int uncomp_len, int strategy, int *comp_len) {
+z_stream zstr;
+int err;
+int comp_len_tmp = (int)(uncomp_len * 1.001 + 12); /* Maximum expansion size */
+char *comp = (char *)xmalloc(comp_len_tmp+5);
+int c_len;
+/* Initialise zlib */
+zstr.zalloc = (alloc_func)0;
+zstr.zfree = (free_func)0;
+zstr.opaque = (voidpf)0;
+if ((err = deflateInit2(&zstr, 1, Z_DEFLATED, 15, 8, strategy)) != Z_OK) {
+	fprintf(stderr, "zlib errror in deflateInit2(): %d\n", err);
+	return NULL;
+}
+/* Set up input and output buffers */
+zstr.next_in = (unsigned char *)uncomp;
+zstr.avail_in = uncomp_len;
+zstr.next_out = (unsigned char *)comp+5;
+zstr.avail_out = comp_len_tmp;
+/* Do the compression */
+if ((err = deflate(&zstr, Z_FINISH)) != Z_STREAM_END) {
+	fprintf(stderr, "zlib errror in deflate(): %d\n", err);
+	return NULL;
+}
+/* Tidy up */
+deflateEnd(&zstr);
+c_len = zstr.total_out;
+/* Return */
+comp[0] = ZTR_FORM_ZLIB;
+comp[1] = (uncomp_len >>  0) & 0xff;
+comp[2] = (uncomp_len >>  8) & 0xff;
+comp[3] = (uncomp_len >> 16) & 0xff;
+comp[4] = (uncomp_len >> 24) & 0xff;
+if (comp_len)
+	*comp_len = c_len+5;
+return comp;
+}
+/*
+* zlib_dehuff()
+*
+* Uncompresses data using huffman encoding, as implemented by zlib.
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Uncompressed data if successful
+*	NULL if not successful
+*/
+char *zlib_dehuff(char *comp, int comp_len, int *uncomp_len) {
+z_stream zstr;
+int err;
+char *uncomp;
+int ulen;
+/* Allocate */
+ulen =
+	((unsigned char)comp[1] <<  0) +
+	((unsigned char)comp[2] <<  8) +
+	((unsigned char)comp[3] << 16) +
+	((unsigned char)comp[4] << 24);
+uncomp = (char *)xmalloc(ulen);
+/* Initialise zlib */
+zstr.zalloc = (alloc_func)0;
+zstr.zfree = (free_func)0;
+zstr.opaque = (voidpf)0;
+if ((err = inflateInit(&zstr)) != Z_OK) {
+	fprintf(stderr, "zlib errror in inflateInit(): %d\n", err);
+	return NULL;
+}
+/* Set up input and output buffers */
+zstr.next_in = (unsigned char *)comp+5;
+zstr.avail_in = comp_len-5;
+zstr.next_out = (unsigned char *)uncomp;
+zstr.avail_out = ulen;
+/* Do the decompression */
+if ((err = inflate(&zstr, Z_FINISH)) != Z_STREAM_END) {
+	fprintf(stderr, "zlib errror in deflate(): %d\n", err);
+	return NULL;
+}
+/* Tidy up */
+inflateEnd(&zstr);
+if (uncomp_len)
+	*uncomp_len = ulen;
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_RLE
+* ---------------------------------------------------------------------------
+*/
+/*
+* Run length encoding.
+*
+* Any run of 3 or more identical characters (up to 255 in a row) are replaced
+* by a 'guard' byte followed by the number of characters followed by
+* the character value itself.
+* Any single guard value in the input is escaped using 'guard 0'.
+*
+* Specifying guard as -1 will automatically pick one of the least used
+* characters in the input as the guard.
+*
+* Arguments:
+*	uncomp		Input data
+*	uncomp_len	Length of input data 'uncomp'
+*	guard		Guard byte - used to encode "N" copies of data
+*	comp_len	Output: length of compressed data
+*
+* Returns:
+*	Compressed data if successful
+*	NULL if not successful
+*/
+char *rle(char *uncomp, int uncomp_len, int guard, int *comp_len) {
+int i, k, c_len = 0;
+char *comp = xmalloc(2 * uncomp_len + 6);
+char *out = comp + 6;
+/* A guard of -1 implies to search for the least frequent symbol */
+if (guard == -1) {
+	int cnt[256];
+	int bestcnt = uncomp_len + 1;
+	for (i = 0; i < 256; i++)
+	    cnt[i] = 0;
+	for (i = 0; i < uncomp_len; i++) {
+	    cnt[(unsigned char)uncomp[i]]++;
+	}
+	for (i = 0; i < 256; i++) {
+	    if (cnt[i] < bestcnt) {
+		bestcnt = cnt[i];
+		guard = i;
+	    }
+	}
+}
+for (i = 0; i < uncomp_len; i=k) {
+	/*
+	 * Detect blocks of up identical bytes up to 255 bytes long.
+	 */
+	for (k = i; k < uncomp_len && uncomp[i] == uncomp[k]; k++)
+	    if (k-i == 255)
+		break;
+	/* 1, 2 or 3 bytes are best stored "as is" */
+	if (k-i < 4) {
+	    do {
+		/*
+		 * If we find 'guard' in our sequence, escape it by
+		 * outputting 'guard' <zero>. (We know that we'll never
+		 * write out zero copies of a token in our rle compression
+		 * algorithm.)
+		 */
+		if ((unsigned char)(uncomp[i]) == guard) {
+		    out[c_len++] = guard;
+		    out[c_len++] = 0;
+		} else {
+		    out[c_len++] = uncomp[i];
+		}
+		i++;
+	    } while (k >= i+1);
+	} else {
+	    /* More than 3 bytes: store as ('guard', length, byte value) */
+	    out[c_len++] = guard;
+	    out[c_len++] = k-i;
+	    out[c_len++] = uncomp[i];
+	}
+}
+/* Return */
+comp[0] = ZTR_FORM_RLE;
+comp[1] = (uncomp_len >>  0) & 0xff;
+comp[2] = (uncomp_len >>  8) & 0xff;
+comp[3] = (uncomp_len >> 16) & 0xff;
+comp[4] = (uncomp_len >> 24) & 0xff;
+comp[5] = guard;
+if (comp_len)
+	*comp_len = c_len+6;
+return comp;
+}
+/*
+* Reverses run length encoding.
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Uncompressed data if successful
+*	NULL if not successful
+*/
+/* ARGSUSED */
+char *unrle(char *comp, int comp_len, int *uncomp_len) {
+int in_i, out_i, i, val, count, out_len;
+char *uncomp;
+unsigned char *in = (unsigned char *)comp+6;
+char *out;
+int guard = (unsigned char)comp[5];
+/* Allocate */
+out_len =
+	((unsigned char)comp[1] <<  0) +
+	((unsigned char)comp[2] <<  8) +
+	((unsigned char)comp[3] << 16) +
+	((unsigned char)comp[4] << 24);
+out = uncomp = (char *)xmalloc(out_len);
+for (in_i = out_i = 0; out_i < out_len; in_i++) {
+	if (in[in_i] != guard) {
+	    /* When not 'guard' it's easy - just output this token */
+	    assert(out_i >= 0 && out_i < out_len);
+	    out[out_i++] = in[in_i];
+	} else {
+	    /*
+	     * Found an 'guard' token. If next token is zero, then
+	     * we were simply escaping a real 'guard' token in the input
+	     * data, otherwise output a string of bytes.
+	     */
+	    count = in[++in_i];
+	    if (count != 0) {
+		val = in[++in_i];
+		for (i = 0; i < count; i++) {
+		    assert(out_i >= 0 && out_i < out_len);
+		    out[out_i++] = val;
+		}
+	    } else {
+		assert(out_i >= 0 && out_i < out_len);
+		out[out_i++] = guard;
+	    }
+	}
+}
+if (uncomp_len)
+	*uncomp_len = out_len;
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_XRLE
+* ---------------------------------------------------------------------------
+*/
+/*
+* Mutli-byte run length encoding.
+*
+* Any run of 3 or more identical characters (up to 255 in a row) are replaced
+* by a 'guard' byte followed by the number of characters followed by
+* the character value itself.
+* Any single guard value in the input is escaped using 'guard 0'.
+*
+* Specifying guard as -1 will automatically pick one of the least used
+* characters in the input as the guard.
+*
+* Arguments:
+*	uncomp		Input data
+*	uncomp_len	Length of input data 'uncomp'
+*	guard		Guard byte - used to encode "N" copies of data
+*      rsz             Size of blocks to compare for run checking.
+*	comp_len	Output: length of compressed data
+*
+* Returns:
+*	Compressed data if successful
+*	NULL if not successful
+*/
+char *xrle(char *uncomp, int uncomp_len, int guard, int rsz, int *comp_len) {
+char *comp = (char *)malloc(2 * uncomp_len + 3);
+char *out = comp;
+int i, j, k;
+/* A guard of -1 implies to search for the least frequent symbol */
+if (guard == -1) {
+	int cnt[256];
+	int bestcnt = uncomp_len + 1;
+	for (i = 0; i < 256; i++)
+	    cnt[i] = 0;
+	for (i = 0; i < uncomp_len; i++) {
+	    cnt[(unsigned char)uncomp[i]]++;
+	}
+	for (i = 0; i < 256; i++) {
+	    if (cnt[i] < bestcnt) {
+		bestcnt = cnt[i];
+		guard = i;
+	    }
+	}
+}
+*out++ = ZTR_FORM_XRLE;
+*out++ = rsz;
+*out++ = guard;
+for (i = 0; i < uncomp_len; i = k) {
+	/* Count repeats */
+	k = i + rsz;
+	while (k <= uncomp_len - rsz && !memcmp(&uncomp[i], &uncomp[k], rsz)) {
+	       k += rsz;
+	       if ((k-i)/rsz == 255)
+		   break;
+	}
+	if (k-i > rsz) {
+	    /* Duplicates, so RLE */
+	    *out++ = guard;
+	    *out++ = (k-i)/rsz;
+	    for (j = 0; j < rsz; j++)
+		*out++ = uncomp[i+j];
+	} else {
+	    /* No dups, store as is escaping guarding as appropriate */
+	    if ((unsigned char)(uncomp[i]) == guard) {
+		*out++ = guard;
+		*out++ = 0;
+	    } else {
+		*out++ = uncomp[i];
+	    }
+	    k = i+1;
+	}
+}
+*comp_len = out-comp;
+return comp;
+}
+/*
+* Reverses multi-byte run length encoding.
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Uncompressed data if successful
+*	NULL if not successful
+*/
+char *unxrle(char *comp, int comp_len, int *uncomp_len) {
+char *uncomp;
+char *out;
+int rsz = (unsigned char)comp[1];
+int guard = (unsigned char)comp[2];
+unsigned char *in;
+int unclen = 0, cpos, len;
+/* Calculate uncompressed length */
+for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len; unclen++) {
+	if (in[cpos++] == guard) {
+	    if ((len = in[cpos++])) {
+		cpos += rsz;
+		unclen += len*rsz -1;
+	    }
+	}
+}
+*uncomp_len = unclen;
+/* Expand */
+uncomp = out = (char *)malloc(unclen+1);
+for (in = (unsigned char *)comp, cpos = 3; cpos < comp_len;) {
+	char c;
+	if ((c = in[cpos++]) != guard) {
+	    *out++ = c;
+	} else {
+	    int len = in[cpos++];
+	    if (len) {
+		int i, j;
+		for (i = 0; i < len; i++) {
+		    for (j = 0; j < rsz; j++) {
+			*out++ = in[cpos+j];
+		    }
+		}
+		cpos += rsz;
+	    } else {
+		*out++ = guard;
+	    }
+	}
+}
+*out++ = 0;
+return uncomp;
+}
+/*
+* Mutli-byte run length encoding.
+*
+* Steps along in words of size 'rsz'. Unlike XRLE above this does run-length
+* encoding by writing out an additional "length" word every time 2 or more
+* words in a row are spotted. This removes the need for a guard byte.
+*
+* Additionally this method ensures that both input and output formats remain
+* aligned on words of size 'rsz'.
+*
+* Arguments:
+*	uncomp		Input data
+*	uncomp_len	Length of input data 'uncomp'
+*      rsz             Size of blocks to compare for run checking.
+*	comp_len	Output: length of compressed data
+*
+* Returns:
+*	Compressed data if successful
+*	NULL if not successful
+*/
+char *xrle2(char *uncomp, int uncomp_len, int rsz, int *comp_len) {
+char *comp = (char *)malloc(1.4*uncomp_len + rsz);
+char *last = uncomp;
+char *out = comp;
+int i, j, k, run_len = 0;
+*out++ = ZTR_FORM_XRLE2;
+*out++ = rsz;
+for (i = 2; i < rsz; i++)
+	*out++ = -40;
+/* FIXME: how to deal with uncomp_len not being a multiple of rsz */
+for (i = 0; i < uncomp_len; i += rsz) {
+	/* FIXME: use inline #def versions of memcmp/memcpy for speed? */
+	memcpy(out,  &uncomp[i], rsz);
+	out += rsz;
+	if (memcmp(last, &uncomp[i], rsz) == 0) {
+	    run_len++;
+	} else {
+	    run_len = 1;
+	    last = &uncomp[i];
+	}
+	/* NB: >= 3 is more optimal in many cases */
+	if (run_len >= 2) {
+	    /* Count remaining copies */
+	    for (k = i+rsz; k < uncomp_len && run_len < 257; k += rsz) {
+		if (memcmp(last, &uncomp[k], rsz) != 0)
+		    break;
+		run_len++;
+	    }
+	    run_len -= 2;
+	    *out++ = run_len;
+	    for (j = 1; j < rsz; j++) {
+		/* Padding with last reduces entropy compared to padding
+		 * with copies of run_len
+		 */
+		*out++ = last[j];
+	    }
+	    i = k-rsz;
+	    last = out-rsz;
+	    run_len = 0;
+	}
+}
+*comp_len = out-comp;
+return comp;
+}
+/*
+* Reverses multi-byte run length encoding (xrle_new).
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Uncompressed data if successful
+*	NULL if not successful
+*/
+char *unxrle2(char *comp, int comp_len, int *uncomp_len) {
+char *out, *last;
+int out_len, out_alloc, rsz, i, j, run_len;
+out_alloc = comp_len*2; /* just an estimate */
+out_len = 0;
+if (NULL == (out = (char *)malloc(out_alloc)))
+	return NULL;
+if (*comp++ != ZTR_FORM_XRLE2)
+	return NULL;
+/* Read rsz and swallow padding */
+rsz = *comp++;
+comp_len -= 2;
+for (i = 2; i < rsz; i++) {
+	comp++;
+	comp_len--;
+}
+/* Uncompress */
+run_len = 0;
+last = comp;
+for (i = 0; i < comp_len;) {
+	while (out_len + rsz > out_alloc) {
+	    out_alloc *= 2;
+	    if (NULL == (out = (char *)realloc(out, out_alloc)))
+		return NULL;
+	}
+	memcpy(&out[out_len], &comp[i], rsz);
+	if (memcmp(&out[out_len], last, rsz) == 0) {
+	    run_len++;
+	} else {
+	    run_len = 1;
+	}
+	i += rsz;
+	out_len += rsz;
+	/* NB: >= 3 is more optimal in many cases */
+	if (run_len >= 2) {
+	    /* Count remaining copies */
+	    run_len = (unsigned char)comp[i];
+	    i += rsz;
+	    while (out_len + run_len * rsz > out_alloc) {
+		out_alloc *= 2;
+		if (NULL == (out = (char *)realloc(out, out_alloc)))
+		    return NULL;
+	    }
+	    for (j = 0; j < run_len; j++) {
+		memcpy(&out[out_len], last, rsz);
+		out_len += rsz;
+	    }
+	    run_len = 0;
+	}
+	last = &comp[i-rsz];
+}
+/* Shrink back down to avoid excessive memory usage */
+out = realloc(out, out_len);
+*uncomp_len = out_len;
+return out;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_DELTA1
+* ---------------------------------------------------------------------------
+*/
+/*
+* This replaces 'samples' with successive differences between samples.
+* These implementations support 'level's of 1, 2 and 3.
+*
+* NB: This is analogous to our SCF delta_samples1 (etc) function, except that
+* this function about 40% faster.
+*
+* Implementation details taken from Jean Thierry-Mieg's CTF code.
+*/
+/*
+* decorrelate1()
+*
+* Produce successive deltas from a 1-byte array.
+*
+* Arguments:
+*	uncomp		Uncompressed data
+*	uncomp_len	Length of uncompressed data
+*	level		Differencing level (must be 1, 2 or 3)
+*	comp_len	Return: where to store new compressed length
+*
+* Returns:
+*	Success: A decorrelated buffer (malloced)
+*	Failure: NULL
+*/
+char *decorrelate1(char *x_uncomp,
+		   int uncomp_len,
+		   int level,
+		   int *comp_len) {
+int i, z;
+int u1 = 0, u2 = 0, u3 = 0;
+char *comp = (char *)xmalloc(uncomp_len + 2);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+if (!comp)
+	return NULL;
+comp+=2;
+switch (level) {
+case 1:
+	for (i = 0; i < uncomp_len; i++) {
+	    z = u1;
+	    u1 = u_uncomp[i];
+	    comp[i] = u_uncomp[i] - z;
+	}
+	break;
+case 2:
+	for (i = 0; i < uncomp_len; i++) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 = u_uncomp[i];
+	    comp[i] = u_uncomp[i] - z;
+	}
+	break;
+case 3:
+	for (i = 0; i < uncomp_len; i++) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 = u_uncomp[i];
+	    comp[i] = u_uncomp[i] - z;
+	}
+	break;
+default:
+	return NULL;
+}
+comp-=2;
+comp[0] = ZTR_FORM_DELTA1;
+comp[1] = level;
+*comp_len = uncomp_len+2;
+return comp;
+}
+#define ABS(a) ((a) > 0 ? (a) : -(a))
+/* ZTR_FORM_DDELTA1 - experimental */
+char *decorrelate1dyn(char *x_uncomp,
+		      int uncomp_len,
+		      int *comp_len) {
+int i, j, z[4];
+int u1 = 0, u2 = 0, u3 = 0;
+char *comp = (char *)xmalloc(uncomp_len + 2);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+int level = 3; /* default level */
+int last_level = level;
+int best;
+if (!comp)
+	return NULL;
+comp+=2;
+for (i = 0; i < uncomp_len; i++) {
+	z[1] = u1;
+	z[2] = 2*u1 - u2;
+	z[3] = 3*u1 - 3*u2 + u3;
+	comp[i] = u_uncomp[i] - z[last_level];
+	best = 10000;
+	for (j = 1; j < 3; j++) {
+	    if (ABS(u_uncomp[i] - z[j]) < best) {
+		best = ABS(u_uncomp[i] - z[j]);
+		last_level = j;
+	    }
+	}
+	u3 = u2;
+	u2 = u1;
+	u1 = u_uncomp[i];
+}
+comp-=2;
+comp[0] = ZTR_FORM_DDELTA1;
+comp[1] = level;
+*comp_len = uncomp_len+2;
+return comp;
+}
+/*
+* recorrelate1()
+*
+* The reverse of decorrelate1()
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Success: uncompressed data
+*	Failure: NULL
+*/
+char *recorrelate1(char *x_comp,
+		   int comp_len,
+		   int *uncomp_len) {
+int i, z;
+int u1 = 0, u2 = 0, u3 = 0;
+int level = x_comp[1];
+char *uncomp;
+uncomp = (char *)xmalloc(comp_len-2);
+if (!uncomp)
+	return NULL;
+x_comp+=2;
+comp_len-=2;
+*uncomp_len = comp_len;
+switch (level) {
+case 1:
+	for (i = 0; i < comp_len; i++) {
+	    z = u1;
+	    u1 = uncomp[i] = x_comp[i] + z;
+	}
+	break;
+case 2:
+	for (i = 0; i < comp_len; i++) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 = uncomp[i] = x_comp[i] + z;
+	}
+	break;
+case 3:
+	for (i = 0; i < comp_len; i++) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 = uncomp[i] = x_comp[i] + z;
+	}
+	break;
+}
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_DELTA2
+* ---------------------------------------------------------------------------
+*/
+/*
+* decorrelate2()
+*
+* Produce successive deltas from a 2-byte array (big endian)
+*
+* Arguments:
+*	uncomp		Uncompressed data
+*	uncomp_len	Length of uncompressed data
+*	level		Differencing level (must be 1, 2 or 3)
+*	comp_len	Return: where to store new compressed length
+*
+* Returns:
+*	Success: A decorrelated buffer (malloced)
+*	Failure: NULL
+*/
+char *decorrelate2(char *x_uncomp,
+		   int uncomp_len,
+		   int level,
+		   int *comp_len) {
+int i, z, delta;
+int u1 = 0, u2 = 0, u3 = 0;
+char *comp = (char *)xmalloc(uncomp_len + 2);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+if (!comp)
+	return NULL;
+comp+=2;
+switch (level) {
+case 1:
+	for (i = 0; i < uncomp_len; i+=2) {
+	    z = u1;
+	    u1 = (u_uncomp[i] << 8) + u_uncomp[i+1];
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 8) & 0xff;
+	    comp[i+1] = (delta >> 0) & 0xff;
+	}
+	break;
+case 2:
+	for (i = 0; i < uncomp_len; i+=2) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 = (u_uncomp[i] << 8) + u_uncomp[i+1];
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 8) & 0xff;
+	    comp[i+1] = (delta >> 0) & 0xff;
+	}
+	break;
+case 3:
+	for (i = 0; i < uncomp_len; i+=2) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 = (u_uncomp[i] << 8) + u_uncomp[i+1];
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 8) & 0xff;
+	    comp[i+1] = (delta >> 0) & 0xff;
+	}
+	break;
+default:
+	return NULL;
+}
+comp-=2;
+comp[0] = ZTR_FORM_DELTA2;
+comp[1] = level;
+*comp_len = uncomp_len+2;
+return comp;
+}
+char *decorrelate2dyn(char *x_uncomp,
+		      int uncomp_len,
+		      int *comp_len) {
+int i, j, z[4];
+int u1 = 0, u2 = 0, u3 = 0;
+char *comp = (char *)xmalloc(uncomp_len + 2);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+int level = 2; /* minimum level */
+int last_level = level;
+int best;
+if (!comp)
+	return NULL;
+comp+=2;
+for (i = 0; i < uncomp_len; i+=2) {
+	z[0] = 0;
+	z[1] = u1;
+	z[2] = 2*u1 - u2;
+	z[3] = 3*u1 - 3*u2 + u3;
+	u3 = u2;
+	u2 = u1;
+	u1 = (u_uncomp[i]<<8) + u_uncomp[i+1];
+	comp[i  ] = ((u1 - z[last_level]) >> 8) & 0xff;
+	comp[i+1] = ((u1 - z[last_level]) >> 0) & 0xff;
+	best = 10000;
+	for (j = level; j < 4; j++) {
+	    if (ABS(u1 - z[j]) < best) {
+		best = ABS(u1 - z[j]);
+		last_level = j;
+	    }
+	}
+}
+comp-=2;
+comp[0] = ZTR_FORM_DDELTA2;
+comp[1] = level;
+*comp_len = uncomp_len+2;
+return comp;
+}
+/*
+* recorrelate2()
+*
+* The reverse of decorrelate2()
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Success: uncompressed data
+*	Failure: NULL
+*/
+char *recorrelate2(char *x_comp,
+		   int comp_len,
+		   int *uncomp_len) {
+int i, z;
+int u1 = 0, u2 = 0, u3 = 0;
+int level = x_comp[1];
+char *uncomp;
+unsigned char *u_comp = (unsigned char *)x_comp;
+uncomp = (char *)xmalloc(comp_len-2);
+if (!uncomp)
+	return NULL;
+u_comp+=2;
+comp_len-=2;
+*uncomp_len = comp_len;
+switch (level) {
+case 1:
+	for (i = 0; i < comp_len; i+=2) {
+	    z = u1;
+	    u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
+	    uncomp[i  ] = (u1 >> 8) & 0xff;
+	    uncomp[i+1] = (u1 >> 0) & 0xff;
+	}
+	break;
+case 2:
+	for (i = 0; i < comp_len; i+=2) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
+	    uncomp[i  ] = (u1 >> 8) & 0xff;
+	    uncomp[i+1] = (u1 >> 0) & 0xff;
+	}
+	break;
+case 3:
+	for (i = 0; i < comp_len; i+=2) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 = ((u_comp[i] << 8) | u_comp[i+1]) + z;
+	    uncomp[i  ] = (u1 >> 8) & 0xff;
+	    uncomp[i+1] = (u1 >> 0) & 0xff;
+	}
+	break;
+}
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_DELTA4
+* ---------------------------------------------------------------------------
+*/
+/*
+* decorrelate4()
+*
+* Produce successive deltas from a 4-byte array (big endian)
+*
+* Arguments:
+*	uncomp		Uncompressed data
+*	uncomp_len	Length of uncompressed data
+*	level		Differencing level (must be 1, 2 or 3)
+*	comp_len	Return: where to store new compressed length
+*
+* Returns:
+*	Success: A decorrelated buffer (malloced)
+*	Failure: NULL
+*/
+char *decorrelate4(char *x_uncomp,
+		   int uncomp_len,
+		   int level,
+		   int *comp_len) {
+int i, z, delta;
+int u1 = 0, u2 = 0, u3 = 0;
+char *comp = (char *)xmalloc(uncomp_len + 4);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+if (!comp)
+	return NULL;
+comp+=4;
+switch (level) {
+case 1:
+	for (i = 0; i < uncomp_len; i+=4) {
+	    z = u1;
+	    u1 =(u_uncomp[i  ] << 24) +
+		(u_uncomp[i+1] << 16) +
+		(u_uncomp[i+2] <<  8) +
+		(u_uncomp[i+3] <<  0);
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 24) & 0xff;
+	    comp[i+1] = (delta >> 16) & 0xff;
+	    comp[i+2] = (delta >>  8) & 0xff;
+	    comp[i+3] = (delta >>  0) & 0xff;
+	}
+	break;
+case 2:
+	for (i = 0; i < uncomp_len; i+=4) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 =(u_uncomp[i  ] << 24) +
+		(u_uncomp[i+1] << 16) +
+		(u_uncomp[i+2] <<  8) +
+		(u_uncomp[i+3] <<  0);
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 24) & 0xff;
+	    comp[i+1] = (delta >> 16) & 0xff;
+	    comp[i+2] = (delta >>  8) & 0xff;
+	    comp[i+3] = (delta >>  0) & 0xff;
+	}
+	break;
+case 3:
+	for (i = 0; i < uncomp_len; i+=4) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 =(u_uncomp[i  ] << 24) +
+		(u_uncomp[i+1] << 16) +
+		(u_uncomp[i+2] <<  8) +
+		(u_uncomp[i+3] <<  0);
+	    delta = u1 - z;
+	    comp[i  ] = (delta >> 24) & 0xff;
+	    comp[i+1] = (delta >> 16) & 0xff;
+	    comp[i+2] = (delta >>  8) & 0xff;
+	    comp[i+3] = (delta >>  0) & 0xff;
+	}
+	break;
+default:
+	return NULL;
+}
+comp-=4;
+comp[0] = ZTR_FORM_DELTA4;
+comp[1] = level;
+comp[2] = 0; /* dummy - to align on 4-byte boundary */
+comp[3] = 0; /* dummy - to align on 4-byte boundary */
+*comp_len = uncomp_len+4;
+return comp;
+}
+/*
+* recorrelate4()
+*
+* The reverse of decorrelate4()
+*
+* Arguments:
+*	comp		Compressed input data
+*	comp_len	Length of comp data
+*	uncomp_len	Output: length of uncompressed data
+*
+* Returns:
+*	Success: uncompressed data
+*	Failure: NULL
+*/
+char *recorrelate4(char *x_comp,
+		   int comp_len,
+		   int *uncomp_len) {
+int i, z;
+int u1 = 0, u2 = 0, u3 = 0;
+int level = x_comp[1];
+char *uncomp;
+unsigned char *u_comp = (unsigned char *)x_comp;
+uncomp = (char *)xmalloc(comp_len-4);
+if (!uncomp)
+	return NULL;
+u_comp+=4;
+comp_len-=4;
+*uncomp_len = comp_len;
+switch (level) {
+case 1:
+	for (i = 0; i < comp_len; i+=4) {
+	    z = u1;
+	    u1 = z +
+		((u_comp[i  ] << 24) |
+		 (u_comp[i+1] << 16) |
+		 (u_comp[i+2] <<  8) |
+		  u_comp[i+3]);
+	    uncomp[i  ] = (u1 >> 24) & 0xff;
+	    uncomp[i+1] = (u1 >> 16) & 0xff;
+	    uncomp[i+2] = (u1 >>  8) & 0xff;
+	    uncomp[i+3] = (u1 >>  0) & 0xff;
+	}
+	break;
+case 2:
+	for (i = 0; i < comp_len; i+=4) {
+	    z = 2*u1 - u2;
+	    u2 = u1;
+	    u1 = z +
+		((u_comp[i  ] << 24) |
+		 (u_comp[i+1] << 16) |
+		 (u_comp[i+2] <<  8) |
+		  u_comp[i+3]);
+	    uncomp[i  ] = (u1 >> 24) & 0xff;
+	    uncomp[i+1] = (u1 >> 16) & 0xff;
+	    uncomp[i+2] = (u1 >>  8) & 0xff;
+	    uncomp[i+3] = (u1 >>  0) & 0xff;
+	}
+	break;
+case 3:
+	for (i = 0; i < comp_len; i+=4) {
+	    z = 3*u1 - 3*u2 + u3;
+	    u3 = u2;
+	    u2 = u1;
+	    u1 = z +
+		((u_comp[i  ] << 24) |
+		 (u_comp[i+1] << 16) |
+		 (u_comp[i+2] <<  8) |
+		  u_comp[i+3]);
+	    uncomp[i  ] = (u1 >> 24) & 0xff;
+	    uncomp[i+1] = (u1 >> 16) & 0xff;
+	    uncomp[i+2] = (u1 >>  8) & 0xff;
+	    uncomp[i+3] = (u1 >>  0) & 0xff;
+	}
+	break;
+}
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_16TO8
+* ---------------------------------------------------------------------------
+*/
+/*
+* shrink_16to8()
+*
+* Stores an array of 16-bit (big endian) array elements in an 8-bit array.
+* We assume that most 16-bit elements encode numbers that fit in an 8-bit
+* value. When not possible, we store a marker followed by the 16-bit value
+* stored as multiple 8-bit values.
+*
+*	uncomp		Uncompressed data
+*	uncomp_len	Length of uncompressed data (in bytes)
+*	comp_len	Return: where to store new compressed length
+*
+* Returns:
+*	Success: An 8-bit array (malloced)
+*	Failure: NULL
+*/
+char *shrink_16to8(char *x_uncomp, int uncomp_len, int *comp_len) {
+char *comp;
+int i, j, i16;
+signed char *s_uncomp = (signed char *)x_uncomp;
+/* Allocation - worst case is 3 * (uncomp_len/2) + 1 */
+if (NULL == (comp = (char *)xmalloc(3 * (uncomp_len/2) + 1)))
+	return NULL;
+comp[0] = ZTR_FORM_16TO8;
+for (i = 0, j = 1; i < uncomp_len; i+=2) {
+	i16 = (s_uncomp[i] << 8) | (unsigned char)s_uncomp[i+1];
+	if (i16 >= -127 && i16 <= 127) {
+	    comp[j++] = i16;
+	} else {
+	    comp[j++] = -128;
+	    comp[j++] = s_uncomp[i];
+	    comp[j++] = s_uncomp[i+1];
+	}
+}
+/* Reclaim unneeded memory */
+comp = xrealloc(comp, j);
+*comp_len = j;
+return comp;
+}
+/*
+* expand_8to16()
+*
+* The opposite of the shrink_16to8() function.
+*
+*	comp		Compressed input data
+*	comp_len	Length of comp data (in bytes)
+*	uncomp_len	Output: length of uncompressed data (in bytes)
+*
+* Returns:
+*	Success: Uncompressed data (char *)
+*	Failure: NULL
+*/
+char *expand_8to16(char *x_comp, int comp_len, int *uncomp_len) {
+int i, j;
+char *uncomp;
+signed char *s_comp = (signed char *)x_comp;
+/* Allocation - worst case is twice comp_len */
+if (NULL == (uncomp = (char *)xmalloc(comp_len*2)))
+	return NULL;
+#if 0
+for (i = 0, j = 1; j < comp_len; i+=2) {
+	if (s_comp[j] != -128) {
+	    uncomp[i  ] = s_comp[j] < 0 ? -1 : 0;
+	    uncomp[i+1] = s_comp[j++];
+	} else {
+	    j++;
+	    uncomp[i  ] = s_comp[j++];
+	    uncomp[i+1] = s_comp[j++];
+	}
+}
+#endif
+for (i = 0, j = 1; j < comp_len; i+=2) {
+	if (s_comp[j] >= 0) {
+	    uncomp[i  ] = 0;
+	    uncomp[i+1] = s_comp[j++];
+	} else {
+	    if (s_comp[j] != -128) {
+		uncomp[i+1] = s_comp[j++];
+		uncomp[i  ] = -1;
+	    } else {
+		j++;
+		uncomp[i  ] = s_comp[j++];
+		uncomp[i+1] = s_comp[j++];
+	    }
+	}
+}
+/* Reclaim unneeded memory */
+uncomp = xrealloc(uncomp, i);
+*uncomp_len = i;
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_32TO8
+* ---------------------------------------------------------------------------
+*/
+/*
+* shrink_32to8()
+*
+* Stores an array of 32-bit (big endian) array elements in an 8-bit array.
+* We assume that most 32-bit elements encode numbers that fit in an 8-bit
+* value. When not possible, we store a marker followed by the 32-bit value
+* stored as multiple 8-bit values.
+*
+*	uncomp		Uncompressed data
+*	uncomp_len	Length of uncompressed data (in bytes)
+*	comp_len	Return: where to store new compressed length
+*
+* Returns:
+*	Success: An 8-bit array (malloced)
+*	Failure: NULL
+*/
+char *shrink_32to8(char *x_uncomp, int uncomp_len, int *comp_len) {
+char *comp;
+int i, j, i32;
+signed char *s_uncomp = (signed char *)x_uncomp;
+/* Allocation - worst case is 5 * (uncomp_len/4) + 1 */
+if (NULL == (comp = (char *)xmalloc(5 * (uncomp_len/4) + 1)))
+	return NULL;
+comp[0] = ZTR_FORM_32TO8;
+for (i = 0, j = 1; i < uncomp_len; i+=4) {
+	i32 = (s_uncomp[i] << 24) |
+	    (s_uncomp[i+1] << 16) |
+	    (s_uncomp[i+2] <<  8) |
+	    (unsigned char)s_uncomp[i+3];
+	if (i32 >= -127 && i32 <= 127) {
+	    comp[j++] = i32;
+	} else {
+	    comp[j++] = -128;
+	    comp[j++] = s_uncomp[i];
+	    comp[j++] = s_uncomp[i+1];
+	    comp[j++] = s_uncomp[i+2];
+	    comp[j++] = s_uncomp[i+3];
+	}
+}
+/* Reclaim unneeded memory */
+comp = xrealloc(comp, j);
+*comp_len = j;
+return comp;
+}
+/*
+* expand_8to32()
+*
+* The opposite of the shrink_32to8() function.
+*
+*	comp		Compressed input data
+*	comp_len	Length of comp data (in bytes)
+*	uncomp_len	Output: length of uncompressed data (in bytes)
+*
+* Returns:
+*	Success: Uncompressed data (char *)
+*	Failure: NULL
+*/
+char *expand_8to32(char *comp, int comp_len, int *uncomp_len) {
+int i, j;
+char *uncomp;
+signed char *s_comp = (signed char *)comp;
+/* Allocation - worst case is four times comp_len */
+if (NULL == (uncomp = (char *)xmalloc(comp_len*4)))
+	return NULL;
+for (i = 0, j = 1; j < comp_len; i+=4) {
+	if (s_comp[j] != -128) {
+	    uncomp[i  ] = s_comp[j] < 0 ? -1 : 0;
+	    uncomp[i+1] = s_comp[j] < 0 ? -1 : 0;
+	    uncomp[i+2] = s_comp[j] < 0 ? -1 : 0;
+	    uncomp[i+3] = s_comp[j++];
+	} else {
+	    j++;
+	    uncomp[i  ] = s_comp[j++];
+	    uncomp[i+1] = s_comp[j++];
+	    uncomp[i+2] = s_comp[j++];
+	    uncomp[i+3] = s_comp[j++];
+	}
+}
+/* Reclaim unneeded memory */
+uncomp = xrealloc(uncomp, i);
+*uncomp_len = i;
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_FOLLOW1
+* ---------------------------------------------------------------------------
+*/
+static int follow_tab[256][256];
+char *follow1(char *x_uncomp,
+	      int uncomp_len,
+	      int *comp_len) {
+char *comp = (char *)xmalloc(uncomp_len + 256 + 1);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+signed char *s_uncomp = ((signed char *)x_uncomp);
+int i, j;
+char next[256];
+int count[256];
+if (!comp)
+	return NULL;
+/* Count di-freqs */
+memset(follow_tab, 0, 256*256*sizeof(int));
+#if 0
+for (i = 0; i < uncomp_len-1; i++)
+	follow_tab[u_uncomp[i]][u_uncomp[i+1]]++;
+/* Pick the most frequent next byte from the preceeding byte */
+for (i = 0; i < 256; i++) {
+	int bestval, bestind;
+	bestval = bestind = 0;
+	for (j = 0; j < 256; j++) {
+	    if (follow_tab[i][j] > bestval) {
+		bestval = follow_tab[i][j];
+		bestind = j;
+	    }
+	}
+	next[i] = bestind;
+}
+#endif
+memset(next, 0, 256*sizeof(*next));
+memset(count, 0, 256*sizeof(*count));
+/* Pick the most frequent next byte from the preceeding byte */
+for (i = 0; i < uncomp_len-1; ) {
+	int cur = u_uncomp[i];
+	int nxt = u_uncomp[++i];
+	int folcnt = ++follow_tab[cur][nxt];
+	if (folcnt > count[cur]) {
+	    count[cur] = folcnt;
+	    next[cur] = nxt;
+	}
+}
+j = 0;
+comp[j++] = ZTR_FORM_FOLLOW1;
+/* Output 'next' array */
+for (i = 0; i < 256; i++, j++)
+	comp[j] = next[i];
+/* Output new 'uncomp' as next['uncomp'] */
+comp[j++] = u_uncomp[0];
+for (i = 1; i < uncomp_len; i++, j++) {
+	comp[j] = next[u_uncomp[i-1]] - s_uncomp[i];
+}
+*comp_len = j;
+return comp;
+}
+char *unfollow1(char *x_comp,
+		int comp_len,
+		int *uncomp_len) {
+unsigned char *u_uncomp;
+int i, j;
+char next[256];
+unsigned char *u_comp = (unsigned char *)x_comp;
+signed   char *s_comp = (signed   char *)x_comp;
+u_uncomp = (unsigned char *)xmalloc(comp_len-256-1);
+if (!u_uncomp)
+	return NULL;
+/* Load next[] array */
+j = 1;
+for (i = 0; i < 256; i++, j++)
+	next[i] = u_comp[j];
+/* Replace comp[x] with next[comp[x-1]] - comp[x]*/
+u_uncomp[0] = u_comp[j++];
+comp_len -= 257;
+s_comp   += 257;
+for (i = 1; i < comp_len; i++) {
+	u_uncomp[i] = next[u_uncomp[i-1]] - s_comp[i];
+}
+*uncomp_len = i;
+return (char *)u_uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_CHEB445
+* ---------------------------------------------------------------------------
+*/
+#if 0
+/*
+* Compresses using chebyshev polynomials to predict the next peak.
+* Based on around 96 modern ABI files it compresses by around 5% (varied
+* between 3.9 and 6.6).
+*/
+/*
+* For now this has been disabled in favour of the integer version below
+* as we cannot guarantee all systems to have the same floating point
+* roundings, especially with the final conversion to integer.
+* (Also, for some unknown reason, the integer version produces smaller
+* files.)
+*/
+char *cheb445comp(char *uncomp, int uncomp_len, int *data_len)
+{
+int i, k, l, z;
+int datap;
+float frac[5];
+float fz[25];
+signed short *d16 = (signed short *)uncomp;
+int nwords = uncomp_len / 2;
+short *dptr = d16;
+signed short *data = (signed short *)malloc((nwords+1)*sizeof(short));
+data[0] = le_int2(ZTR_FORM_CHEB445);
+/* Check for boundary cases */
+if (nwords <= 4) {
+	switch (nwords) {
+	case 4:
+	    data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
+	case 3:
+	    data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
+	case 2:
+	    data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
+	case 1:
+	    data[1] = be_int2(d16[0]);
+	}
+	*data_len = nwords*2;
+	return (char *)data;
+}
+/* First 4 values are just direct deltas */
+data[1] = be_int2(d16[0]);
+data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
+data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
+data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
+datap = 5;
+/* Initialise - speeds up loop */
+for (k = 0; k < 5; k++) {
+	float kx = cos(M_PI*(k+0.5)/5)*1.5;
+	frac[k] = (kx + 1.5) - (int)(kx + 1.5);
+}
+for (z = l = 0; l < 5; l++) {
+	for (k = 0; k < 5; k++, z++) {
+	    fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5);
+	}
+}
+/* Loop */
+for (i = 0; i < nwords-4; i++) {
+	float dd, y = 10/3.0;
+	float f[5], coef[5];
+	signed short diff;
+	int p;
+	f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0];
+	f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1];
+	f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2];
+	f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3];
+	f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4];
+	for (z = l = 0; l < 5; l++, z+=5)
+	    coef[l] = f[0] * fz[z+0] +
+		      f[1] * fz[z+1] +
+		      f[2] * fz[z+2] +
+		      f[3] * fz[z+3] +
+		      f[4] * fz[z+4];
+	dd = y*coef[3]+coef[2];
+	p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0;
+	if (p < 0) p = 0;
+	diff = be_int2(dptr[4]) - p;
+	data[datap++] = be_int2(diff);
+	dptr++;
+}
+*data_len = datap*2;
+return (char *)data;
+}
+char *cheb445uncomp(char *comp, int comp_len, int *uncomp_len)
+{
+int i, k, l, z;
+float frac[5];
+float fz[25];
+signed short *d16 = (signed short *)comp;
+int nwords = comp_len / 2;
+signed short *data = (signed short *)xmalloc(comp_len);
+short *dptr = data, *dptr2 = d16;
+/* Check for boundary cases */
+if (nwords <= 3) {
+	switch (nwords) {
+	case 3:
+	    data[0] = be_int2(d16[1]);
+	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+	    data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
+	    break;
+	case 2:
+	    data[0] = be_int2(d16[1]);
+	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+	    break;
+	case 1:
+	    data[0] = be_int2(d16[1]);
+	    break;
+	}
+	*uncomp_len = (nwords-1)*2;
+	return (char *)data;
+}
+/* First 3 values are just direct deltas */
+data[0] = be_int2(d16[1]);
+data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
+data[3] = be_int2(be_int2(d16[4])+be_int2(data[2]));
+dptr2 += 5;
+/* Initialise - speeds up loop */
+for (k = 0; k < 5; k++) {
+	float kx = cos(M_PI*(k+0.5)/5)*1.5;
+	frac[k] = (kx + 1.5) - (int)(kx + 1.5);
+}
+for (z = l = 0; l < 5; l++) {
+	for (k = 0; k < 5; k++, z++) {
+	    fz[z] = 0.4 * cos(l * M_PI*(k+0.5)/5);
+	}
+}
+/* Loop */
+for (i = 0; i < nwords-3; i++) {
+	float dd, y = 10/3.0;
+	float f[5], coef[5];
+	signed short diff;
+	int p;
+	f[0] = be_int2(dptr[2])*frac[4] + be_int2(dptr[3])*frac[0];
+	f[1] = be_int2(dptr[2])*frac[3] + be_int2(dptr[3])*frac[1];
+	f[2] = be_int2(dptr[1])*frac[2] + be_int2(dptr[2])*frac[2];
+	f[3] = be_int2(dptr[0])*frac[1] + be_int2(dptr[1])*frac[3];
+	f[4] = be_int2(dptr[0])*frac[0] + be_int2(dptr[1])*frac[4];
+	for (z = l = 0; l < 5; l++, z+=5)
+	    coef[l] = f[0] * fz[z+0] +
+		      f[1] * fz[z+1] +
+		      f[2] * fz[z+2] +
+		      f[3] * fz[z+3] +
+		      f[4] * fz[z+4];
+	dd = y*coef[3]+coef[2];
+	p = 0.5 + 5/3.0*(y*dd-coef[3]+coef[1])-dd+coef[0]/2.0;
+	if (p < 0) p = 0;
+	diff = be_int2(*dptr2) + p;
+	dptr[4] = be_int2(diff);
+	dptr++;
+	dptr2++;
+}
+*uncomp_len = (nwords-1)*2;
+return (char *)data;
+}
+#endif
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_ICHEB
+* ---------------------------------------------------------------------------
+*/
+/*
+* Integer versions of the chebyshev polynomial compressor. This uses
+* the polynomials to predict the next peak from the preceeding 3.
+* Tested on 100 ABI-3700, Megabace and Licor files it compressed by
+* around 7-8%. (Oddly this is slightly more than the floating point
+* version.)
+*
+* These require 32-bit integers and have code to make sure that arithmetic
+* does not overflow this.
+*/
+#define CH1 150
+#define CH2 105
+char *ichebcomp(char *uncomp, int uncomp_len, int *data_len)
+{
+int i, l, z;
+int datap;
+int frac[5] = {139,57,75,93,11};
+int fz[20] = {42, 42, 42, 42, 42,
+		  39, 24,  0,-24,-39,
+		  33,-12,-42,-12, 33,
+		  24,-39,  0, 39,-24};
+int dfac;
+signed short *d16 = (signed short *)uncomp;
+int nwords = uncomp_len / 2;
+short *dptr = d16;
+signed short *data = (signed short *)malloc((nwords+1)*sizeof(short));
+data[0] = le_int2(ZTR_FORM_ICHEB);
+/* Check for boundary cases */
+if (nwords <= 4) {
+	switch (nwords) {
+	case 4:
+	    data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
+	case 3:
+	    data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
+	case 2:
+	    data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
+	case 1:
+	    data[1] = be_int2(d16[0]);
+	}
+	*data_len = nwords*2;
+	return (char *)data;
+}
+/* First 4 values are just direct deltas */
+data[1] = be_int2(d16[0]);
+data[2] = be_int2(be_int2(d16[1])-be_int2(d16[0]));
+data[3] = be_int2(be_int2(d16[2])-be_int2(d16[1]));
+data[4] = be_int2(be_int2(d16[3])-be_int2(d16[2]));
+datap = 5;
+/* Loop */
+for (i = 4; i < nwords; i++) {
+	int dd, f[5];
+	signed int coef[4];
+	signed short diff;
+	int p;
+	/*
+	 * FIXME: As an alternative to the range checking below, if we
+	 * scale dptr[X] such that it's never higher than 2800 then
+	 * the 32-bit arithmetic will never overflow. Practically speaking,
+	 * all observed ABI and Megabace files have vales up to 1600 only.
+	 */
+	/*
+	 * frac[N] is always paired with frac[4-N], summing to 1.0 - or
+	 * 150 when scaled.
+	 * Hence f[0] has range 0 to 65536*150.
+	 */
+	f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] +
+	       ((unsigned short)be_int2(dptr[3]))*frac[0];
+	f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] +
+	       ((unsigned short)be_int2(dptr[3]))*frac[1];
+	f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] +
+	       ((unsigned short)be_int2(dptr[2]))*frac[2];
+	f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] +
+	       ((unsigned short)be_int2(dptr[1]))*frac[3];
+	f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] +
+	       ((unsigned short)be_int2(dptr[1]))*frac[4];
+	/*
+	 * fz[z+0..5] sums to no more than 210 (5*42) and no less than 0.
+	 * Therefore coef[l] has range 0 to 65536*150*210, which (just)
+	 * fits in 31-bits, plus 1 for the sign.
+	 */
+	for (z = l = 0; l < 4; l++, z+=5)
+	    coef[l] = (f[0] * fz[z+0] +
+		       f[1] * fz[z+1] +
+		       f[2] * fz[z+2] +
+		       f[3] * fz[z+3] +
+		       f[4] * fz[z+4]);
+	/*
+	 * computing p requires at most a temporary variable of
+	 * 24.1 * coef, but coef may be a full 32-bit integer.
+	 * If coef is sufficiently close to cause an integer overflow then
+	 * we scale it down.
+	 */
+	{
+	    int max = 0;
+	    for (l = 0; l < 4; l++) {
+		if (max < ABS(coef[l]))
+		    max = ABS(coef[l]);
+	    }
+	    if (max > 1<<26) {
+		dfac = max / (1<<26) + 1;
+		for (l = 0; l < 4; l++)
+		    coef[l] /= dfac;
+	    } else {
+		dfac = 1;
+	    }
+	}
+	dd = (coef[3]/3)*10+coef[2];
+	p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2);
+	p *= dfac;
+	if (p < 0) p = 0;
+	diff = be_int2(dptr[4]) - p;
+	data[datap++] = be_int2(diff);
+	dptr++;
+}
+*data_len = datap*2;
+return (char *)data;
+}
+char *ichebuncomp(char *comp, int comp_len, int *uncomp_len)
+{
+int i, l, z;
+int frac[5] = {139,57,75,93,11};
+int fz[20] = {42, 42, 42, 42, 42,
+		  39, 24,  0,-24,-39,
+		  33,-12,-42,-12, 33,
+		  24,-39,  0, 39,-24};
+signed short *d16 = (signed short *)comp;
+int nwords = comp_len / 2 - 1;
+signed short *data = (signed short *)xmalloc(comp_len);
+short *dptr = data, *dptr2 = d16;
+int dfac;
+/* Check for boundary cases */
+if (nwords <= 4) {
+	switch (nwords) {
+	case 4:
+	    data[0] = be_int2(d16[1]);
+	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+	    data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
+	    data[3] = be_int2(be_int2(d16[4])+be_int2(data[2]));
+	    break;
+	case 3:
+	    data[0] = be_int2(d16[1]);
+	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+	    data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
+	    break;
+	case 2:
+	    data[0] = be_int2(d16[1]);
+	    data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+	    break;
+	case 1:
+	    data[0] = be_int2(d16[1]);
+	    break;
+	}
+	*uncomp_len = nwords*2;
+	return (char *)data;
+}
+/* First 4 values are just direct deltas */
+data[0] = be_int2(d16[1]);
+data[1] = be_int2(be_int2(d16[2])+be_int2(data[0]));
+data[2] = be_int2(be_int2(d16[3])+be_int2(data[1]));
+data[3] = be_int2(be_int2(d16[4])+be_int2(data[2]));
+dptr2 += 5;
+/* Loop */
+for (i = 4; i < nwords; i++) {
+	int dd, coef[5], f[5];
+	signed short diff;
+	int p;
+	f[0] = ((unsigned short)be_int2(dptr[2]))*frac[4] +
+	       ((unsigned short)be_int2(dptr[3]))*frac[0];
+	f[1] = ((unsigned short)be_int2(dptr[2]))*frac[3] +
+	       ((unsigned short)be_int2(dptr[3]))*frac[1];
+	f[2] = ((unsigned short)be_int2(dptr[1]))*frac[2] +
+	       ((unsigned short)be_int2(dptr[2]))*frac[2];
+	f[3] = ((unsigned short)be_int2(dptr[0]))*frac[1] +
+	       ((unsigned short)be_int2(dptr[1]))*frac[3];
+	f[4] = ((unsigned short)be_int2(dptr[0]))*frac[0] +
+	       ((unsigned short)be_int2(dptr[1]))*frac[4];
+	for (z = l = 0; l < 4; l++, z+=5)
+	    coef[l] = f[0] * fz[z+0] +
+		      f[1] * fz[z+1] +
+		      f[2] * fz[z+2] +
+		      f[3] * fz[z+3] +
+		      f[4] * fz[z+4];
+	/*
+	 * computing p requires at most a temporary variable of
+	 * 24.1 * coef, but coef may be a full 32-bit integer.
+	 * If coef is sufficiently close to cause an integer overflow then
+	 * we scale it down.
+	 */
+	{
+	    int max = 0;
+	    for (l = 0; l < 4; l++) {
+		if (max < ABS(coef[l]))
+		    max = ABS(coef[l]);
+	    }
+	    if (max > 1<<26) {
+		dfac = max / (1<<26) + 1;
+		for (l = 0; l < 4; l++)
+		    coef[l] /= dfac;
+	    } else {
+		dfac = 1;
+	    }
+	}
+	dd = (coef[3]/3)*10+coef[2];
+	p = ((((dd/3)*10-coef[3]+coef[1])/3)*5-dd+coef[0]/2)/(CH1*CH2);
+	p *= dfac;
+	if (p < 0) p = 0;
+	diff = be_int2(*dptr2) + p;
+	dptr[4] = be_int2(diff);
+	dptr++;
+	dptr2++;
+}
+*uncomp_len = nwords*2;
+return (char *)data;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_LOG
+* ---------------------------------------------------------------------------
+*/
+/*
+* This is a LOSSY compression. It replaces N with 10 * log2(N).
+* (Just an idea, and not a great one it seems.)
+*/
+char *log2_data(char *x_uncomp,
+		int uncomp_len,
+		int *comp_len) {
+int i, u1, l1;
+char *comp = (char *)xmalloc(uncomp_len + 2);
+unsigned char *u_uncomp = (unsigned char *)x_uncomp;
+if (!comp)
+	return NULL;
+comp+=2;
+for (i = 0; i < uncomp_len; i+=2) {
+	u1 =(u_uncomp[i  ] <<  8) +
+	    (u_uncomp[i+1] <<  0);
+	l1 = (int)(10 * log(u1+1) / log(2));
+	comp[i  ] = (l1 >> 8) & 0xff;
+	comp[i+1] = (l1 >> 0) & 0xff;
+}
+comp-=2;
+comp[0] = ZTR_FORM_LOG2;
+comp[1] = 0; /* dummy - to align on 2-byte boundary */
+*comp_len = uncomp_len+2;
+return comp;
+}
+char *unlog2_data(char *x_comp,
+		  int comp_len,
+		  int *uncomp_len) {
+int i, u1, l1;
+char *uncomp;
+unsigned char *u_comp = (unsigned char *)x_comp;
+uncomp = (char *)xmalloc(comp_len-2);
+if (!uncomp)
+	return NULL;
+u_comp+=2;
+comp_len-=2;
+*uncomp_len = comp_len;
+for (i = 0; i < comp_len; i+=2) {
+	l1 = ((u_comp[i  ] << 8) |
+	      (u_comp[i+1] << 0));
+	u1 = (int)pow(2.0, l1/10.0)-1;
+	uncomp[i  ] = (u1 >> 8) & 0xff;
+	uncomp[i+1] = (u1 >> 0) & 0xff;
+}
+return uncomp;
+}
+/*
+* ---------------------------------------------------------------------------
+* ZTR_FORM_STHUFF
+* ---------------------------------------------------------------------------
+*/
+/*
+* Implements compression using a set of static huffman codes stored using
+* the Deflate algorithm (and so in this respect it's similar to zlib).
+*
+* The huffman codes though can be previously stored in the ztr object
+* using ztr_add_hcode(). "cset" indicates which numbered stored huffman
+* code set is to be used, or passing zero will use inline codes (ie they
+* are stored in the data stream itself, just as in standard deflate).
+*
+* Arguments:
+*	ztr		ztr_t pointer; used to find stored code-sets
+*	uncomp		The uncompressed input data
+*	uncomp_len	Length of uncomp
+*	cset		Stored code-set number, zero for inline
+*	recsz		Record size - only used when cset == 0.
+*	comp_len	Output: length of compressed data
+*
+* Returns:
+*	Compressed data stream if successful + comp_len
+*      NULL on failure
+*/
+char *sthuff(ztr_t *ztr, char *uncomp, int uncomp_len,
+	     int cset, int recsz, int *comp_len) {
+block_t *blk = block_create(NULL, 2);
+unsigned char bytes[2];
+huffman_codeset_t *c = NULL;
+unsigned char *comp = NULL;
+ztr_hcode_t *hc = NULL;
+if (cset >= CODE_USER) {
+	if (NULL == (hc = ztr_find_hcode(ztr, cset)))
+	    return NULL;
+	c = hc->codes;
+} else if (cset != CODE_INLINE) {
+	c = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0);
+}
+if (!c) {
+	/* No cached ones found, so inline some instead */
+	cset = 0;
+	c = generate_code_set(0, recsz, (unsigned char *)uncomp, uncomp_len, 1,
+			      MAX_CODE_LEN, 0);
+}
+bytes[0] = ZTR_FORM_STHUFF;
+bytes[1] = cset;
+store_bytes(blk, bytes, 2);
+if (hc) {
+	if (!c->blk) {
+	    c->blk = block_create(NULL, 2);
+	    store_codes(c->blk, c, 1);
+	}
+	blk->bit = c->blk->bit;
+} else {
+	store_codes(blk, c, 1);
+}
+/*
+{int i;
+for (i = 0; i < c->ncodes; i++) {
+	output_code_set(stderr, c->codes[i]);
+}}
+*/
+/*
+* Unless CODE_INLINE, all we wanted to know is what bit number
+* to start on. The above is therefore somewhat inefficient.
+*/
+if (cset != 0) {
+	blk->byte = 2;
+	memset(&blk->data[2], 0, blk->alloc - 2);
+}
+if (0 == huffman_multi_encode(blk, c, cset,
+			    (unsigned char *)uncomp, uncomp_len)) {
+	comp = blk->data;
+	*comp_len = blk->byte + (blk->bit != 0);
+	block_destroy(blk, 1);
+}
+if (cset == 0)
+	huffman_codeset_destroy(c);
+return (char *)comp;
+}
+char *unsthuff(ztr_t *ztr, char *comp, int comp_len, int *uncomp_len) {
+int cset = (unsigned char)(comp[1]);
+huffman_codeset_t *cs = NULL, *cs_free = NULL;
+block_t *blk_in = block_create(NULL, comp_len),
+	*blk_out = block_create(NULL, 1000);
+int bfinal = 1, bit_num = 0;
+char *uncomp;
+if (cset >= CODE_USER) {
+	/* Scans through HUFF chunks */
+	ztr_hcode_t *hc = ztr_find_hcode(ztr, cset);
+	if (!hc)
+	    return NULL;
+	cs = hc->codes;
+	bit_num = cs->bit_num;
+	blk_in->bit = 0;
+} else if (cset > 0) {
+	/* Create some temporary huffman_codes to stringify */
+	cs_free = cs = generate_code_set(cset, 1, NULL, 0, 1, MAX_CODE_LEN, 0);
+	if (!cs)
+	    return NULL;
+	bit_num = cs->bit_num;
+	blk_in->bit = 0;
+} /* else inline codes */
+/*
+* We need to know at what bit the huffman codes would have ended on
+* so we can store our huffman encoded symbols immediately following it.
+* For speed though this bit-number is cached.
+*/
+blk_in->data[blk_in->byte++] |= *(comp+2);
+store_bytes(blk_in, (unsigned char *)comp+3, comp_len-3);
+/* Rewind */
+blk_in->byte = 0;
+blk_in->bit = bit_num;
+do {
+	block_t *out;
+	/*
+	 * We're either at the start of a block with codes to restore
+	 * (cset == INLINE or the 2nd onwards block) or we've already
+	 * got some codes in cs and we're at the position where huffman
+	 * encoded symbols are stored.
+	 */
+	if (!cs)
+	    if (NULL == (cs = cs_free = restore_codes(blk_in, &bfinal)))
+		return NULL;
+	/*
+	{int i;
+	for (i = 0; i < cs->ncodes; i++) {
+	    output_code_set(stderr, cs->codes[i]);
+	}}
+	*/
+	if (NULL == (out = huffman_multi_decode(blk_in, cs))) {
+	    huffman_codeset_destroy(cs);
+	    return NULL;
+	}
+	/* Could optimise this for the common case of only 1 block */
+	store_bytes(blk_out, out->data, out->byte);
+	block_destroy(out, 0);
+	if (cs_free)
+	    huffman_codeset_destroy(cs_free);
+	cs = cs_free = NULL;
+} while (!bfinal);
+*uncomp_len = blk_out->byte;
+uncomp = (char *)blk_out->data;
+block_destroy(blk_in, 0);
+block_destroy(blk_out, 1);
+return uncomp;
+}
+#ifndef NDEBUG
+#define SYM_EOF 256
+static void output_code_set(FILE *fp, huffman_codes_t *cds) {
+int i, j;
+int nbits_in = 0, nbits_out = 0;
+huffman_code_t *codes = cds->codes;
+int ncodes = cds->ncodes;
+fprintf(fp, "static huffman_code_t codes_FIXME[] = {\n");
+for (i = j = 0; i < ncodes; i++) {
+	nbits_out += codes[i].nbits * codes[i].freq;
+	nbits_in  += 8*codes[i].freq;
+	if (j == 0)
+	    fprintf(fp, "    ");
+	if (codes[i].symbol == SYM_EOF) {
+	    fprintf(fp, "{SYM_EOF,%3d}, ", codes[i].nbits);
+	    j = 10;
+	} else {
+	    if (isalnum(codes[i].symbol)) {
+		fprintf(fp, "{'%c',%3d}, ", codes[i].symbol, codes[i].nbits);
+	    } else {
+		fprintf(fp, "{%3d,%3d}, ", codes[i].symbol, codes[i].nbits);
+	    }
+	}
+	j++;
+	if (j >= 6) {
+	    fputc('\n', fp);
+	    j = 0;
+	}
+}
+if (j)
+	fputc('\n', fp);
+fprintf(fp, "};\n");
+fprintf(fp, "/* Expected compression to %f of input */\n",
+	    (double)nbits_out/nbits_in);
+}
+#endif
+/*
+* Reorders quality data from its RAW format to an interleaved 4-byte
+* aligned format.
+*
+* Starting with sequence A1 C2 G3 the raw format is quality of called
+* bases followed by quality of remaining bases in triplets per base:
+* 0 (RAW format)
+* Q(A1) Q(C2) Q(G3)
+* Q(C1) Q(G1) Q(T1)
+* Q(A2) Q(G2) Q(T2)
+* Q(A3) Q(C3) Q(T3)
+*
+* We reorder it to:
+* ZTR_FORM_QSHIFT <any> <any> 0(raw)
+* Q(A1) Q(C1) Q(G1) Q(T1)
+* Q(C2) Q(A2) Q(G2) Q(T2)
+* Q(G3) Q(A3) Q(C3) Q(T3)
+*
+* Returns shifted data on success
+*         NULL on failure
+*/
+char *qshift(char *qold, int qlen, int *new_len) {
+int i, j, k;
+char *qnew;
+int nbases;
+/*
+* Correct input is raw encoding + 4x nbases bytes
+*/
+if ((qlen-1)%4 != 0 || *qold != 0)
+	return NULL;
+nbases = (qlen-1)/4;
+qnew = (char *)malloc((nbases+1)*4);
+qnew[0] = ZTR_FORM_QSHIFT; /* reorder code */
+qnew[1] = -40;  /* pad */
+qnew[2] = -40;  /* pad */
+qnew[3] = qold[0];
+for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) {
+	qnew[j  ] = qold[1+i  ];
+	qnew[j+1] = qold[1+k  ];
+	qnew[j+2] = qold[1+k+1];
+	qnew[j+3] = qold[1+k+2];
+}
+*new_len = (nbases+1)*4;
+return qnew;
+}
+/*
+* The opposite transform from qshift() above.
+*
+* Returns unshifted data on success
+*         NULL on failure.
+*/
+char *unqshift(char *qold, int qlen, int *new_len) {
+int i, j, k;
+char *qnew;
+int nbases;
+/*
+* Correct input is 4x (nbases+1) bytes
+*/
+if (qlen%4 != 0 || *qold != ZTR_FORM_QSHIFT)
+	return NULL;
+nbases = qlen/4-1;
+qnew = (char *)malloc(nbases*4+1);
+qnew[0] = 0; /* raw byte */
+for (i = 0, j = 4, k = nbases; i < nbases; i++, j+=4, k+=3) {
+	qnew[1+i  ] = qold[j  ];
+	qnew[1+k  ] = qold[j+1];
+	qnew[1+k+1] = qold[j+2];
+	qnew[1+k+2] = qold[j+3];
+}
+*new_len = nbases*4+1;
+return qnew;
+}
+/*
+* Given a sequence ACTG this shifts trace data from the order:
+*
+*     A1A2A3A4 C1C2C3C4 G1G2G3G4 T1T2T3T4
+*
+* to
+*
+*     A1C1G1T1 C2A2G2T2 T3A3C3G3 G4C4C4T4
+*
+* Ie for each base it ouputs the signal for the called base first
+* followed by the remaining 3 signals in A,C,G,T order (minus the
+* called signal already output).
+*/
+/*
+* NCBI uses a rotation mechanism. Thus instead of converting acGt (G
+* called) to Gact they produce Gtac.
+*
+* Given 4 columns of data the two schemes produce value orderings as:
+*
+* Rotate: Acgt       Shift: Acgt
+*         Cgta              Cagt
+*         Gtac              Gact
+*         Tacg              Tacg
+*
+* As a consequence any channel bias for a/c/g/t is better preserved
+* by shifting than it is by rotating as each column (except #1) are
+* only populated by two base types and 2 of those columns are on a
+* 3:1 ratio.
+*/
+/* #define ROTATE_INSTEAD */
+char *tshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) {
+int nc, i;
+ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc);
+char *bases;
+int nbases;
+unsigned short *tnew, *told;
+if (nc == 0)
+	return NULL;
+if (*told_c != 0)
+	return NULL; /* assume RAW format trace input */
+/* Use last BASE chunk if multiple are present */
+/* FIXME: ensure uncompressed first */
+bases  = base[nc-1]->data+1;
+nbases = base[nc-1]->dlength-1;
+if (nbases != (tlen-2)/8) {
+	fprintf(stderr, "Mismatch in number of base calls to samples\n");
+	return NULL;
+}
+/* Allocate and initialise header */
+told = ((unsigned short *)told_c) + 1;
+*new_len = (nbases*4+4) * sizeof(*tnew);
+tnew = (unsigned short *)malloc(*new_len);
+for (i = 0; i < 4; i++) {
+	tnew[i] = 0;
+}
+((char *)tnew)[0] = ZTR_FORM_TSHIFT;
+#ifdef ROTATE_INSTEAD
+/* Reorder */
+for (i = 0; i < nbases; i++) {
+	switch(bases[i]) {
+	case 'T':
+	    tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */
+	    tnew[4+i*4+1] = told[0*nbases+i];
+	    tnew[4+i*4+2] = told[1*nbases+i];
+	    tnew[4+i*4+3] = told[2*nbases+i];
+	    break;
+	case 'G':
+	    tnew[4+i*4+0] = told[2*nbases+i]; /* GTAC */
+	    tnew[4+i*4+1] = told[3*nbases+i];
+	    tnew[4+i*4+2] = told[0*nbases+i];
+	    tnew[4+i*4+3] = told[1*nbases+i];
+	    break;
+	case 'C':
+	    tnew[4+i*4+0] = told[1*nbases+i]; /* CGTA */
+	    tnew[4+i*4+1] = told[2*nbases+i];
+	    tnew[4+i*4+2] = told[3*nbases+i];
+	    tnew[4+i*4+3] = told[0*nbases+i];
+	    break;
+	default:
+	    tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */
+	    tnew[4+i*4+1] = told[1*nbases+i];
+	    tnew[4+i*4+2] = told[2*nbases+i];
+	    tnew[4+i*4+3] = told[3*nbases+i];
+	    break;
+	}
+}
+#else
+/* Reorder */
+for (i = 0; i < nbases; i++) {
+	switch(bases[i]) {
+	case 'T':
+	    tnew[4+i*4+0] = told[3*nbases+i]; /* TACG */
+	    tnew[4+i*4+1] = told[0*nbases+i];
+	    tnew[4+i*4+2] = told[1*nbases+i];
+	    tnew[4+i*4+3] = told[2*nbases+i];
+	    break;
+	case 'G':
+	    tnew[4+i*4+0] = told[2*nbases+i]; /* GACT */
+	    tnew[4+i*4+1] = told[0*nbases+i];
+	    tnew[4+i*4+2] = told[1*nbases+i];
+	    tnew[4+i*4+3] = told[3*nbases+i];
+	    break;
+	case 'C':
+	    tnew[4+i*4+0] = told[1*nbases+i]; /* CAGT */
+	    tnew[4+i*4+1] = told[0*nbases+i];
+	    tnew[4+i*4+2] = told[2*nbases+i];
+	    tnew[4+i*4+3] = told[3*nbases+i];
+	    break;
+	default:
+	    tnew[4+i*4+0] = told[0*nbases+i]; /* ACGT */
+	    tnew[4+i*4+1] = told[1*nbases+i];
+	    tnew[4+i*4+2] = told[2*nbases+i];
+	    tnew[4+i*4+3] = told[3*nbases+i];
+	    break;
+	}
+}
+#endif
+xfree(base);
+return (char *)tnew;
+}
+char *untshift(ztr_t *ztr, char *told_c, int tlen, int *new_len) {
+unsigned short *tnew, *told = (unsigned short *)told_c;
+int nc, nbases, i;
+char *bases;
+ztr_chunk_t **base = ztr_find_chunks(ztr, ZTR_TYPE_BASE, &nc);
+if (nc == 0)
+	return NULL;
+/* Use last BASE chunk if multiple are present */
+uncompress_chunk(ztr, base[nc-1]);
+bases  = base[nc-1]->data+1;
+nbases = base[nc-1]->dlength-1;
+*new_len = 2 + nbases*4 * sizeof(*tnew);
+tnew = (unsigned short *)malloc(*new_len);
+tnew[0] = 0;
+told += 4;
+#ifdef ROTATE_INSTEAD
+/* Reorder */
+for (i = 0; i < nbases; i++) {
+	switch(bases[i]) {
+	case 'T':
+	    tnew[1+3*nbases+i] = *told++; /* TACG */
+	    tnew[1+0*nbases+i] = *told++;
+	    tnew[1+1*nbases+i] = *told++;
+	    tnew[1+2*nbases+i] = *told++;
+	    break;
+	case 'G':
+	    tnew[1+2*nbases+i] = *told++; /* GTAC */
+	    tnew[1+3*nbases+i] = *told++;
+	    tnew[1+0*nbases+i] = *told++;
+	    tnew[1+1*nbases+i] = *told++;
+	    break;
+	case 'C':
+	    tnew[1+1*nbases+i] = *told++; /* CGTA */
+	    tnew[1+2*nbases+i] = *told++;
+	    tnew[1+3*nbases+i] = *told++;
+	    tnew[1+0*nbases+i] = *told++;
+	    break;
+	default:
+	    tnew[1+0*nbases+i] = *told++; /* ACGT */
+	    tnew[1+1*nbases+i] = *told++;
+	    tnew[1+2*nbases+i] = *told++;
+	    tnew[1+3*nbases+i] = *told++;
+	    break;
+	}
+}
+#else
+/* Reorder */
+for (i = 0; i < nbases; i++) {
+	switch(bases[i]) {
+	case 'T':
+	    tnew[1+3*nbases+i] = *told++; /* TACG */
+	    tnew[1+0*nbases+i] = *told++;
+	    tnew[1+1*nbases+i] = *told++;
+	    tnew[1+2*nbases+i] = *told++;
+	    break;
+	case 'G':
+	    tnew[1+2*nbases+i] = *told++; /* GACT */
+	    tnew[1+0*nbases+i] = *told++;
+	    tnew[1+1*nbases+i] = *told++;
+	    tnew[1+3*nbases+i] = *told++;
+	    break;
+	case 'C':
+	    tnew[1+1*nbases+i] = *told++; /* CAGT */
+	    tnew[1+0*nbases+i] = *told++;
+	    tnew[1+2*nbases+i] = *told++;
+	    tnew[1+3*nbases+i] = *told++;
+	    break;
+	default:
+	    tnew[1+0*nbases+i] = *told++; /* ACGT */
+	    tnew[1+1*nbases+i] = *told++;
+	    tnew[1+2*nbases+i] = *told++;
+	    tnew[1+3*nbases+i] = *told++;
+	    break;
+	}
+}
+#endif
+xfree(base);
+return (char *)tnew;
+}

Mercurial > repos > dawe > srf2fastq

comparison srf2fastq/io_lib-1.12.2/io_lib/compression.c @ 0:d901c9f41a6a default tip