diff srf2fastq/io_lib-1.12.2/progs/ztr_dump.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/srf2fastq/io_lib-1.12.2/progs/ztr_dump.c	Tue Jun 07 17:48:05 2011 -0400
@@ -0,0 +1,217 @@
+#include <stdio.h>
+#include <errno.h>
+#include <math.h>
+#include <io_lib/ztr.h>
+#include <io_lib/compression.h>
+#include <io_lib/xalloc.h>
+
+static char *format2str(int format) {
+    static char unk[100];
+
+    switch (format) {
+    case ZTR_FORM_RAW:     return "raw";
+    case ZTR_FORM_RLE:     return "rle";
+    case ZTR_FORM_XRLE:    return "xrle";
+    case ZTR_FORM_XRLE2:   return "xrle2";
+    case ZTR_FORM_ZLIB:    return "zlib";
+    case ZTR_FORM_DELTA1:  return "delta1";
+    case ZTR_FORM_DELTA2:  return "delta2";
+    case ZTR_FORM_DELTA4:  return "delta4";
+    case ZTR_FORM_DDELTA1: return "ddelta1";
+    case ZTR_FORM_DDELTA2: return "ddelta2";
+    case ZTR_FORM_DDELTA4: return "ddelta4";
+    case ZTR_FORM_16TO8:   return "16to8";
+    case ZTR_FORM_32TO8:   return "32to8";
+    case ZTR_FORM_FOLLOW1: return "follow1";
+    case ZTR_FORM_CHEB445: return "cheb445";
+    case ZTR_FORM_ICHEB:   return "icheb";
+    case ZTR_FORM_LOG2:    return "log2";
+    case ZTR_FORM_STHUFF:  return "sthuff";
+    case ZTR_FORM_QSHIFT:  return "qshift";
+    case ZTR_FORM_TSHIFT:  return "tshift";
+    }
+
+    sprintf(unk, "?%d?\n", format);
+    return unk;
+}
+
+/*
+ * Shannon showed that for storage in base 'b' with alphabet symbols 'a' having
+ * a probability of ocurring in any context of 'Pa' we should encode
+ * symbol 'a' to have a storage width of -logb(Pa).
+ *
+ * Eg. b = 26, P(e) = .22. => width .4647277.
+ *
+ * We use this to calculate the entropy of a signal by summing over all letters
+ * in the signal. In this case, our storage has base 256.
+ */
+#define EBASE 256
+double entropy(unsigned char *data, int len) {
+    double E[EBASE];
+    double P[EBASE];
+    double e;
+    int i;
+    
+    for (i = 0; i < EBASE; i++)
+        P[i] = 0;
+
+    for (i = 0; i < len; i++)
+        P[data[i]]++;
+
+    for (i = 0; i < EBASE; i++) {
+        if (P[i]) {
+            P[i] /= len;
+            E[i] = -(log(P[i])/log(EBASE));
+        } else {
+            E[i] = 0;
+        }
+    }
+
+    for (e = i = 0; i < len; i++)
+        e += E[data[i]];
+
+    return e;
+}
+
+/* Debug version of the ztr.c uncompress_chunk function. */
+static int explode_chunk(ztr_t *ztr, ztr_chunk_t *chunk) {
+    char *new_data = NULL;
+    int new_len;
+
+    while (chunk->dlength > 0 && chunk->data[0] != ZTR_FORM_RAW) {
+	double ent = entropy((unsigned char *)chunk->data, chunk->dlength);
+
+	switch (chunk->data[0]) {
+	case ZTR_FORM_RLE:
+	    new_data = unrle(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_XRLE:
+	    new_data = unxrle(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_XRLE2:
+	    new_data = unxrle2(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_ZLIB:
+	    new_data = zlib_dehuff(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_DELTA1:
+	    new_data = recorrelate1(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_DELTA2:
+	    new_data = recorrelate2(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_DELTA4:
+	    new_data = recorrelate4(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_16TO8:
+	    new_data = expand_8to16(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_32TO8:
+	    new_data = expand_8to32(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_FOLLOW1:
+	    new_data = unfollow1(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_ICHEB:
+	    new_data = ichebuncomp(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_LOG2:
+	    new_data = unlog2_data(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_STHUFF:
+	    new_data = unsthuff(ztr, chunk->data, chunk->dlength, &new_len);
+	    break;
+	    
+	case ZTR_FORM_QSHIFT:
+	    new_data = unqshift(chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	case ZTR_FORM_TSHIFT:
+	    new_data = untshift(ztr, chunk->data, chunk->dlength, &new_len);
+	    break;
+
+	default:
+	    fprintf(stderr, "Unknown encoding format %d\n", chunk->data[0]);
+	    return -1;
+	}
+	    
+	if (!new_data) {
+	    fprintf(stderr, "Failed to decode chunk with format %s\n",
+		    format2str(chunk->data[0]));
+	    return -1;
+	}
+
+	printf("    format %8s => %6d to %6d, entropy %8.1f to %8.1f\n",
+	       format2str(chunk->data[0]), chunk->dlength, new_len,
+	       ent, entropy((unsigned char *)new_data, new_len));
+
+	chunk->dlength = new_len;
+	xfree(chunk->data);
+	chunk->data = new_data;
+    }
+
+    return 0;
+}
+
+int main(int argc, char **argv) {
+    ztr_t *ztr;
+    mFILE *fp;
+    int i;
+
+    if (argc >= 2) {
+	if (NULL == (fp = mfopen(argv[1], "rb"))) {
+	    perror(argv[1]);
+	    return 1;
+	}
+    } else {
+	fp = mstdin();
+    }
+
+    if (NULL == (ztr = mfread_ztr(fp))) {
+	perror("fread_ztr");
+	return 1;
+    }
+
+    printf("Nchunks = %d\n", ztr->nchunks);
+    for (i = 0; i < ztr->nchunks; i++) {
+	char str[5];
+	int complen;
+	int rawlen;
+	char *val;
+
+	(void)ZTR_BE2STR(ztr->chunk[i].type, str);
+	complen = ztr->chunk[i].dlength;
+	val = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE");
+	if (val)
+	    printf("-- %s (%s) --\n", str, val);
+	else
+	    printf("-- %s --\n", str);
+	explode_chunk(ztr, &ztr->chunk[i]);
+	rawlen = ztr->chunk[i].dlength;
+	printf("SUMMARY %s  mlen %3d, dlen %6d, rawlen %6d, ratio %f\n",
+	       str, ztr->chunk[i].mdlength,
+	       complen, rawlen, (double)complen/rawlen);
+#if 0
+	fflush(stdout);
+	puts("\n========================================");
+	write(1, ztr->chunk[i].data, ztr->chunk[i].dlength);
+	puts("\n========================================");
+#endif
+    }
+
+    delete_ztr(ztr);
+    
+    return 0;
+}