comparison srf2fastq/io_lib-1.12.2/progs/ztr_dump.c @ 0:d901c9f41a6a default tip

Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author dawe
date Tue, 07 Jun 2011 17:48:05 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d901c9f41a6a
1 #include <stdio.h>
2 #include <errno.h>
3 #include <math.h>
4 #include <io_lib/ztr.h>
5 #include <io_lib/compression.h>
6 #include <io_lib/xalloc.h>
7
8 static char *format2str(int format) {
9 static char unk[100];
10
11 switch (format) {
12 case ZTR_FORM_RAW: return "raw";
13 case ZTR_FORM_RLE: return "rle";
14 case ZTR_FORM_XRLE: return "xrle";
15 case ZTR_FORM_XRLE2: return "xrle2";
16 case ZTR_FORM_ZLIB: return "zlib";
17 case ZTR_FORM_DELTA1: return "delta1";
18 case ZTR_FORM_DELTA2: return "delta2";
19 case ZTR_FORM_DELTA4: return "delta4";
20 case ZTR_FORM_DDELTA1: return "ddelta1";
21 case ZTR_FORM_DDELTA2: return "ddelta2";
22 case ZTR_FORM_DDELTA4: return "ddelta4";
23 case ZTR_FORM_16TO8: return "16to8";
24 case ZTR_FORM_32TO8: return "32to8";
25 case ZTR_FORM_FOLLOW1: return "follow1";
26 case ZTR_FORM_CHEB445: return "cheb445";
27 case ZTR_FORM_ICHEB: return "icheb";
28 case ZTR_FORM_LOG2: return "log2";
29 case ZTR_FORM_STHUFF: return "sthuff";
30 case ZTR_FORM_QSHIFT: return "qshift";
31 case ZTR_FORM_TSHIFT: return "tshift";
32 }
33
34 sprintf(unk, "?%d?\n", format);
35 return unk;
36 }
37
38 /*
39 * Shannon showed that for storage in base 'b' with alphabet symbols 'a' having
40 * a probability of ocurring in any context of 'Pa' we should encode
41 * symbol 'a' to have a storage width of -logb(Pa).
42 *
43 * Eg. b = 26, P(e) = .22. => width .4647277.
44 *
45 * We use this to calculate the entropy of a signal by summing over all letters
46 * in the signal. In this case, our storage has base 256.
47 */
48 #define EBASE 256
49 double entropy(unsigned char *data, int len) {
50 double E[EBASE];
51 double P[EBASE];
52 double e;
53 int i;
54
55 for (i = 0; i < EBASE; i++)
56 P[i] = 0;
57
58 for (i = 0; i < len; i++)
59 P[data[i]]++;
60
61 for (i = 0; i < EBASE; i++) {
62 if (P[i]) {
63 P[i] /= len;
64 E[i] = -(log(P[i])/log(EBASE));
65 } else {
66 E[i] = 0;
67 }
68 }
69
70 for (e = i = 0; i < len; i++)
71 e += E[data[i]];
72
73 return e;
74 }
75
76 /* Debug version of the ztr.c uncompress_chunk function. */
77 static int explode_chunk(ztr_t *ztr, ztr_chunk_t *chunk) {
78 char *new_data = NULL;
79 int new_len;
80
81 while (chunk->dlength > 0 && chunk->data[0] != ZTR_FORM_RAW) {
82 double ent = entropy((unsigned char *)chunk->data, chunk->dlength);
83
84 switch (chunk->data[0]) {
85 case ZTR_FORM_RLE:
86 new_data = unrle(chunk->data, chunk->dlength, &new_len);
87 break;
88
89 case ZTR_FORM_XRLE:
90 new_data = unxrle(chunk->data, chunk->dlength, &new_len);
91 break;
92
93 case ZTR_FORM_XRLE2:
94 new_data = unxrle2(chunk->data, chunk->dlength, &new_len);
95 break;
96
97 case ZTR_FORM_ZLIB:
98 new_data = zlib_dehuff(chunk->data, chunk->dlength, &new_len);
99 break;
100
101 case ZTR_FORM_DELTA1:
102 new_data = recorrelate1(chunk->data, chunk->dlength, &new_len);
103 break;
104
105 case ZTR_FORM_DELTA2:
106 new_data = recorrelate2(chunk->data, chunk->dlength, &new_len);
107 break;
108
109 case ZTR_FORM_DELTA4:
110 new_data = recorrelate4(chunk->data, chunk->dlength, &new_len);
111 break;
112
113 case ZTR_FORM_16TO8:
114 new_data = expand_8to16(chunk->data, chunk->dlength, &new_len);
115 break;
116
117 case ZTR_FORM_32TO8:
118 new_data = expand_8to32(chunk->data, chunk->dlength, &new_len);
119 break;
120
121 case ZTR_FORM_FOLLOW1:
122 new_data = unfollow1(chunk->data, chunk->dlength, &new_len);
123 break;
124
125 case ZTR_FORM_ICHEB:
126 new_data = ichebuncomp(chunk->data, chunk->dlength, &new_len);
127 break;
128
129 case ZTR_FORM_LOG2:
130 new_data = unlog2_data(chunk->data, chunk->dlength, &new_len);
131 break;
132
133 case ZTR_FORM_STHUFF:
134 new_data = unsthuff(ztr, chunk->data, chunk->dlength, &new_len);
135 break;
136
137 case ZTR_FORM_QSHIFT:
138 new_data = unqshift(chunk->data, chunk->dlength, &new_len);
139 break;
140
141 case ZTR_FORM_TSHIFT:
142 new_data = untshift(ztr, chunk->data, chunk->dlength, &new_len);
143 break;
144
145 default:
146 fprintf(stderr, "Unknown encoding format %d\n", chunk->data[0]);
147 return -1;
148 }
149
150 if (!new_data) {
151 fprintf(stderr, "Failed to decode chunk with format %s\n",
152 format2str(chunk->data[0]));
153 return -1;
154 }
155
156 printf(" format %8s => %6d to %6d, entropy %8.1f to %8.1f\n",
157 format2str(chunk->data[0]), chunk->dlength, new_len,
158 ent, entropy((unsigned char *)new_data, new_len));
159
160 chunk->dlength = new_len;
161 xfree(chunk->data);
162 chunk->data = new_data;
163 }
164
165 return 0;
166 }
167
168 int main(int argc, char **argv) {
169 ztr_t *ztr;
170 mFILE *fp;
171 int i;
172
173 if (argc >= 2) {
174 if (NULL == (fp = mfopen(argv[1], "rb"))) {
175 perror(argv[1]);
176 return 1;
177 }
178 } else {
179 fp = mstdin();
180 }
181
182 if (NULL == (ztr = mfread_ztr(fp))) {
183 perror("fread_ztr");
184 return 1;
185 }
186
187 printf("Nchunks = %d\n", ztr->nchunks);
188 for (i = 0; i < ztr->nchunks; i++) {
189 char str[5];
190 int complen;
191 int rawlen;
192 char *val;
193
194 (void)ZTR_BE2STR(ztr->chunk[i].type, str);
195 complen = ztr->chunk[i].dlength;
196 val = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE");
197 if (val)
198 printf("-- %s (%s) --\n", str, val);
199 else
200 printf("-- %s --\n", str);
201 explode_chunk(ztr, &ztr->chunk[i]);
202 rawlen = ztr->chunk[i].dlength;
203 printf("SUMMARY %s mlen %3d, dlen %6d, rawlen %6d, ratio %f\n",
204 str, ztr->chunk[i].mdlength,
205 complen, rawlen, (double)complen/rawlen);
206 #if 0
207 fflush(stdout);
208 puts("\n========================================");
209 write(1, ztr->chunk[i].data, ztr->chunk[i].dlength);
210 puts("\n========================================");
211 #endif
212 }
213
214 delete_ztr(ztr);
215
216 return 0;
217 }