Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/progs/ztr_dump.c @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #include <stdio.h> | |
2 #include <errno.h> | |
3 #include <math.h> | |
4 #include <io_lib/ztr.h> | |
5 #include <io_lib/compression.h> | |
6 #include <io_lib/xalloc.h> | |
7 | |
8 static char *format2str(int format) { | |
9 static char unk[100]; | |
10 | |
11 switch (format) { | |
12 case ZTR_FORM_RAW: return "raw"; | |
13 case ZTR_FORM_RLE: return "rle"; | |
14 case ZTR_FORM_XRLE: return "xrle"; | |
15 case ZTR_FORM_XRLE2: return "xrle2"; | |
16 case ZTR_FORM_ZLIB: return "zlib"; | |
17 case ZTR_FORM_DELTA1: return "delta1"; | |
18 case ZTR_FORM_DELTA2: return "delta2"; | |
19 case ZTR_FORM_DELTA4: return "delta4"; | |
20 case ZTR_FORM_DDELTA1: return "ddelta1"; | |
21 case ZTR_FORM_DDELTA2: return "ddelta2"; | |
22 case ZTR_FORM_DDELTA4: return "ddelta4"; | |
23 case ZTR_FORM_16TO8: return "16to8"; | |
24 case ZTR_FORM_32TO8: return "32to8"; | |
25 case ZTR_FORM_FOLLOW1: return "follow1"; | |
26 case ZTR_FORM_CHEB445: return "cheb445"; | |
27 case ZTR_FORM_ICHEB: return "icheb"; | |
28 case ZTR_FORM_LOG2: return "log2"; | |
29 case ZTR_FORM_STHUFF: return "sthuff"; | |
30 case ZTR_FORM_QSHIFT: return "qshift"; | |
31 case ZTR_FORM_TSHIFT: return "tshift"; | |
32 } | |
33 | |
34 sprintf(unk, "?%d?\n", format); | |
35 return unk; | |
36 } | |
37 | |
38 /* | |
39 * Shannon showed that for storage in base 'b' with alphabet symbols 'a' having | |
40 * a probability of ocurring in any context of 'Pa' we should encode | |
41 * symbol 'a' to have a storage width of -logb(Pa). | |
42 * | |
43 * Eg. b = 26, P(e) = .22. => width .4647277. | |
44 * | |
45 * We use this to calculate the entropy of a signal by summing over all letters | |
46 * in the signal. In this case, our storage has base 256. | |
47 */ | |
48 #define EBASE 256 | |
49 double entropy(unsigned char *data, int len) { | |
50 double E[EBASE]; | |
51 double P[EBASE]; | |
52 double e; | |
53 int i; | |
54 | |
55 for (i = 0; i < EBASE; i++) | |
56 P[i] = 0; | |
57 | |
58 for (i = 0; i < len; i++) | |
59 P[data[i]]++; | |
60 | |
61 for (i = 0; i < EBASE; i++) { | |
62 if (P[i]) { | |
63 P[i] /= len; | |
64 E[i] = -(log(P[i])/log(EBASE)); | |
65 } else { | |
66 E[i] = 0; | |
67 } | |
68 } | |
69 | |
70 for (e = i = 0; i < len; i++) | |
71 e += E[data[i]]; | |
72 | |
73 return e; | |
74 } | |
75 | |
76 /* Debug version of the ztr.c uncompress_chunk function. */ | |
77 static int explode_chunk(ztr_t *ztr, ztr_chunk_t *chunk) { | |
78 char *new_data = NULL; | |
79 int new_len; | |
80 | |
81 while (chunk->dlength > 0 && chunk->data[0] != ZTR_FORM_RAW) { | |
82 double ent = entropy((unsigned char *)chunk->data, chunk->dlength); | |
83 | |
84 switch (chunk->data[0]) { | |
85 case ZTR_FORM_RLE: | |
86 new_data = unrle(chunk->data, chunk->dlength, &new_len); | |
87 break; | |
88 | |
89 case ZTR_FORM_XRLE: | |
90 new_data = unxrle(chunk->data, chunk->dlength, &new_len); | |
91 break; | |
92 | |
93 case ZTR_FORM_XRLE2: | |
94 new_data = unxrle2(chunk->data, chunk->dlength, &new_len); | |
95 break; | |
96 | |
97 case ZTR_FORM_ZLIB: | |
98 new_data = zlib_dehuff(chunk->data, chunk->dlength, &new_len); | |
99 break; | |
100 | |
101 case ZTR_FORM_DELTA1: | |
102 new_data = recorrelate1(chunk->data, chunk->dlength, &new_len); | |
103 break; | |
104 | |
105 case ZTR_FORM_DELTA2: | |
106 new_data = recorrelate2(chunk->data, chunk->dlength, &new_len); | |
107 break; | |
108 | |
109 case ZTR_FORM_DELTA4: | |
110 new_data = recorrelate4(chunk->data, chunk->dlength, &new_len); | |
111 break; | |
112 | |
113 case ZTR_FORM_16TO8: | |
114 new_data = expand_8to16(chunk->data, chunk->dlength, &new_len); | |
115 break; | |
116 | |
117 case ZTR_FORM_32TO8: | |
118 new_data = expand_8to32(chunk->data, chunk->dlength, &new_len); | |
119 break; | |
120 | |
121 case ZTR_FORM_FOLLOW1: | |
122 new_data = unfollow1(chunk->data, chunk->dlength, &new_len); | |
123 break; | |
124 | |
125 case ZTR_FORM_ICHEB: | |
126 new_data = ichebuncomp(chunk->data, chunk->dlength, &new_len); | |
127 break; | |
128 | |
129 case ZTR_FORM_LOG2: | |
130 new_data = unlog2_data(chunk->data, chunk->dlength, &new_len); | |
131 break; | |
132 | |
133 case ZTR_FORM_STHUFF: | |
134 new_data = unsthuff(ztr, chunk->data, chunk->dlength, &new_len); | |
135 break; | |
136 | |
137 case ZTR_FORM_QSHIFT: | |
138 new_data = unqshift(chunk->data, chunk->dlength, &new_len); | |
139 break; | |
140 | |
141 case ZTR_FORM_TSHIFT: | |
142 new_data = untshift(ztr, chunk->data, chunk->dlength, &new_len); | |
143 break; | |
144 | |
145 default: | |
146 fprintf(stderr, "Unknown encoding format %d\n", chunk->data[0]); | |
147 return -1; | |
148 } | |
149 | |
150 if (!new_data) { | |
151 fprintf(stderr, "Failed to decode chunk with format %s\n", | |
152 format2str(chunk->data[0])); | |
153 return -1; | |
154 } | |
155 | |
156 printf(" format %8s => %6d to %6d, entropy %8.1f to %8.1f\n", | |
157 format2str(chunk->data[0]), chunk->dlength, new_len, | |
158 ent, entropy((unsigned char *)new_data, new_len)); | |
159 | |
160 chunk->dlength = new_len; | |
161 xfree(chunk->data); | |
162 chunk->data = new_data; | |
163 } | |
164 | |
165 return 0; | |
166 } | |
167 | |
168 int main(int argc, char **argv) { | |
169 ztr_t *ztr; | |
170 mFILE *fp; | |
171 int i; | |
172 | |
173 if (argc >= 2) { | |
174 if (NULL == (fp = mfopen(argv[1], "rb"))) { | |
175 perror(argv[1]); | |
176 return 1; | |
177 } | |
178 } else { | |
179 fp = mstdin(); | |
180 } | |
181 | |
182 if (NULL == (ztr = mfread_ztr(fp))) { | |
183 perror("fread_ztr"); | |
184 return 1; | |
185 } | |
186 | |
187 printf("Nchunks = %d\n", ztr->nchunks); | |
188 for (i = 0; i < ztr->nchunks; i++) { | |
189 char str[5]; | |
190 int complen; | |
191 int rawlen; | |
192 char *val; | |
193 | |
194 (void)ZTR_BE2STR(ztr->chunk[i].type, str); | |
195 complen = ztr->chunk[i].dlength; | |
196 val = ztr_lookup_mdata_value(ztr, &ztr->chunk[i], "TYPE"); | |
197 if (val) | |
198 printf("-- %s (%s) --\n", str, val); | |
199 else | |
200 printf("-- %s --\n", str); | |
201 explode_chunk(ztr, &ztr->chunk[i]); | |
202 rawlen = ztr->chunk[i].dlength; | |
203 printf("SUMMARY %s mlen %3d, dlen %6d, rawlen %6d, ratio %f\n", | |
204 str, ztr->chunk[i].mdlength, | |
205 complen, rawlen, (double)complen/rawlen); | |
206 #if 0 | |
207 fflush(stdout); | |
208 puts("\n========================================"); | |
209 write(1, ztr->chunk[i].data, ztr->chunk[i].dlength); | |
210 puts("\n========================================"); | |
211 #endif | |
212 } | |
213 | |
214 delete_ztr(ztr); | |
215 | |
216 return 0; | |
217 } |