Mercurial > repos > dawe > srf2fastq
comparison srf2fastq/io_lib-1.12.2/io_lib/compression.h @ 0:d901c9f41a6a default tip
Migrated tool version 1.0.1 from old tool shed archive to new tool shed repository
author | dawe |
---|---|
date | Tue, 07 Jun 2011 17:48:05 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d901c9f41a6a |
---|---|
1 #ifndef _COMPRESSION_H_ | |
2 #define _COMPRESSION_H_ | |
3 | |
4 #include "io_lib/os.h" | |
5 #include <zlib.h> | |
6 | |
7 #ifdef __cplusplus | |
8 extern "C" { | |
9 #endif | |
10 | |
11 /* | |
12 * zlib_huff() | |
13 * | |
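14 * Compresses data using huffman encoding, as implemented by zlib; 'strategy' (not listed under Arguments) is presumably the zlib compression strategy - TODO confirm. | |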
15 * | |
16 * Arguments: | |
17 * uncomp Uncompressed input data | |
18 * uncomp_len Length of uncomp data | |
19 * comp_len Output: length of compressed data | |
20 * | |
21 * Returns: | |
22 * Compressed data if successful | |
23 * NULL if not successful | |
24 */ | |
25 char *zlib_huff(char *uncomp, int uncomp_len, int strategy, int *comp_len); | |
26 | |
27 /* | |
28 * zlib_dehuff() | |
29 * | |
30 * Uncompresses data using huffman encoding, as implemented by zlib. | |
31 * | |
32 * Arguments: | |
33 * comp Compressed input data | |
34 * comp_len Length of comp data | |
35 * uncomp_len Output: length of uncompressed data | |
36 * | |
37 * Returns: | |
38 * Uncompressed data if successful | |
39 * NULL if not successful | |
40 */ | |
41 char *zlib_dehuff(char *comp, int comp_len, int *uncomp_len); | |
42 | |
43 /* | |
44 * zlib_dehuff2() | |
45 * | |
46 * Uncompresses data using huffman encoding, as implemented by zlib. | |
47 * Similar to zlib_dehuff above, but with the following differences: | |
48 * | |
49 * 1) It pastes together the zlib stream from two components; comp1+comp2 | |
50 * with the last byte of comp1 overlapping (ORed) with the first byte | |
51 * of comp2. This allows for separation of the huffman codes from | |
52 * the compressed data itself. | |
53 * 2) It uses the raw Deflate format rather than Zlib's wrapping of it. | |
54 * 3) It uses an EOF symbol to mark the end rather than encoding the | |
55 * uncompressed size in the header | |
56 * | |
57 * | |
58 * Arguments: | |
59 * comp1 Compressed input data part 1 | |
60 * comp1_len Length of comp1 data | |
61 * comp2 Compressed input data part 2 | |
62 * comp2_len Length of comp2 data | |
63 * uncomp_len Output: length of uncompressed data | |
64 * | |
65 * Returns: | |
66 * Uncompressed data if successful | |
67 * NULL if not successful | |
68 */ | |
69 char *zlib_dehuff2(char *comp1, int comp1_len, | |
70 char *comp2, int comp2_len, | |
71 int *uncomp_len); | |
72 | |
73 /* | |
74 * Run length encoding. | |
75 * | |
76 * Any run of 3 or more identical characters (up to 255 in a row) is replaced | |
77 * by a 'guard' byte followed by the number of characters followed by | |
78 * the character value itself. | |
79 * Any single guard value in the input is escaped using 'guard 0'. | |
80 * | |
81 * Specifying guard as -1 will automatically pick one of the least used | |
82 * characters in the input as the guard. | |
83 * | |
84 * Arguments: | |
85 * uncomp Input data | |
86 * uncomp_len Length of input data 'uncomp' | |
87 * guard Guard byte - used to encode "N" copies of data | |
88 * comp_len Output: length of compressed data | |
89 * | |
90 * Returns: | |
91 * Compressed data if successful | |
92 * NULL if not successful | |
93 */ | |
94 char *rle(char *uncomp, int uncomp_len, int guard, int *comp_len); | |
95 | |
96 /* | |
97 * Reverses run length encoding. | |
98 * | |
99 * Arguments: | |
100 * comp Compressed input data | |
101 * comp_len Length of comp data | |
102 * uncomp_len Output: length of uncompressed data | |
103 * | |
104 * Returns: | |
105 * Uncompressed data if successful | |
106 * NULL if not successful | |
107 */ | |
108 char *unrle(char *comp, int comp_len, int *uncomp_len); | |
109 | |
110 /* | |
111 * Multi-byte run length encoding. | |
112 * | |
113 * Any run of 3 or more identical characters (up to 255 in a row) is replaced | |
114 * by a 'guard' byte followed by the number of characters followed by | |
115 * the character value itself. | |
116 * Any single guard value in the input is escaped using 'guard 0'. | |
117 * | |
118 * Specifying guard as -1 will automatically pick one of the least used | |
119 * characters in the input as the guard. | |
120 * | |
121 * Arguments: | |
122 * uncomp Input data | |
123 * uncomp_len Length of input data 'uncomp' | |
124 * guard Guard byte - used to encode "N" copies of data | |
125 * rsz Size of blocks to compare for run checking. | |
126 * comp_len Output: length of compressed data | |
127 * | |
128 * Returns: | |
129 * Compressed data if successful | |
130 * NULL if not successful | |
131 */ | |
132 char *xrle(char *uncomp, int uncomp_len, int guard, int rsz, int *comp_len); | |
133 | |
134 /* | |
135 * Reverses multi-byte run length encoding. | |
136 * | |
137 * Arguments: | |
138 * comp Compressed input data | |
139 * comp_len Length of comp data | |
140 * uncomp_len Output: length of uncompressed data | |
141 * | |
142 * Returns: | |
143 * Uncompressed data if successful | |
144 * NULL if not successful | |
145 */ | |
146 char *unxrle(char *comp, int comp_len, int *uncomp_len); | |
147 | |
148 /* | |
149 * Multi-byte run length encoding. | |
150 * | |
151 * Steps along in words of size 'rsz'. Unlike XRLE above this does run-length | |
152 * encoding by writing out an additional "length" word every time 2 or more | |
153 * words in a row are spotted. This removes the need for a guard byte. | |
154 * | |
155 * Additionally this method ensures that both input and output formats remain | |
156 * aligned on words of size 'rsz'. | |
157 * | |
158 * Arguments: | |
159 * uncomp Input data | |
160 * uncomp_len Length of input data 'uncomp' | |
161 * rsz Size of blocks to compare for run checking. | |
162 * comp_len Output: length of compressed data | |
163 * | |
164 * Returns: | |
165 * Compressed data if successful | |
166 * NULL if not successful | |
167 */ | |
168 char *xrle2(char *uncomp, int uncomp_len, int rsz, int *comp_len); | |
169 | |
170 /* | |
171 * Reverses multi-byte run length encoding (xrle2). | |
172 * | |
173 * Arguments: | |
174 * comp Compressed input data | |
175 * comp_len Length of comp data | |
176 * uncomp_len Output: length of uncompressed data | |
177 * | |
178 * Returns: | |
179 * Uncompressed data if successful | |
180 * NULL if not successful | |
181 */ | |
182 char *unxrle2(char *comp, int comp_len, int *uncomp_len); | |
183 | |
184 /* | |
185 * decorrelate1() | |
186 * | |
187 * Produce successive deltas from a 1-byte array. | |
188 * | |
189 * Arguments: | |
190 * uncomp Uncompressed data | |
191 * uncomp_len Length of uncompressed data | |
192 * level Differencing level (must be 1, 2 or 3) | |
193 * comp_len Return: where to store new compressed length | |
194 * | |
195 * Returns: | |
196 * Success: A decorrelated buffer (malloced) | |
197 * Failure: NULL | |
198 */ | |
199 char *decorrelate1(char *uncomp, int uncomp_len, int level, int *comp_len); | |
200 char *decorrelate1dyn(char *s_uncomp, int uncomp_len, int *comp_len); | |
201 | |
202 /* | |
203 * recorrelate1() | |
204 * | |
205 * The reverse of decorrelate1() | |
206 * | |
207 * Arguments: | |
208 * comp Compressed input data | |
209 * comp_len Length of comp data | |
210 * uncomp_len Output: length of uncompressed data | |
211 * | |
212 * Returns: | |
213 * Success: uncompressed data | |
214 * Failure: NULL | |
215 */ | |
216 char *recorrelate1(char *comp, int comp_len, int *uncomp_len); | |
217 | |
218 /* | |
219 * decorrelate2() | |
220 * | |
221 * Produce successive deltas from a 2-byte array (big endian) | |
222 * | |
223 * Arguments: | |
224 * uncomp Uncompressed data | |
225 * uncomp_len Length of uncompressed data | |
226 * level Differencing level (must be 1, 2 or 3) | |
227 * comp_len Return: where to store new compressed length | |
228 * | |
229 * Returns: | |
230 * Success: A decorrelated buffer (malloced) | |
231 * Failure: NULL | |
232 */ | |
233 char *decorrelate2(char *uncomp, int uncomp_len, int level, int *comp_len); | |
234 char *decorrelate2dyn(char *s_uncomp, int uncomp_len, int *comp_len); | |
235 | |
236 /* | |
237 * recorrelate2() | |
238 * | |
239 * The reverse of decorrelate2() | |
240 * | |
241 * Arguments: | |
242 * comp Compressed input data | |
243 * comp_len Length of comp data | |
244 * uncomp_len Output: length of uncompressed data | |
245 * | |
246 * Returns: | |
247 * Success: uncompressed data | |
248 * Failure: NULL | |
249 */ | |
250 char *recorrelate2(char *comp, int comp_len, int *uncomp_len); | |
251 | |
252 /* | |
253 * decorrelate4() | |
254 * | |
255 * Produce successive deltas from a 4-byte array (big endian) | |
256 * | |
257 * Arguments: | |
258 * uncomp Uncompressed data | |
259 * uncomp_len Length of uncompressed data | |
260 * level Differencing level (must be 1, 2 or 3) | |
261 * comp_len Return: where to store new compressed length | |
262 * | |
263 * Returns: | |
264 * Success: A decorrelated buffer (malloced) | |
265 * Failure: NULL | |
266 */ | |
267 char *decorrelate4(char *uncomp, int uncomp_len, int level, int *comp_len); | |
268 | |
269 /* | |
270 * recorrelate4() | |
271 * | |
272 * The reverse of decorrelate4() | |
273 * | |
274 * Arguments: | |
275 * comp Compressed input data | |
276 * comp_len Length of comp data | |
277 * uncomp_len Output: length of uncompressed data | |
278 * | |
279 * Returns: | |
280 * Success: uncompressed data | |
281 * Failure: NULL | |
282 */ | |
283 char *recorrelate4(char *comp, int comp_len, int *uncomp_len); | |
284 | |
285 /* | |
286 * shrink_16to8() | |
287 * | |
288 * Stores an array of 16-bit (big endian) array elements in an 8-bit array. | |
289 * We assume that most 16-bit elements encode numbers that fit in an 8-bit | |
290 * value. When not possible, we store a marker followed by the 16-bit value | |
291 * stored as multiple 8-bit values. | |
292 * | |
293 * uncomp Uncompressed data | |
294 * uncomp_len Length of uncompressed data (in bytes) | |
295 * comp_len Return: where to store new compressed length | |
296 * | |
297 * Returns: | |
298 * Success: An 8-bit array (malloced) | |
299 * Failure: NULL | |
300 */ | |
301 char *shrink_16to8(char *uncomp, int uncomp_len, int *comp_len); | |
302 | |
303 /* | |
304 * expand_8to16() | |
305 * | |
306 * The opposite of the shrink_16to8() function. | |
307 * | |
308 * comp Compressed input data | |
309 * comp_len Length of comp data (in bytes) | |
310 * uncomp_len Output: length of uncompressed data (in bytes) | |
311 * | |
312 * Returns: | |
313 * Success: Uncompressed data (char *) | |
314 * Failure: NULL | |
315 */ | |
316 char *expand_8to16(char *comp, int comp_len, int *uncomp_len); | |
317 | |
318 /* | |
319 * shrink_32to8() | |
320 * | |
321 * Stores an array of 32-bit (big endian) array elements in an 8-bit array. | |
322 * We assume that most 32-bit elements encode numbers that fit in an 8-bit | |
323 * value. When not possible, we store a marker followed by the 32-bit value | |
324 * stored as multiple 8-bit values. | |
325 * | |
326 * uncomp Uncompressed data | |
327 * uncomp_len Length of uncompressed data (in bytes) | |
328 * comp_len Return: where to store new compressed length | |
329 * | |
330 * Returns: | |
331 * Success: An 8-bit array (malloced) | |
332 * Failure: NULL | |
333 */ | |
334 char *shrink_32to8(char *uncomp, int uncomp_len, int *comp_len); | |
335 | |
336 /* | |
337 * expand_8to32() | |
338 * | |
339 * The opposite of the shrink_32to8() function. | |
340 * | |
341 * comp Compressed input data | |
342 * comp_len Length of comp data (in bytes) | |
343 * uncomp_len Output: length of uncompressed data (in bytes) | |
344 * | |
345 * Returns: | |
346 * Success: Uncompressed data (char *) | |
347 * Failure: NULL | |
348 */ | |
349 char *expand_8to32(char *comp, int comp_len, int *uncomp_len); | |
350 | |
351 char *follow1(char *s_uncomp, | |
352 int uncomp_len, | |
353 int *comp_len); | |
354 | |
355 char *unfollow1(char *s_comp, | |
356 int comp_len, | |
357 int *uncomp_len); | |
358 | |
359 char *ichebcomp(char *uncomp, | |
360 int uncomp_len, | |
361 int *data_len); | |
362 | |
363 char *ichebuncomp(char *comp, | |
364 int comp_len, | |
365 int *uncomp_len); | |
366 | |
367 /* | |
368 * This is a LOSSY compression. It replaces N with 10 * log2(N). | |
369 */ | |
370 char *log2_data(char *x_uncomp, | |
371 int uncomp_len, | |
372 int *comp_len); | |
373 | |
374 char *unlog2_data(char *x_comp, | |
375 int comp_len, | |
376 int *uncomp_len); | |
377 | |
378 /* | |
379 * Implements compression using a set of static huffman codes stored using | |
380 * the Deflate algorithm (and so in this respect it's similar to zlib). | |
381 * | |
382 * The huffman codes though can be previously stored in the ztr object | |
383 * using ztr_add_hcode(). "cset" indicates which numbered stored huffman | |
384 * code set is to be used, or passing zero will use inline codes (i.e. they | |
385 * are stored in the data stream itself, just as in standard deflate). | |
386 * | |
387 * Arguments: | |
388 * ztr ztr_t pointer; used to find stored code-sets | |
389 * uncomp The uncompressed input data | |
390 * uncomp_len Length of uncomp | |
391 * cset Stored code-set number, zero for inline | |
392 * recsz Record size - only used when cset == 0. | |
393 * comp_len Output: length of compressed data | |
394 * | |
395 * Returns: | |
396 * Compressed data stream if successful + comp_len | |
397 * NULL on failure | |
398 */ | |
399 char *sthuff(ztr_t *ztr, char *uncomp, int uncomp_len, | |
400 int cset, int recsz, int *comp_len); | |
401 char *unsthuff(ztr_t *ztr, char *comp, int comp_len, int *uncomp_len); | |
402 | |
403 /* | |
404 * Reorders quality data from its RAW format to an interleaved 4-byte | |
405 * aligned format. | |
406 * | |
407 * Starting with sequence A1 C2 G3 the raw format is quality of called | |
408 * bases followed by quality of remaining bases: | |
409 * 0 (RAW format) | |
410 * Q(A1) Q(C2) Q(G3) | |
411 * Q(C1) Q(A2) Q(A3) | |
412 * Q(G1) Q(G2) Q(C3) | |
413 * Q(T1) Q(T2) Q(T3) | |
414 * | |
415 * We reorder it to: | |
416 * ZTR_FORM_QSHIFT <any> <any> 0(raw) | |
417 * Q(A1) Q(C1) Q(G1) Q(T1) | |
418 * Q(C2) Q(A2) Q(G2) Q(T2) | |
419 * Q(G3) Q(A3) Q(C3) Q(T3) | |
420 * | |
421 * Returns shifted data on success | |
422 * NULL on failure | |
423 */ | |
424 char *qshift(char *qold, int qlen, int *new_len); | |
425 char *unqshift(char *qold, int qlen, int *new_len); | |
426 | |
427 /* | |
428 * Given a sequence ACTG this shifts trace data from the order: | |
429 * | |
430 * A1A2A3A4 C1C2C3C4 G1G2G3G4 T1T2T3T4 | |
431 * | |
432 * to | |
433 * | |
434 * A1C1G1T1 C2A2G2T2 T3A3C3G3 G4A4C4T4 | |
435 * | |
436 * I.e. for each base it outputs the signal for the called base first | |
437 * followed by the remaining 3 signals in A,C,G,T order (minus the | |
438 * called signal already output). | |
439 */ | |
440 char *tshift(ztr_t *ztr, char *told_c, int tlen, int *new_len); | |
441 char *untshift(ztr_t *ztr, char *told_c, int tlen, int *new_len); | |
442 | |
443 #ifdef __cplusplus | |
444 } | |
445 #endif | |
446 | |
447 #endif /* _COMPRESSION_H_ */ |