Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/cram/cram_codecs.h @ 0:dfa3745e5fd8
Uploaded
| author | youngkim |
|---|---|
| date | Thu, 24 Mar 2016 17:12:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3745e5fd8 |
|---|---|
| 1 /* | |
| 2 Copyright (c) 2012-2013 Genome Research Ltd. | |
| 3 Author: James Bonfield <jkb@sanger.ac.uk> | |
| 4 | |
| 5 Redistribution and use in source and binary forms, with or without | |
| 6 modification, are permitted provided that the following conditions are met: | |
| 7 | |
| 8 1. Redistributions of source code must retain the above copyright notice, | |
| 9 this list of conditions and the following disclaimer. | |
| 10 | |
| 11 2. Redistributions in binary form must reproduce the above copyright notice, | |
| 12 this list of conditions and the following disclaimer in the documentation | |
| 13 and/or other materials provided with the distribution. | |
| 14 | |
| 15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger | |
| 16 Institute nor the names of its contributors may be used to endorse or promote | |
| 17 products derived from this software without specific prior written permission. | |
| 18 | |
| 19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND | |
| 20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| 22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE | |
| 23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
| 24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
| 25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
| 26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
| 27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 29 */ | |
| 30 | |
| 31 #ifndef _CRAM_ENCODINGS_H_ | |
| 32 #define _CRAM_ENCODINGS_H_ | |
| 33 | |
| 34 #ifdef __cplusplus | |
| 35 extern "C" { | |
| 36 #endif | |
| 37 | |
| 38 #include <inttypes.h> | |
| 39 | |
| 40 struct cram_codec; | |
| 41 | |
| 42 /* | |
| 43 * Slow but simple huffman decoder to start with. | |
| 44 * Read a bit at a time, keeping track of {length, value} | |
| 45 * eg. 1 1 0 1 => {1,1}, {2,3}, {3,6}, {4,13} | |
| 46 * | |
| 47 * Keep track of this through the huffman code table. | |
| 48 * For fast scanning we have an index of where the first code of length X | |
| 49 * appears. | |
| 50 */ | |
| 51 typedef struct { /* One entry of the Huffman code table pointed to by the decoder/encoder structs in this header. */ | |
| 52 int32_t symbol; /* presumably the value this code stands for -- TODO confirm against cram_codecs.c */ | |
| 53 int32_t p; // next code start value, minus index to codes[] | |
| 54 int32_t code; /* presumably the bit pattern of the code -- TODO confirm */ | |
| 55 int32_t len; /* presumably the code length in bits (cf. the {length, value} pairs described above) -- TODO confirm */ | |
| 56 } cram_huffman_code; | |
| 57 | |
| 58 typedef struct { /* Huffman decoder state: a counted table of cram_huffman_code entries. */ | |
| 59 int ncodes; /* presumably the number of entries in codes[] -- TODO confirm */ | |
| 60 cram_huffman_code *codes; /* code table; allocation and ownership are not visible in this header */ | |
| 61 } cram_huffman_decoder; | |
| 62 | |
| 63 #define MAX_HUFF 128 /* sizes val2code[] below, which has MAX_HUFF+1 slots */ | |
| 64 typedef struct { /* Huffman encoder state: a code table plus a direct lookup array. */ | |
| 65 cram_huffman_code *codes; /* code table; allocation and ownership are not visible in this header */ | |
| 66 int nvals; /* presumably the number of entries in codes[] -- TODO confirm */ | |
| 67 int val2code[MAX_HUFF+1]; // value to code lookup for small values | |
| 68 } cram_huffman_encoder; | |
| 69 | |
| 70 typedef struct { /* Parameters for the beta codec; cram_codec uses this type for both its beta and e_beta members. */ | |
| 71 int32_t offset; /* presumably an offset applied to coded values -- TODO confirm sign convention */ | |
| 72 int32_t nbits; /* presumably the fixed number of bits per value -- TODO confirm */ | |
| 73 } cram_beta_decoder; | |
| 74 | |
| 75 typedef struct { /* Parameters for the gamma codec. */ | |
| 76 int32_t offset; /* presumably an offset applied to coded values -- TODO confirm sign convention */ | |
| 77 } cram_gamma_decoder; | |
| 78 | |
| 79 typedef struct { /* Parameters for the subexp codec. */ | |
| 80 int32_t offset; /* presumably an offset applied to coded values -- TODO confirm sign convention */ | |
| 81 int32_t k; /* codec parameter k; exact meaning not visible in this header -- TODO confirm */ | |
| 82 } cram_subexp_decoder; | |
| 83 | |
| 84 typedef struct { /* Parameters for the external codec; cram_codec uses this type for both its external and e_external members. */ | |
| 85 int32_t content_id; /* presumably the id of the block holding the data (see cram_codec_to_id) -- TODO confirm */ | |
| 86 enum cram_external_type type; /* enum declared outside this header, which must be included first */ | |
| 87 } cram_external_decoder; | |
| 88 | |
| 89 typedef struct { /* Decoder state for BYTE_ARRAY_LEN, which uses two sub-codecs. */ | |
| 90 struct cram_codec *len_codec; /* presumably decodes the array length -- TODO confirm */ | |
| 91 struct cram_codec *value_codec; /* presumably decodes the array contents -- TODO confirm */ | |
| 92 } cram_byte_array_len_decoder; | |
| 93 | |
| 94 typedef struct { /* Parameters for the byte_array_stop codec; cram_codec uses this type for both byte_array_stop and e_byte_array_stop. */ | |
| 95 unsigned char stop; /* presumably the terminator byte that ends an array -- TODO confirm */ | |
| 96 int32_t content_id; /* presumably the id of the block holding the data (see cram_codec_to_id) -- TODO confirm */ | |
| 97 } cram_byte_array_stop_decoder; | |
| 98 | |
| 99 typedef struct { /* Encoder state for BYTE_ARRAY_LEN: an encoding, an opaque data pointer and a sub-codec for each of length and value. */ | |
| 100 enum cram_encoding len_encoding; /* encoding chosen for the length part */ | |
| 101 enum cram_encoding val_encoding; /* encoding chosen for the value part */ | |
| 102 void *len_dat; /* opaque data for the length sub-codec; presumably forwarded to cram_encoder_init as dat -- TODO confirm */ | |
| 103 void *val_dat; /* opaque data for the value sub-codec; presumably forwarded to cram_encoder_init as dat -- TODO confirm */ | |
| 104 struct cram_codec *len_codec; /* sub-codec for the length part */ | |
| 105 struct cram_codec *val_codec; /* sub-codec for the value part */ | |
| 106 } cram_byte_array_len_encoder; | |
| 107 | |
| 108 /* | |
| 109 * A generic codec structure. | |
| 110 */ | |
| 111 typedef struct cram_codec { /* Generic codec object: an encoding tag, function-pointer hooks and per-encoding state in a union. */ | |
| 112 enum cram_encoding codec; /* encoding tag; presumably indicates which union member is in use -- TODO confirm */ | |
| 113 cram_block *out; /* block pointer; its role is not visible in this header -- TODO confirm */ | |
| 114 void (*free)(struct cram_codec *codec); /* hook taking the codec itself; presumably releases it -- TODO confirm */ | |
| 115 int (*decode)(cram_slice *slice, struct cram_codec *codec, | |
| 116 cram_block *in, char *out, int *out_size); /* decode hook: reads from block in, writes to out / *out_size; return convention not visible here */ | |
| 117 int (*encode)(cram_slice *slice, struct cram_codec *codec, | |
| 118 char *in, int in_size); /* encode hook: consumes in_size bytes at in; return convention not visible here */ | |
| 119 int (*store)(struct cram_codec *codec, cram_block *b, char *prefix, | |
| 120 int version); /* store hook: presumably serialises the codec description into block b -- TODO confirm */ | |
| 121 union { /* anonymous union (C11 or compiler extension): members are accessed directly on cram_codec */ | |
| 122 cram_huffman_decoder huffman; /* members without the e_ prefix hold decoder-side state */ | |
| 123 cram_external_decoder external; | |
| 124 cram_beta_decoder beta; | |
| 125 cram_gamma_decoder gamma; | |
| 126 cram_subexp_decoder subexp; | |
| 127 cram_byte_array_len_decoder byte_array_len; | |
| 128 cram_byte_array_stop_decoder byte_array_stop; | |
| 129 | |
| 130 cram_huffman_encoder e_huffman; /* e_ members hold encoder-side state */ | |
| 131 cram_external_decoder e_external; /* reuses the decoder struct type */ | |
| 132 cram_byte_array_stop_decoder e_byte_array_stop; /* reuses the decoder struct type */ | |
| 133 cram_byte_array_len_encoder e_byte_array_len; | |
| 134 cram_beta_decoder e_beta; /* reuses the decoder struct type */ | |
| 135 }; | |
| 136 } cram_codec; | |
| 137 | |
| 138 char *cram_encoding2str(enum cram_encoding t); /* presumably returns a printable name for encoding t; ownership of the returned string is not visible here -- TODO confirm */ | |
| 139 | |
| 140 cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size, | |
| 141 enum cram_external_type option, | |
| 142 int version); /* Creates a decoder for the given encoding from size bytes at data; presumably returns NULL on failure and is released through the codec's free hook -- TODO confirm */ | |
| 143 cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st, | |
| 144 enum cram_external_type option, void *dat, | |
| 145 int version); /* Creates an encoder for the given encoding; st and dat are encoding-specific inputs whose exact use is not visible here; presumably returns NULL on failure -- TODO confirm */ | |
| 146 | |
| 147 //int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size); | |
| 148 //void cram_decoder_free(void *codes); | |
| 149 | |
| 150 //#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++)) | |
| 151 | |
/*
 * GET_BIT_MSB(b, v): append the next bit of block b to the low end of v.
 *
 * b  pointer to a structure with members data (byte buffer), byte (index
 *    of the current byte in data) and bit (bit position within that byte,
 *    7 being the most significant bit).
 * v  integer lvalue; it is shifted left by one and the bit read is OR-ed
 *    into bit 0.
 *
 * After the read, b->bit is decremented; when it drops below 0, b->byte is
 * advanced by one and b->bit wraps back to 7 (via the "& 7" mask).
 *
 * The macro performs no bounds check on b->data, and both arguments are
 * evaluated more than once, so do not pass expressions with side effects.
 * Every use of an argument is parenthesized so that arguments such as
 * &blk or p + 1 expand correctly.
 */
#define GET_BIT_MSB(b,v) (void)((v)<<=1, (v)|=((b)->data[(b)->byte] >> (b)->bit)&1, (b)->byte += (--(b)->bit<0), (b)->bit&=7)
| 153 | |
| 154 /* | |
| 155 * Returns the content_id used by this codec, also in id2 if byte_array_len. | |
| 156 * Returns -1 for the CORE block and -2 for unneeded. | |
| 157 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. | |
| 158 */ | |
| 159 int cram_codec_to_id(cram_codec *c, int *id2); /* c: codec to query; id2: out-parameter for the second content_id, filled only for BYTE_ARRAY_LEN (see the comment above) */ | |
| 160 | |
| 161 #ifdef __cplusplus | |
| 162 } | |
| 163 #endif | |
| 164 | |
| 165 #endif /* _CRAM_ENCODINGS_H_ */ |
