Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/tools/samtools-0.1.16/bam.h @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2008-2010 Genome Research Ltd (GRL). | |
4 | |
5 Permission is hereby granted, free of charge, to any person obtaining | |
6 a copy of this software and associated documentation files (the | |
7 "Software"), to deal in the Software without restriction, including | |
8 without limitation the rights to use, copy, modify, merge, publish, | |
9 distribute, sublicense, and/or sell copies of the Software, and to | |
10 permit persons to whom the Software is furnished to do so, subject to | |
11 the following conditions: | |
12 | |
13 The above copyright notice and this permission notice shall be | |
14 included in all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
23 SOFTWARE. | |
24 */ | |
25 | |
26 /* Contact: Heng Li <lh3@sanger.ac.uk> */ | |
27 | |
28 #ifndef BAM_BAM_H | |
29 #define BAM_BAM_H | |
30 | |
31 /*! | |
32 @header | |
33 | |
34 BAM library provides I/O and various operations on manipulating files | |
35 in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map) | |
36 format. It now supports importing from or exporting to SAM, sorting, | |
37 merging, generating pileup, and quickly retrieval of reads overlapped | |
38 with a specified region. | |
39 | |
40 @copyright Genome Research Ltd. | |
41 */ | |
42 | |
43 #define BAM_VERSION "0.1.16 (r963:234)" | |
44 | |
45 #include <stdint.h> | |
46 #include <stdlib.h> | |
47 #include <string.h> | |
48 #include <stdio.h> | |
49 | |
50 #ifndef BAM_LITE | |
51 #define BAM_VIRTUAL_OFFSET16 | |
52 #include "bgzf.h" | |
53 /*! @abstract BAM file handler */ | |
54 typedef BGZF *bamFile; | |
55 #define bam_open(fn, mode) bgzf_open(fn, mode) | |
56 #define bam_dopen(fd, mode) bgzf_fdopen(fd, mode) | |
57 #define bam_close(fp) bgzf_close(fp) | |
58 #define bam_read(fp, buf, size) bgzf_read(fp, buf, size) | |
59 #define bam_write(fp, buf, size) bgzf_write(fp, buf, size) | |
60 #define bam_tell(fp) bgzf_tell(fp) | |
61 #define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir) | |
62 #else | |
63 #define BAM_TRUE_OFFSET | |
64 #include <zlib.h> | |
65 typedef gzFile bamFile; | |
66 #define bam_open(fn, mode) gzopen(fn, mode) | |
67 #define bam_dopen(fd, mode) gzdopen(fd, mode) | |
68 #define bam_close(fp) gzclose(fp) | |
69 #define bam_read(fp, buf, size) gzread(fp, buf, size) | |
70 /* no bam_write/bam_tell/bam_seek() here */ | |
71 #endif | |
72 | |
73 /*! @typedef | |
74 @abstract Structure for the alignment header. | |
75 @field n_targets number of reference sequences | |
76 @field target_name names of the reference sequences | |
77 @field target_len lengths of the referene sequences | |
78 @field dict header dictionary | |
79 @field hash hash table for fast name lookup | |
80 @field rg2lib hash table for @RG-ID -> LB lookup | |
81 @field l_text length of the plain text in the header | |
82 @field text plain text | |
83 | |
84 @discussion Field hash points to null by default. It is a private | |
85 member. | |
86 */ | |
87 typedef struct { | |
88 int32_t n_targets; | |
89 char **target_name; | |
90 uint32_t *target_len; | |
91 void *dict, *hash, *rg2lib; | |
92 size_t l_text, n_text; | |
93 char *text; | |
94 } bam_header_t; | |
95 | |
96 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */ | |
97 #define BAM_FPAIRED 1 | |
98 /*! @abstract the read is mapped in a proper pair */ | |
99 #define BAM_FPROPER_PAIR 2 | |
100 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */ | |
101 #define BAM_FUNMAP 4 | |
102 /*! @abstract the mate is unmapped */ | |
103 #define BAM_FMUNMAP 8 | |
104 /*! @abstract the read is mapped to the reverse strand */ | |
105 #define BAM_FREVERSE 16 | |
106 /*! @abstract the mate is mapped to the reverse strand */ | |
107 #define BAM_FMREVERSE 32 | |
108 /*! @abstract this is read1 */ | |
109 #define BAM_FREAD1 64 | |
110 /*! @abstract this is read2 */ | |
111 #define BAM_FREAD2 128 | |
112 /*! @abstract not primary alignment */ | |
113 #define BAM_FSECONDARY 256 | |
114 /*! @abstract QC failure */ | |
115 #define BAM_FQCFAIL 512 | |
116 /*! @abstract optical or PCR duplicate */ | |
117 #define BAM_FDUP 1024 | |
118 | |
119 #define BAM_OFDEC 0 | |
120 #define BAM_OFHEX 1 | |
121 #define BAM_OFSTR 2 | |
122 | |
123 /*! @abstract defautl mask for pileup */ | |
124 #define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) | |
125 | |
126 #define BAM_CORE_SIZE sizeof(bam1_core_t) | |
127 | |
128 /** | |
129 * Describing how CIGAR operation/length is packed in a 32-bit integer. | |
130 */ | |
131 #define BAM_CIGAR_SHIFT 4 | |
132 #define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1) | |
133 | |
134 /* | |
135 CIGAR operations. | |
136 */ | |
137 /*! @abstract CIGAR: match */ | |
138 #define BAM_CMATCH 0 | |
139 /*! @abstract CIGAR: insertion to the reference */ | |
140 #define BAM_CINS 1 | |
141 /*! @abstract CIGAR: deletion from the reference */ | |
142 #define BAM_CDEL 2 | |
143 /*! @abstract CIGAR: skip on the reference (e.g. spliced alignment) */ | |
144 #define BAM_CREF_SKIP 3 | |
145 /*! @abstract CIGAR: clip on the read with clipped sequence present in qseq */ | |
146 #define BAM_CSOFT_CLIP 4 | |
147 /*! @abstract CIGAR: clip on the read with clipped sequence trimmed off */ | |
148 #define BAM_CHARD_CLIP 5 | |
149 /*! @abstract CIGAR: padding */ | |
150 #define BAM_CPAD 6 | |
151 | |
152 /*! @typedef | |
153 @abstract Structure for core alignment information. | |
154 @field tid chromosome ID, defined by bam_header_t | |
155 @field pos 0-based leftmost coordinate | |
156 @field strand strand; 0 for forward and 1 otherwise | |
157 @field bin bin calculated by bam_reg2bin() | |
158 @field qual mapping quality | |
159 @field l_qname length of the query name | |
160 @field flag bitwise flag | |
161 @field n_cigar number of CIGAR operations | |
162 @field l_qseq length of the query sequence (read) | |
163 */ | |
164 typedef struct { | |
165 int32_t tid; | |
166 int32_t pos; | |
167 uint32_t bin:16, qual:8, l_qname:8; | |
168 uint32_t flag:16, n_cigar:16; | |
169 int32_t l_qseq; | |
170 int32_t mtid; | |
171 int32_t mpos; | |
172 int32_t isize; | |
173 } bam1_core_t; | |
174 | |
175 /*! @typedef | |
176 @abstract Structure for one alignment. | |
177 @field core core information about the alignment | |
178 @field l_aux length of auxiliary data | |
179 @field data_len current length of bam1_t::data | |
180 @field m_data maximum length of bam1_t::data | |
181 @field data all variable-length data, concatenated; structure: cigar-qname-seq-qual-aux | |
182 | |
183 @discussion Notes: | |
184 | |
185 1. qname is zero tailing and core.l_qname includes the tailing '\0'. | |
186 2. l_qseq is calculated from the total length of an alignment block | |
187 on reading or from CIGAR. | |
188 */ | |
189 typedef struct { | |
190 bam1_core_t core; | |
191 int l_aux, data_len, m_data; | |
192 uint8_t *data; | |
193 } bam1_t; | |
194 | |
195 typedef struct __bam_iter_t *bam_iter_t; | |
196 | |
197 #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0) | |
198 #define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0) | |
199 | |
200 /*! @function | |
201 @abstract Get the CIGAR array | |
202 @param b pointer to an alignment | |
203 @return pointer to the CIGAR array | |
204 | |
205 @discussion In the CIGAR array, each element is a 32-bit integer. The | |
206 lower 4 bits gives a CIGAR operation and the higher 28 bits keep the | |
207 length of a CIGAR. | |
208 */ | |
209 #define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname)) | |
210 | |
211 /*! @function | |
212 @abstract Get the name of the query | |
213 @param b pointer to an alignment | |
214 @return pointer to the name string, null terminated | |
215 */ | |
216 #define bam1_qname(b) ((char*)((b)->data)) | |
217 | |
218 /*! @function | |
219 @abstract Get query sequence | |
220 @param b pointer to an alignment | |
221 @return pointer to sequence | |
222 | |
223 @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G, | |
224 8 for T and 15 for N. Two bases are packed in one byte with the base | |
225 at the higher 4 bits having smaller coordinate on the read. It is | |
226 recommended to use bam1_seqi() macro to get the base. | |
227 */ | |
228 #define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname) | |
229 | |
230 /*! @function | |
231 @abstract Get query quality | |
232 @param b pointer to an alignment | |
233 @return pointer to quality string | |
234 */ | |
235 #define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1)) | |
236 | |
237 /*! @function | |
238 @abstract Get a base on read | |
239 @param s Query sequence returned by bam1_seq() | |
240 @param i The i-th position, 0-based | |
241 @return 4-bit integer representing the base. | |
242 */ | |
243 #define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf) | |
244 | |
245 /*! @function | |
246 @abstract Get query sequence and quality | |
247 @param b pointer to an alignment | |
248 @return pointer to the concatenated auxiliary data | |
249 */ | |
250 #define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2) | |
251 | |
252 #ifndef kroundup32 | |
253 /*! @function | |
254 @abstract Round an integer to the next closest power-2 integer. | |
255 @param x integer to be rounded (in place) | |
256 @discussion x will be modified. | |
257 */ | |
258 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) | |
259 #endif | |
260 | |
261 /*! | |
262 @abstract Whether the machine is big-endian; modified only in | |
263 bam_header_init(). | |
264 */ | |
265 extern int bam_is_be; | |
266 | |
267 /*! | |
268 @abstract Verbose level between 0 and 3; 0 is supposed to disable all | |
269 debugging information, though this may not have been implemented. | |
270 */ | |
271 extern int bam_verbose; | |
272 | |
273 /*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */ | |
274 extern unsigned char bam_nt16_table[256]; | |
275 | |
276 /*! @abstract Table for converting a 4-bit encoded nucleotide to a letter. */ | |
277 extern char *bam_nt16_rev_table; | |
278 | |
279 extern char bam_nt16_nt4_table[]; | |
280 | |
281 #ifdef __cplusplus | |
282 extern "C" { | |
283 #endif | |
284 | |
285 /********************* | |
286 * Low-level SAM I/O * | |
287 *********************/ | |
288 | |
289 /*! @abstract TAM file handler */ | |
290 typedef struct __tamFile_t *tamFile; | |
291 | |
292 /*! | |
293 @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib. | |
294 @param fn SAM file name | |
295 @return SAM file handler | |
296 */ | |
297 tamFile sam_open(const char *fn); | |
298 | |
299 /*! | |
300 @abstract Close a SAM file handler | |
301 @param fp SAM file handler | |
302 */ | |
303 void sam_close(tamFile fp); | |
304 | |
305 /*! | |
306 @abstract Read one alignment from a SAM file handler | |
307 @param fp SAM file handler | |
308 @param header header information (ordered names of chromosomes) | |
309 @param b read alignment; all members in b will be updated | |
310 @return 0 if successful; otherwise negative | |
311 */ | |
312 int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b); | |
313 | |
314 /*! | |
315 @abstract Read header information from a TAB-delimited list file. | |
316 @param fn_list file name for the list | |
317 @return a pointer to the header structure | |
318 | |
319 @discussion Each line in this file consists of chromosome name and | |
320 the length of chromosome. | |
321 */ | |
322 bam_header_t *sam_header_read2(const char *fn_list); | |
323 | |
324 /*! | |
325 @abstract Read header from a SAM file (if present) | |
326 @param fp SAM file handler | |
327 @return pointer to header struct; 0 if no @SQ lines available | |
328 */ | |
329 bam_header_t *sam_header_read(tamFile fp); | |
330 | |
331 /*! | |
332 @abstract Parse @SQ lines a update a header struct | |
333 @param h pointer to the header struct to be updated | |
334 @return number of target sequences | |
335 | |
336 @discussion bam_header_t::{n_targets,target_len,target_name} will | |
337 be destroyed in the first place. | |
338 */ | |
339 int sam_header_parse(bam_header_t *h); | |
340 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name); | |
341 | |
342 /*! | |
343 @abstract Parse @RG lines a update a header struct | |
344 @param h pointer to the header struct to be updated | |
345 @return number of @RG lines | |
346 | |
347 @discussion bam_header_t::rg2lib will be destroyed in the first | |
348 place. | |
349 */ | |
350 int sam_header_parse_rg(bam_header_t *h); | |
351 | |
352 #define sam_write1(header, b) bam_view1(header, b) | |
353 | |
354 | |
355 /******************************** | |
356 * APIs for string dictionaries * | |
357 ********************************/ | |
358 | |
359 int bam_strmap_put(void *strmap, const char *rg, const char *lib); | |
360 const char *bam_strmap_get(const void *strmap, const char *rg); | |
361 void *bam_strmap_dup(const void*); | |
362 void *bam_strmap_init(); | |
363 void bam_strmap_destroy(void *strmap); | |
364 | |
365 | |
366 /********************* | |
367 * Low-level BAM I/O * | |
368 *********************/ | |
369 | |
370 /*! | |
371 @abstract Initialize a header structure. | |
372 @return the pointer to the header structure | |
373 | |
374 @discussion This function also modifies the global variable | |
375 bam_is_be. | |
376 */ | |
377 bam_header_t *bam_header_init(); | |
378 | |
379 /*! | |
380 @abstract Destroy a header structure. | |
381 @param header pointer to the header | |
382 */ | |
383 void bam_header_destroy(bam_header_t *header); | |
384 | |
385 /*! | |
386 @abstract Read a header structure from BAM. | |
387 @param fp BAM file handler, opened by bam_open() | |
388 @return pointer to the header structure | |
389 | |
390 @discussion The file position indicator must be placed at the | |
391 beginning of the file. Upon success, the position indicator will | |
392 be set at the start of the first alignment. | |
393 */ | |
394 bam_header_t *bam_header_read(bamFile fp); | |
395 | |
396 /*! | |
397 @abstract Write a header structure to BAM. | |
398 @param fp BAM file handler | |
399 @param header pointer to the header structure | |
400 @return always 0 currently | |
401 */ | |
402 int bam_header_write(bamFile fp, const bam_header_t *header); | |
403 | |
404 /*! | |
405 @abstract Read an alignment from BAM. | |
406 @param fp BAM file handler | |
407 @param b read alignment; all members are updated. | |
408 @return number of bytes read from the file | |
409 | |
410 @discussion The file position indicator must be | |
411 placed right before an alignment. Upon success, this function | |
412 will set the position indicator to the start of the next | |
413 alignment. This function is not affected by the machine | |
414 endianness. | |
415 */ | |
416 int bam_read1(bamFile fp, bam1_t *b); | |
417 | |
418 /*! | |
419 @abstract Write an alignment to BAM. | |
420 @param fp BAM file handler | |
421 @param c pointer to the bam1_core_t structure | |
422 @param data_len total length of variable size data related to | |
423 the alignment | |
424 @param data pointer to the concatenated data | |
425 @return number of bytes written to the file | |
426 | |
427 @discussion This function is not affected by the machine | |
428 endianness. | |
429 */ | |
430 int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data); | |
431 | |
432 /*! | |
433 @abstract Write an alignment to BAM. | |
434 @param fp BAM file handler | |
435 @param b alignment to write | |
436 @return number of bytes written to the file | |
437 | |
438 @abstract It is equivalent to: | |
439 bam_write1_core(fp, &b->core, b->data_len, b->data) | |
440 */ | |
441 int bam_write1(bamFile fp, const bam1_t *b); | |
442 | |
443 /*! @function | |
444 @abstract Initiate a pointer to bam1_t struct | |
445 */ | |
446 #define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t))) | |
447 | |
448 /*! @function | |
449 @abstract Free the memory allocated for an alignment. | |
450 @param b pointer to an alignment | |
451 */ | |
452 #define bam_destroy1(b) do { \ | |
453 if (b) { free((b)->data); free(b); } \ | |
454 } while (0) | |
455 | |
456 /*! | |
457 @abstract Format a BAM record in the SAM format | |
458 @param header pointer to the header structure | |
459 @param b alignment to print | |
460 @return a pointer to the SAM string | |
461 */ | |
462 char *bam_format1(const bam_header_t *header, const bam1_t *b); | |
463 | |
464 char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of); | |
465 | |
466 /*! | |
467 @abstract Check whether a BAM record is plausibly valid | |
468 @param header associated header structure, or NULL if unavailable | |
469 @param b alignment to validate | |
470 @return 0 if the alignment is invalid; non-zero otherwise | |
471 | |
472 @discussion Simple consistency check of some of the fields of the | |
473 alignment record. If the header is provided, several additional checks | |
474 are made. Not all fields are checked, so a non-zero result is not a | |
475 guarantee that the record is valid. However it is usually good enough | |
476 to detect when bam_seek() has been called with a virtual file offset | |
477 that is not the offset of an alignment record. | |
478 */ | |
479 int bam_validate1(const bam_header_t *header, const bam1_t *b); | |
480 | |
481 const char *bam_get_library(bam_header_t *header, const bam1_t *b); | |
482 | |
483 | |
484 /*************** | |
485 * pileup APIs * | |
486 ***************/ | |
487 | |
488 /*! @typedef | |
489 @abstract Structure for one alignment covering the pileup position. | |
490 @field b pointer to the alignment | |
491 @field qpos position of the read base at the pileup site, 0-based | |
492 @field indel indel length; 0 for no indel, positive for ins and negative for del | |
493 @field is_del 1 iff the base on the padded read is a deletion | |
494 @field level the level of the read in the "viewer" mode | |
495 | |
496 @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The | |
497 difference between the two functions is that the former does not | |
498 set bam_pileup1_t::level, while the later does. Level helps the | |
499 implementation of alignment viewers, but calculating this has some | |
500 overhead. | |
501 */ | |
502 typedef struct { | |
503 bam1_t *b; | |
504 int32_t qpos; | |
505 int indel, level; | |
506 uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28; | |
507 } bam_pileup1_t; | |
508 | |
509 typedef int (*bam_plp_auto_f)(void *data, bam1_t *b); | |
510 | |
511 struct __bam_plp_t; | |
512 typedef struct __bam_plp_t *bam_plp_t; | |
513 | |
514 bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data); | |
515 int bam_plp_push(bam_plp_t iter, const bam1_t *b); | |
516 const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); | |
517 const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp); | |
518 void bam_plp_set_mask(bam_plp_t iter, int mask); | |
519 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt); | |
520 void bam_plp_reset(bam_plp_t iter); | |
521 void bam_plp_destroy(bam_plp_t iter); | |
522 | |
523 struct __bam_mplp_t; | |
524 typedef struct __bam_mplp_t *bam_mplp_t; | |
525 | |
526 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data); | |
527 void bam_mplp_destroy(bam_mplp_t iter); | |
528 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt); | |
529 int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp); | |
530 | |
531 /*! @typedef | |
532 @abstract Type of function to be called by bam_plbuf_push(). | |
533 @param tid chromosome ID as is defined in the header | |
534 @param pos start coordinate of the alignment, 0-based | |
535 @param n number of elements in pl array | |
536 @param pl array of alignments | |
537 @param data user provided data | |
538 @discussion See also bam_plbuf_push(), bam_plbuf_init() and bam_pileup1_t. | |
539 */ | |
540 typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); | |
541 | |
542 typedef struct { | |
543 bam_plp_t iter; | |
544 bam_pileup_f func; | |
545 void *data; | |
546 } bam_plbuf_t; | |
547 | |
548 void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask); | |
549 void bam_plbuf_reset(bam_plbuf_t *buf); | |
550 bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data); | |
551 void bam_plbuf_destroy(bam_plbuf_t *buf); | |
552 int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf); | |
553 | |
554 int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data); | |
555 | |
556 struct __bam_lplbuf_t; | |
557 typedef struct __bam_lplbuf_t bam_lplbuf_t; | |
558 | |
559 void bam_lplbuf_reset(bam_lplbuf_t *buf); | |
560 | |
561 /*! @abstract bam_plbuf_init() equivalent with level calculated. */ | |
562 bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data); | |
563 | |
564 /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */ | |
565 void bam_lplbuf_destroy(bam_lplbuf_t *tv); | |
566 | |
567 /*! @abstract bam_plbuf_push() equivalent with level calculated. */ | |
568 int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf); | |
569 | |
570 | |
571 /********************* | |
572 * BAM indexing APIs * | |
573 *********************/ | |
574 | |
575 struct __bam_index_t; | |
576 typedef struct __bam_index_t bam_index_t; | |
577 | |
578 /*! | |
579 @abstract Build index for a BAM file. | |
580 @discussion Index file "fn.bai" will be created. | |
581 @param fn name of the BAM file | |
582 @return always 0 currently | |
583 */ | |
584 int bam_index_build(const char *fn); | |
585 | |
586 /*! | |
587 @abstract Load index from file "fn.bai". | |
588 @param fn name of the BAM file (NOT the index file) | |
589 @return pointer to the index structure | |
590 */ | |
591 bam_index_t *bam_index_load(const char *fn); | |
592 | |
593 /*! | |
594 @abstract Destroy an index structure. | |
595 @param idx pointer to the index structure | |
596 */ | |
597 void bam_index_destroy(bam_index_t *idx); | |
598 | |
599 /*! @typedef | |
600 @abstract Type of function to be called by bam_fetch(). | |
601 @param b the alignment | |
602 @param data user provided data | |
603 */ | |
604 typedef int (*bam_fetch_f)(const bam1_t *b, void *data); | |
605 | |
606 /*! | |
607 @abstract Retrieve the alignments that are overlapped with the | |
608 specified region. | |
609 | |
610 @discussion A user defined function will be called for each | |
611 retrieved alignment ordered by its start position. | |
612 | |
613 @param fp BAM file handler | |
614 @param idx pointer to the alignment index | |
615 @param tid chromosome ID as is defined in the header | |
616 @param beg start coordinate, 0-based | |
617 @param end end coordinate, 0-based | |
618 @param data user provided data (will be transferred to func) | |
619 @param func user defined function | |
620 */ | |
621 int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func); | |
622 | |
623 bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end); | |
624 int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b); | |
625 void bam_iter_destroy(bam_iter_t iter); | |
626 | |
627 /*! | |
628 @abstract Parse a region in the format: "chr2:100,000-200,000". | |
629 @discussion bam_header_t::hash will be initialized if empty. | |
630 @param header pointer to the header structure | |
631 @param str string to be parsed | |
632 @param ref_id the returned chromosome ID | |
633 @param begin the returned start coordinate | |
634 @param end the returned end coordinate | |
635 @return 0 on success; -1 on failure | |
636 */ | |
637 int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end); | |
638 | |
639 | |
640 /************************** | |
641 * APIs for optional tags * | |
642 **************************/ | |
643 | |
644 /*! | |
645 @abstract Retrieve data of a tag | |
646 @param b pointer to an alignment struct | |
647 @param tag two-character tag to be retrieved | |
648 | |
649 @return pointer to the type and data. The first character is the | |
650 type that can be 'iIsScCdfAZH'. | |
651 | |
652 @discussion Use bam_aux2?() series to convert the returned data to | |
653 the corresponding type. | |
654 */ | |
655 uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]); | |
656 | |
657 int32_t bam_aux2i(const uint8_t *s); | |
658 float bam_aux2f(const uint8_t *s); | |
659 double bam_aux2d(const uint8_t *s); | |
660 char bam_aux2A(const uint8_t *s); | |
661 char *bam_aux2Z(const uint8_t *s); | |
662 | |
663 int bam_aux_del(bam1_t *b, uint8_t *s); | |
664 void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data); | |
665 uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get() | |
666 | |
667 | |
668 /***************** | |
669 * Miscellaneous * | |
670 *****************/ | |
671 | |
672 /*! | |
673 @abstract Calculate the rightmost coordinate of an alignment on the | |
674 reference genome. | |
675 | |
676 @param c pointer to the bam1_core_t structure | |
677 @param cigar the corresponding CIGAR array (from bam1_t::cigar) | |
678 @return the rightmost coordinate, 0-based | |
679 */ | |
680 uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar); | |
681 | |
682 /*! | |
683 @abstract Calculate the length of the query sequence from CIGAR. | |
684 @param c pointer to the bam1_core_t structure | |
685 @param cigar the corresponding CIGAR array (from bam1_t::cigar) | |
686 @return length of the query sequence | |
687 */ | |
688 int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar); | |
689 | |
690 #ifdef __cplusplus | |
691 } | |
692 #endif | |
693 | |
694 /*! | |
695 @abstract Calculate the minimum bin that contains a region [beg,end). | |
696 @param beg start of the region, 0-based | |
697 @param end end of the region, 0-based | |
698 @return bin | |
699 */ | |
700 static inline int bam_reg2bin(uint32_t beg, uint32_t end) | |
701 { | |
702 --end; | |
703 if (beg>>14 == end>>14) return 4681 + (beg>>14); | |
704 if (beg>>17 == end>>17) return 585 + (beg>>17); | |
705 if (beg>>20 == end>>20) return 73 + (beg>>20); | |
706 if (beg>>23 == end>>23) return 9 + (beg>>23); | |
707 if (beg>>26 == end>>26) return 1 + (beg>>26); | |
708 return 0; | |
709 } | |
710 | |
711 /*! | |
712 @abstract Copy an alignment | |
713 @param bdst destination alignment struct | |
714 @param bsrc source alignment struct | |
715 @return pointer to the destination alignment struct | |
716 */ | |
717 static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc) | |
718 { | |
719 uint8_t *data = bdst->data; | |
720 int m_data = bdst->m_data; // backup data and m_data | |
721 if (m_data < bsrc->data_len) { // double the capacity | |
722 m_data = bsrc->data_len; kroundup32(m_data); | |
723 data = (uint8_t*)realloc(data, m_data); | |
724 } | |
725 memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data | |
726 *bdst = *bsrc; // copy the rest | |
727 // restore the backup | |
728 bdst->m_data = m_data; | |
729 bdst->data = data; | |
730 return bdst; | |
731 } | |
732 | |
733 /*! | |
734 @abstract Duplicate an alignment | |
735 @param src source alignment struct | |
736 @return pointer to the destination alignment struct | |
737 */ | |
738 static inline bam1_t *bam_dup1(const bam1_t *src) | |
739 { | |
740 bam1_t *b; | |
741 b = bam_init1(); | |
742 *b = *src; | |
743 b->m_data = b->data_len; | |
744 b->data = (uint8_t*)calloc(b->data_len, 1); | |
745 memcpy(b->data, src->data, b->data_len); | |
746 return b; | |
747 } | |
748 | |
749 static inline int bam_aux_type2size(int x) | |
750 { | |
751 if (x == 'C' || x == 'c' || x == 'A') return 1; | |
752 else if (x == 'S' || x == 's') return 2; | |
753 else if (x == 'I' || x == 'i' || x == 'f') return 4; | |
754 else return 0; | |
755 } | |
756 | |
757 | |
758 #endif |