annotate bwa-0.6.2/bwtaln.h @ 2:a294fbfcb1db draft default tip

Uploaded BWA
author ashvark
date Fri, 18 Jul 2014 07:55:59 -0400
parents dd1186b11b3b
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
1 #ifndef BWTALN_H
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
2 #define BWTALN_H
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
3
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
4 #include <stdint.h>
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
5 #include "bwt.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
6
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
7 #define BWA_TYPE_NO_MATCH 0
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
8 #define BWA_TYPE_UNIQUE 1
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
9 #define BWA_TYPE_REPEAT 2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
10 #define BWA_TYPE_MATESW 3
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
11
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
12 #define SAM_FPD 1 // paired
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
13 #define SAM_FPP 2 // properly paired
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
14 #define SAM_FSU 4 // self-unmapped
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
15 #define SAM_FMU 8 // mate-unmapped
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
16 #define SAM_FSR 16 // self on the reverse strand
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
17 #define SAM_FMR 32 // mate on the reverse strand
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
18 #define SAM_FR1 64 // this is read one
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
19 #define SAM_FR2 128 // this is read two
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
20 #define SAM_FSC 256 // secondary alignment
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
21
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
22 #define BWA_AVG_ERR 0.02
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
23 #define BWA_MIN_RDLEN 35 // for read trimming
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
24
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
25 #define BWA_MAX_BCLEN 63 // maximum barcode length; this constant must not exceed 127
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
26
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
27 #ifndef bns_pac
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
28 #define bns_pac(pac, k) ((pac)[(k)>>2] >> ((~(k)&3)<<1) & 3)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
29 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
30
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
31 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
32 bwtint_t w;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
33 int bid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
34 } bwt_width_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
35
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
36 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
37 uint32_t n_mm:16, n_gapo:8, n_gape:8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
38 int score;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
39 bwtint_t k, l;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
40 } bwt_aln1_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
41
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
42 typedef uint16_t bwa_cigar_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
43 /* rgoya: If changing order of bytes, beware of operations like:
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
44 * s->cigar[0] += s->full_len - s->len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
45 */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
46 #define CIGAR_OP_SHIFT 14
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
47 #define CIGAR_LN_MASK 0x3fff
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
48
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
49 #define __cigar_op(__cigar) ((__cigar)>>CIGAR_OP_SHIFT)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
50 #define __cigar_len(__cigar) ((__cigar)&CIGAR_LN_MASK)
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
51 #define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len))
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
52
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
53 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
54 uint32_t n_cigar:15, gap:8, mm:8, strand:1;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
55 bwtint_t pos;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
56 bwa_cigar_t *cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
57 } bwt_multi1_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
58
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
59 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
60 char *name;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
61 ubyte_t *seq, *rseq, *qual;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
62 uint32_t len:20, strand:1, type:2, dummy:1, extra_flag:8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
63 uint32_t n_mm:8, n_gapo:8, n_gape:8, mapQ:8;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
64 int score;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
65 int clip_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
66 // alignments in SA coordinates
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
67 int n_aln;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
68 bwt_aln1_t *aln;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
69 // multiple hits
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
70 int n_multi;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
71 bwt_multi1_t *multi;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
72 // alignment information
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
73 bwtint_t sa, pos;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
74 uint64_t c1:28, c2:28, seQ:8; // number of top1 and top2 hits; single-end mapQ
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
75 int n_cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
76 bwa_cigar_t *cigar;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
77 // for multi-threading only
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
78 int tid;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
79 // barcode
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
80 char bc[BWA_MAX_BCLEN+1]; // null terminated; up to BWA_MAX_BCLEN bases
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
81 // NM and MD tags
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
82 uint32_t full_len:20, nm:12;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
83 char *md;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
84 } bwa_seq_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
85
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
86 #define BWA_MODE_GAPE 0x01
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
87 #define BWA_MODE_COMPREAD 0x02
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
88 #define BWA_MODE_LOGGAP 0x04
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
89 #define BWA_MODE_CFY 0x08
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
90 #define BWA_MODE_NONSTOP 0x10
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
91 #define BWA_MODE_BAM 0x20
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
92 #define BWA_MODE_BAM_SE 0x40
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
93 #define BWA_MODE_BAM_READ1 0x80
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
94 #define BWA_MODE_BAM_READ2 0x100
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
95 #define BWA_MODE_IL13 0x200
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
96
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
97 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
98 int s_mm, s_gapo, s_gape;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
99 int mode; // bits 24-31 hold the barcode length
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
100 int indel_end_skip, max_del_occ, max_entries;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
101 float fnr;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
102 int max_diff, max_gapo, max_gape;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
103 int max_seed_diff, seed_len;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
104 int n_threads;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
105 int max_top2;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
106 int trim_qual;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
107 } gap_opt_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
108
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
109 #define BWA_PET_STD 1
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
110 #define BWA_PET_SOLID 2
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
111
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
112 typedef struct {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
113 int max_isize, force_isize;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
114 int max_occ;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
115 int n_multi, N_multi;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
116 int type, is_sw, is_preload;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
117 double ap_prior;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
118 } pe_opt_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
119
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
120 struct __bwa_seqio_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
121 typedef struct __bwa_seqio_t bwa_seqio_t;
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
122
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
123 #ifdef __cplusplus
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
124 extern "C" {
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
125 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
126
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
127 gap_opt_t *gap_init_opt(void); // explicit (void): a true prototype, so C callers passing arguments are diagnosed
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
128 void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
129
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
130 bwa_seqio_t *bwa_seq_open(const char *fn);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
131 bwa_seqio_t *bwa_bam_open(const char *fn, int which);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
132 void bwa_seq_close(bwa_seqio_t *bs);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
133 void seq_reverse(int len, ubyte_t *seq, int is_comp);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
134 bwa_seq_t *bwa_read_seq(bwa_seqio_t *seq, int n_needed, int *n, int mode, int trim_qual);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
135 void bwa_free_read_seq(int n_seqs, bwa_seq_t *seqs);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
136
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
137 int bwa_cal_maxdiff(int l, double err, double thres);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
138 void bwa_cal_sa_reg_gap(int tid, bwt_t *const bwt, int n_seqs, bwa_seq_t *seqs, const gap_opt_t *opt);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
139
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
140 void bwa_cs2nt_core(bwa_seq_t *p, bwtint_t l_pac, ubyte_t *pac);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
141
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
142
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
143 /* rgoya: Temporary clone of aln_path2cigar to accommodate bwa_cigar_t,
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
144 __cigar_op and __cigar_len while keeping stdaln stand-alone */
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
145 #include "stdaln.h"
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
146
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
147 bwa_cigar_t *bwa_aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
148
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
149 #ifdef __cplusplus
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
150 }
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
151 #endif
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
152
dd1186b11b3b Uploaded BWA
ashvark
parents:
diff changeset
153 #endif