Mercurial > repos > ryanmorin > nextgen_variant_identification
comparison SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:74f5ea818cea |
---|---|
1 /* The MIT License | |
2 | |
3 Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca> | |
4 | |
5 Permission is hereby granted, free of charge, to any person obtaining | |
6 a copy of this software and associated documentation files (the | |
7 "Software"), to deal in the Software without restriction, including | |
8 without limitation the rights to use, copy, modify, merge, publish, | |
9 distribute, sublicense, and/or sell copies of the Software, and to | |
10 permit persons to whom the Software is furnished to do so, subject to | |
11 the following conditions: | |
12 | |
13 The above copyright notice and this permission notice shall be | |
14 included in all copies or substantial portions of the Software. | |
15 | |
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | |
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | |
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | |
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
23 SOFTWARE. | |
24 */ | |
25 #include <stdio.h> | |
26 #include <stdlib.h> | |
27 #include <string.h> | |
28 #include <math.h> | |
29 #include <unistd.h> | |
30 #include <stdbool.h> | |
31 #include "sam.h" | |
32 #include "faidx.h" | |
33 #include "khash.h" | |
34 | |
35 #define TYPE_mb 0 | |
36 #define TYPE_m 1 | |
37 #define TYPE_b 2 | |
38 #define TYPE_M 3 | |
39 #define TYPE_Mb 4 | |
40 #define TYPE_MB 5 | |
41 #define TYPE_SNVMix1 6 | |
42 | |
43 #define Q_PILEUP 0 | |
44 #define M_PILEUP 1 | |
45 #define S_PILEUP 2 | |
46 #define BAM_FILE 3 | |
47 | |
48 #define PHRED_MAX 200 | |
49 | |
50 #define N 0 | |
51 #define A 1 | |
52 #define G 2 | |
53 #define C 3 | |
54 #define T 4 | |
55 | |
56 char base_code[] = {'N','A','G','C','T'}; | |
57 | |
58 struct params_train { | |
59 long double *alpha; | |
60 long double *beta; | |
61 long double *delta; | |
62 char *param_file; | |
63 int max_iter; | |
64 unsigned char **bQ; | |
65 unsigned char **mQ; | |
66 signed char **calls; | |
67 char *ref; | |
68 int *pos; | |
69 int *depth; | |
70 int len; | |
71 }; | |
72 | |
73 typedef struct { | |
74 FILE *input; | |
75 FILE *output; | |
76 char *inputfile; | |
77 char *outputfile; | |
78 char *modelfile; | |
79 char *bamfile; | |
80 char *fastafile; | |
81 char *listfile; | |
82 int filter_type; | |
83 int filter_chast; | |
84 int filter_dup; | |
85 int train; | |
86 int classify; | |
87 int filter; | |
88 int full; | |
89 int input_type; // 0 = processed, 1 = maq pileup, 2 = sam pileup | |
90 long double *mu; | |
91 long double *pi; | |
92 int max_iter; | |
93 int bQ; | |
94 int mQ; | |
95 int debug; | |
96 //struct { | |
97 // int alpha[3]; | |
98 // int beta[3]; | |
99 //} train; | |
100 struct params_train trainP; | |
101 int states; | |
102 int window; | |
103 } param_struct; | |
104 | |
105 typedef int *indel_list_t; | |
106 KHASH_MAP_INIT_INT64(64, indel_list_t) | |
107 | |
108 typedef struct{ | |
109 bool in_use; | |
110 uint32_t tid; // get with snvmix_pl->in->header->target_name[tid], | |
111 uint32_t pos; | |
112 char ref; | |
113 char nref; | |
114 int ref_count; | |
115 int nref_count; | |
116 long double *p; | |
117 int maxP; | |
118 // 0 = REF , 1 = NREF | |
119 int forward[2]; | |
120 int reverse[2]; | |
121 int good_pair[2]; | |
122 int bad_pair[2]; | |
123 int c_clean[2]; | |
124 int c_ins[2]; | |
125 int c_del[2]; | |
126 int c_junc[2]; | |
127 int nref_edges; // FIXME: fixed 5 bp edges | |
128 int nref_center; // | |
129 int indel_pos; // How many reads have a deletion at that site | |
130 int indel_near; // How many reasd have a deletion overall | |
131 int aln_unique_pos; | |
132 int full_depth; // Full depth at this position | |
133 int raw_cvg[5]; | |
134 int thr_cvg[5]; | |
135 } snvmix_call_t; | |
136 | |
137 typedef struct { | |
138 param_struct *params; | |
139 int begin, end; | |
140 samfile_t *in; | |
141 faidx_t *fai; | |
142 int tid, len; | |
143 char *ref; | |
144 /* might want to move this elsewhere */ | |
145 khash_t(64) *hash; | |
146 long double *notmu, *log_pi, *b; | |
147 long double phred[PHRED_MAX + 1]; | |
148 int *calls, depth_allocated; | |
149 snvmix_call_t *buffer; | |
150 int n_buffer, b_start, b_end; | |
151 // Extra flags | |
152 } snvmix_pl_t; | |
153 | |
154 | |
155 void updatePhred(long double *phredTable); | |
156 void initPhred(long double *phredTable, int elem); | |
157 | |
158 void resetParams(param_struct *params); | |
159 void initSNVMix(int argc , char **argv, param_struct *params); | |
160 void usage(char *selfname); | |
161 | |
162 void allocateParameters(param_struct *params); | |
163 void setTrainingParameters(param_struct *params); | |
164 void setClassificationParameters(param_struct *params); | |
165 void readParamFile(param_struct *params, char type); | |
166 | |
167 | |
168 void snvmixClassify_qualities(param_struct *params); | |
169 void snvmixClassify_pileup(param_struct *params); | |
170 | |
171 int snvmixClassify_bamfile(param_struct *params); | |
172 static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); | |
173 | |
174 inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref); | |
175 | |
176 inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params); | |
177 inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ); | |
178 inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ); | |
179 | |
180 void snvmixTrain_qualities(param_struct *params); | |
181 void snvmixGetTrainSet_pileup(param_struct *params); | |
182 void snvmixTrain_pileup(param_struct *params); | |
183 | |
184 long double normalise(long double *values, int len); | |
185 | |
186 char **__bam_get_lines(const char *fn, int *_n); | |
187 static khash_t(64) *load_pos(const char *fn, bam_header_t *h) | |
188 { | |
189 char **list; | |
190 int i, j, n, *fields, max_fields; | |
191 khash_t(64) *hash; | |
192 bam_init_header_hash(h); | |
193 list = __bam_get_lines(fn, &n); | |
194 fprintf(stderr,"got %d lines\n", n); | |
195 hash = kh_init(64); | |
196 max_fields = 0; fields = 0; | |
197 for (i = 0; i < n; ++i) { | |
198 char *str = list[i]; | |
199 int chr, n_fields, ret; | |
200 khint_t k; | |
201 uint64_t x; | |
202 n_fields = ksplit_core(str, 0, &max_fields, &fields); | |
203 if (n_fields < 2) continue; | |
204 chr = bam_get_tid(h, str + fields[0]); | |
205 if (chr < 0) { | |
206 fprintf(stderr, "[load_pos] unknown reference sequence name: %s\n", str + fields[0]); | |
207 continue; | |
208 } | |
209 x = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1); | |
210 k = kh_put(64, hash, x, &ret); | |
211 if (ret == 0) { | |
212 fprintf(stderr, "[load_pos] position %s:%s has been loaded.\n", str+fields[0], str+fields[1]); | |
213 continue; | |
214 } | |
215 kh_val(hash, k) = 0; | |
216 if (n_fields > 2) { | |
217 // count | |
218 for (j = 2; j < n_fields; ++j) { | |
219 char *s = str + fields[j]; | |
220 if ((*s != '+' && *s != '-') || !isdigit(s[1])) break; | |
221 } | |
222 if (j > 2) { // update kh_val() | |
223 int *q, y, z; | |
224 q = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int)); | |
225 q[0] = j - 2; z = j; y = 1; | |
226 for (j = 2; j < z; ++j) | |
227 q[y++] = atoi(str + fields[j]); | |
228 } | |
229 } | |
230 free(str); | |
231 } | |
232 free(list); free(fields); | |
233 return hash; | |
234 } |