comparison SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:74f5ea818cea
1 /* The MIT License
2
3 Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca>
4
5 Permission is hereby granted, free of charge, to any person obtaining
6 a copy of this software and associated documentation files (the
7 "Software"), to deal in the Software without restriction, including
8 without limitation the rights to use, copy, modify, merge, publish,
9 distribute, sublicense, and/or sell copies of the Software, and to
10 permit persons to whom the Software is furnished to do so, subject to
11 the following conditions:
12
13 The above copyright notice and this permission notice shall be
14 included in all copies or substantial portions of the Software.
15
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 */
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <math.h>
29 #include <unistd.h>
30 #include <stdbool.h>
31 #include "sam.h"
32 #include "faidx.h"
33 #include "khash.h"
34
35 #define TYPE_mb 0
36 #define TYPE_m 1
37 #define TYPE_b 2
38 #define TYPE_M 3
39 #define TYPE_Mb 4
40 #define TYPE_MB 5
41 #define TYPE_SNVMix1 6
42
43 #define Q_PILEUP 0
44 #define M_PILEUP 1
45 #define S_PILEUP 2
46 #define BAM_FILE 3
47
48 #define PHRED_MAX 200
49
50 #define N 0
51 #define A 1
52 #define G 2
53 #define C 3
54 #define T 4
55
56 char base_code[] = {'N','A','G','C','T'};
57
58 struct params_train {
59 long double *alpha;
60 long double *beta;
61 long double *delta;
62 char *param_file;
63 int max_iter;
64 unsigned char **bQ;
65 unsigned char **mQ;
66 signed char **calls;
67 char *ref;
68 int *pos;
69 int *depth;
70 int len;
71 };
72
73 typedef struct {
74 FILE *input;
75 FILE *output;
76 char *inputfile;
77 char *outputfile;
78 char *modelfile;
79 char *bamfile;
80 char *fastafile;
81 char *listfile;
82 int filter_type;
83 int filter_chast;
84 int filter_dup;
85 int train;
86 int classify;
87 int filter;
88 int full;
89 int input_type; // 0 = processed, 1 = maq pileup, 2 = sam pileup
90 long double *mu;
91 long double *pi;
92 int max_iter;
93 int bQ;
94 int mQ;
95 int debug;
96 //struct {
97 // int alpha[3];
98 // int beta[3];
99 //} train;
100 struct params_train trainP;
101 int states;
102 int window;
103 } param_struct;
104
105 typedef int *indel_list_t;
106 KHASH_MAP_INIT_INT64(64, indel_list_t)
107
108 typedef struct{
109 bool in_use;
110 uint32_t tid; // get with snvmix_pl->in->header->target_name[tid],
111 uint32_t pos;
112 char ref;
113 char nref;
114 int ref_count;
115 int nref_count;
116 long double *p;
117 int maxP;
118 // 0 = REF , 1 = NREF
119 int forward[2];
120 int reverse[2];
121 int good_pair[2];
122 int bad_pair[2];
123 int c_clean[2];
124 int c_ins[2];
125 int c_del[2];
126 int c_junc[2];
127 int nref_edges; // FIXME: fixed 5 bp edges
128 int nref_center; //
129 int indel_pos; // How many reads have a deletion at that site
130 int indel_near; // How many reasd have a deletion overall
131 int aln_unique_pos;
132 int full_depth; // Full depth at this position
133 int raw_cvg[5];
134 int thr_cvg[5];
135 } snvmix_call_t;
136
137 typedef struct {
138 param_struct *params;
139 int begin, end;
140 samfile_t *in;
141 faidx_t *fai;
142 int tid, len;
143 char *ref;
144 /* might want to move this elsewhere */
145 khash_t(64) *hash;
146 long double *notmu, *log_pi, *b;
147 long double phred[PHRED_MAX + 1];
148 int *calls, depth_allocated;
149 snvmix_call_t *buffer;
150 int n_buffer, b_start, b_end;
151 // Extra flags
152 } snvmix_pl_t;
153
154
155 void updatePhred(long double *phredTable);
156 void initPhred(long double *phredTable, int elem);
157
158 void resetParams(param_struct *params);
159 void initSNVMix(int argc , char **argv, param_struct *params);
160 void usage(char *selfname);
161
162 void allocateParameters(param_struct *params);
163 void setTrainingParameters(param_struct *params);
164 void setClassificationParameters(param_struct *params);
165 void readParamFile(param_struct *params, char type);
166
167
168 void snvmixClassify_qualities(param_struct *params);
169 void snvmixClassify_pileup(param_struct *params);
170
171 int snvmixClassify_bamfile(param_struct *params);
172 static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
173
174 inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref);
175
176 inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params);
177 inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ);
178 inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ);
179
180 void snvmixTrain_qualities(param_struct *params);
181 void snvmixGetTrainSet_pileup(param_struct *params);
182 void snvmixTrain_pileup(param_struct *params);
183
184 long double normalise(long double *values, int len);
185
186 char **__bam_get_lines(const char *fn, int *_n);
187 static khash_t(64) *load_pos(const char *fn, bam_header_t *h)
188 {
189 char **list;
190 int i, j, n, *fields, max_fields;
191 khash_t(64) *hash;
192 bam_init_header_hash(h);
193 list = __bam_get_lines(fn, &n);
194 fprintf(stderr,"got %d lines\n", n);
195 hash = kh_init(64);
196 max_fields = 0; fields = 0;
197 for (i = 0; i < n; ++i) {
198 char *str = list[i];
199 int chr, n_fields, ret;
200 khint_t k;
201 uint64_t x;
202 n_fields = ksplit_core(str, 0, &max_fields, &fields);
203 if (n_fields < 2) continue;
204 chr = bam_get_tid(h, str + fields[0]);
205 if (chr < 0) {
206 fprintf(stderr, "[load_pos] unknown reference sequence name: %s\n", str + fields[0]);
207 continue;
208 }
209 x = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1);
210 k = kh_put(64, hash, x, &ret);
211 if (ret == 0) {
212 fprintf(stderr, "[load_pos] position %s:%s has been loaded.\n", str+fields[0], str+fields[1]);
213 continue;
214 }
215 kh_val(hash, k) = 0;
216 if (n_fields > 2) {
217 // count
218 for (j = 2; j < n_fields; ++j) {
219 char *s = str + fields[j];
220 if ((*s != '+' && *s != '-') || !isdigit(s[1])) break;
221 }
222 if (j > 2) { // update kh_val()
223 int *q, y, z;
224 q = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int));
225 q[0] = j - 2; z = j; y = 1;
226 for (j = 2; j < z; ++j)
227 q[y++] = atoi(str + fields[j]);
228 }
229 }
230 free(str);
231 }
232 free(list); free(fields);
233 return hash;
234 }