annotate SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.c @ 5:a4975ec34575

Uploaded
author ryanmorin
date Mon, 17 Oct 2011 14:57:09 -0400
parents 74f5ea818cea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1 /* The MIT License
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
3 Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
4
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
5 Permission is hereby granted, free of charge, to any person obtaining
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
6 a copy of this software and associated documentation files (the
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
7 "Software"), to deal in the Software without restriction, including
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
8 without limitation the rights to use, copy, modify, merge, publish,
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
9 distribute, sublicense, and/or sell copies of the Software, and to
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
10 permit persons to whom the Software is furnished to do so, subject to
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
11 the following conditions:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
12
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
13 The above copyright notice and this permission notice shall be
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
14 included in all copies or substantial portions of the Software.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
15
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
23 SOFTWARE.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
24 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
25
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
26 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
27 C Implementation of SNVMix2
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
28 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
29
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
30 #define VERSION "0.12.1-rc1"
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
31
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
32 #include <stdio.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
33 #include <stdlib.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
34 #include <string.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
35 #include <math.h>
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
36 #include "SNVMix2.h"
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
37
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
38 #include "bam.h"
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
39
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
40 #define START_QNUM 1000
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
41
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
42 int main(int argc, char **argv) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
43
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
44 param_struct params;// = {NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0};
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
45
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
46 initSNVMix(argc, argv, &params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
47
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
48 if(params.classify || params.filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
49 if(params.input_type == Q_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
50 snvmixClassify_qualities(&params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
51 else if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
52 snvmixClassify_pileup(&params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
53 else if(params.input_type == BAM_FILE) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
54 fprintf(stderr,"Output is:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
55 fprintf(stderr,"\tchr:pos ref nref ref:ref_count,nref:nref_count,pAA,pAB,pBB,maxP");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
56 fprintf(stderr,"\tref_fwd ref_rev");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
57 fprintf(stderr,"\tnref_fwd nref_rev");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
58 fprintf(stderr,"\tnref_center nref_edges");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
59 fprintf(stderr,"\tindel_pos");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
60 fprintf(stderr,"\tref_clean nref_clean");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
61 fprintf(stderr,"\tref_bad_pair nref_bad_pair");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
62 fprintf(stderr,"\tref_ins nref_ins");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
63 fprintf(stderr,"\tref_del nref_del");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
64 fprintf(stderr,"\tref_junc nref_junc");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
65 fprintf(stderr,"\tnref_in_unique_pos");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
66 fprintf(stderr,"\traw[A,C,G,T,N]");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
67 fprintf(stderr,"\tthr[A,C,G,T,N]\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
68 snvmixClassify_bamfile(&params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
69 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
70 } else if(params.train) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
71 if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
72 snvmixTrain_pileup(&params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
73 else if(params.input_type == Q_PILEUP) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
74 fprintf(stderr,"Sorry, Training with quality-type input is not yet implemented\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
75 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
76 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
77 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
78 return(0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
79 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
80
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
81 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
82 void snvmixClassify_qualities
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
83 Arguments:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
84 *params : pointer to model parameters and command line options
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
85 Function:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
86 Reads a pilep-style file that has been preprocessed to include the quality of
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
87 calls being the reference allele and the maping quality both in decimal values.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
88
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
89 For each line it evaluates de model according to the parameters in *params and
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
90 outputs the corresponding SNV prediction values.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
91
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
92 This function may come in handy when filtering of calls is done by a
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
93 different program or the base-calling and maping quality information is not
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
94 in a pileup/phred format.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
95
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
96 The file read is a tab-separated file where the columns are:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
97 - target sequence (e.g. chromosome)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
98 - sequence position
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
99 - reference base
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
100 - non-reference base
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
101 - comma separated probabilities of the call being the reference
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
102 - comma separated mapping qualities, one for each base call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
103
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
104 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
105 void snvmixClassify_qualities(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
106 FILE *fptrIN, *fptrOUT;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
107
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
108 char *line = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
109 size_t line_len = 0, prev_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
110 int read_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
111
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
112 int qual_num = 0, col_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
113
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
114 char *col, *qual;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
115 char *col_sep = "\t", *col_str, *col_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
116 char *qual_sep = ",", *qual_str, *qual_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
117 char *chr, *pos, *ref, *nref;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
118
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
119 long double *bQ, *mQ, *tmpQ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
120 int pos_int, depth = 0, maxp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
121 long int depth_allocated = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
122
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
123 long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
124 int i, k, states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
125
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
126 states = params->states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
127
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
128 // Initialize local values of parameters
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
129 mu = params->mu;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
130 pi = params->pi;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
131
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
132 if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate b\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
133 if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate notmu\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
134 if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate log_pi\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
135
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
136 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
137 notmu[k] = 1 - mu[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
138 log_pi[k] = logl(pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
139 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
140 fptrIN = params->input;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
141 fptrOUT = params->output;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
142
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
143
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
144 if( (bQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
145 perror("ERROR allocating initial space for bQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
146 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
147 if( (mQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
148 perror("ERROR allocating initial space for mQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
149 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
150 depth_allocated = START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
151 #if defined __linux__ || defined _GNU_SOURCE
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
152 while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
153 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
154 #ifdef __APPLE__
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
155 while( (line = fgetln(fptrIN,&line_len)) != NULL )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
156 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
157 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
158 line[line_len-1] = '\0';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
159 depth = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
160 col_ptr = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
161 for(col_num = 0, col_str = line; ; col_num++, col_str = NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
162 col = strtok_r(col_str, "\t", &col_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
163 if(col == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
164 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
165 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
166 if(col_num == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
167 chr = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
168 } else if(col_num == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
169 pos = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
170 pos_int = strtol(pos, NULL, 10);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
171 } else if(col_num == 2) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
172 ref = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
173 } else if(col_num == 3) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
174 nref = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
175 } else if(col_num == 4) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
176 for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
177 qual = strtok_r(qual_str, ",", &qual_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
178 if(qual == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
179 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
180 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
181 if(qual_num >= depth_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
182 if( (bQ = realloc(bQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
183 perror("ERROR allocating bQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
184 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
185 if( (mQ = realloc(mQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
186 perror("ERROR allocating mQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
187 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
188 depth_allocated = depth_allocated + START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
189 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
190 bQ[qual_num] = atof(qual);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
191 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
192 depth = qual_num;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
193 } else if(col_num == 5) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
194 for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
195 qual = strtok_r(qual_str, ",", &qual_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
196 if(qual == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
197 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
198 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
199 if(qual_num >= depth_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
200 fprintf(stderr, "FATAL ERROR: should not have more mapping qualities than base qualities\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
201 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
202 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
203 mQ[qual_num] = atof(qual);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
204 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
205 if(depth != qual_num) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
206 fprintf(stderr, "FATAL ERROR: should not have more base qualities than mapping qualities\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
207 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
208 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
209 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
210 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
211
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
212
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
213 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
214 b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
215
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
216 //b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
217 for(qual_num = 0; qual_num < depth; qual_num++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
218 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
219 b[k] = b[k] + logl((1- mQ[qual_num])*0.5 + mQ[qual_num] * (bQ[qual_num]*mu[k]+(1-bQ[qual_num])*notmu[k]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
220
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
221 z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
222 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
223 b[k] = expl(b[k] + log_pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
224 z += b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
225 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
226 if(!z) { z = 1; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
227 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
228 b[k] = b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
229 maxp = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
230 z = b[0];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
231 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
232 if( b[k] > z) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
233 maxp = k;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
234 z = b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
235 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
236
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
237 fprintf(fptrOUT,"%s\t%s\t%s\t%s",chr, pos, ref, nref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
238 for(k = 0; k , states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
239 fprintf(fptrOUT,"%0.10Lf,",b[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
240 fprintf(fptrOUT,"%d\n",maxp+1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
241 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
242 fclose(fptrIN);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
243 fclose(fptrOUT);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
244 free(line);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
245
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
246 free(b); free(notmu); free(log_pi);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
247 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
248
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
249
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
250 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
251 void snvmixClassify_pileup
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
252 Arguments:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
253 *params : pointer to model parameters and command line options
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
254 Function:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
255 Reads a MAQ or Samtools pileup format. For required formatting we refer the user
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
256 to the corresponding websites:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
257 http://maq.sourceforge.net/
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
258 http://samtools.sourceforge.net/
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
259
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
260 In general the format of both files can be described as a tab separated file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
261 where columns are:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
262 - target sequence (e.g. chromosome)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
263 - sequence position
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
264 - reference base
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
265 - depth
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
266 - stream of base calls
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
267 - stream of base call PHRED-scaled qualities
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
268 - stream of mapping PHRED-scaled qualities
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
269
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
270 Note: when handling pileup files, care should be taken when doing copy-paste
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
271 operations not to transform 'tabs' into streams of spaces.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
272
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
273 For each line it evaluates de model according to the parameters in *params and
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
274 outputs the corresponding SNV prediction values.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
275
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
276 Predictions can be output either only for positions where when non-reference values
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
277 are observed or, when run with '-f' flag, for all positions. The -f flag is useful
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
278 when the resulting calls are being compared or joined accros different datasets
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
279 and all predictions need to be present.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
280 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
281 void snvmixClassify_pileup(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
282 //MAQ:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
283 // 1 554484 C 1752 @,,.,.,, @xxx @xxx xxxxx
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
284 //SAM:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
285 // 1 554484 C 1752 ,,.,.,, xxx xxxx
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
286 FILE *fptrIN, *fptrOUT;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
287
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
288 char *line = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
289 size_t line_len = 0, prev_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
290 int read_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
291 int col_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
292 long int line_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
293
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
294 char *col, *qual;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
295 char *col_sep = "\t", *col_str, *col_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
296 char *qual_sep = ",", *qual_str, *qual_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
297 char *chr, *pos, *ref, nref, call, nref_skip = 'N';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
298
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
299 char *bQ, *mQ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
300 int *calls, *tmpQ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
301 int pos_int, depth = 0, qual_num, maxp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
302 int depth_allocated = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
303
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
304 long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
305 int nref_count = 0, ref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
306 long double Y, not_Y;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
307 long double phred[PHRED_MAX + 1];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
308 int i, call_i, k, states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
309
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
310 //initPhred(phred, PHRED_MAX+1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
311 initPhred(phred, PHRED_MAX);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
312
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
313 states = params->states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
314
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
315 // Initialize local values of parameters
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
316 mu = params->mu;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
317 pi = params->pi;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
318
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
319 if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
320 if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
321 if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
322
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
323 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
324 notmu[k] = 1 - mu[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
325 log_pi[k] = logl(pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
326 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
327 fptrIN = params->input;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
328 fptrOUT = params->output;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
329 if(params->full)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
330 nref_skip = -2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
331
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
332
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
333 if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
334 perror("ERROR allocating initial space for calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
335 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
336 depth_allocated = START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
337 #if defined __linux__ || defined _GNU_SOURCE
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
338 while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
339 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
340 #ifdef __APPLE__
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
341 while( (line = fgetln(fptrIN,&line_len)) != NULL )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
342 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
343 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
344 line[line_len-1] = '\0';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
345 line_num++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
346 depth = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
347 nref = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
348 col_ptr = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
349 for(col_num = 0, col_str = line; nref != nref_skip ; col_num++, col_str = NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
350 col = strtok_r(col_str, "\t", &col_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
351 if(col == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
352 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
353 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
354 if(col_num == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
355 chr = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
356 } else if(col_num == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
357 pos = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
358 } else if(col_num == 2) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
359 ref = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
360 } else if(col_num == 3) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
361 depth = atoi(col);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
362 if(depth > depth_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
363 if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
364 perror("ERROR allocating space for calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
365 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
366 depth_allocated = depth + START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
367 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
368 } else if(col_num == 4) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
369 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
370 col = col+sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
371 snvmixGetCallString(col, calls, depth, &nref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
372 } else if(col_num == 5) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
373 bQ = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
374 // If it's a MAQ pileup, we need to skip the @ sign
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
375 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
376 bQ = bQ + sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
377 for(call_i = 0; call_i < depth; call_i++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
378 bQ[call_i] = bQ[call_i]-33;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
379 } else if(col_num == 6) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
380 mQ = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
381 // If it's a MAQ pileup, we need to skip the @ sign
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
382 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
383 mQ = mQ + sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
384 for(call_i = 0; call_i < depth; call_i++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
385 mQ[call_i] = mQ[call_i]-33;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
386 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
387 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
388 // If we quit the for because no nref was found, skip this too, next line
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
389 nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
390 nref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
391 ref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
392 for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
393 //if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
394 if(calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
395 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
396 if(calls[qual_num] == 1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
397 ref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
398 if(calls[qual_num] == nref)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
399 nref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
400 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
401 if(params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
402 fprintf(fptrOUT,"%s:%s\t%s:%d\t%c:%d\t", chr, pos, ref, ref_count, nref, nref_count);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
403 for(qual_num = 0; qual_num < ref_count; qual_num++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
404 fprintf(fptrOUT,",");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
405 for(qual_num = 0; qual_num < nref_count; qual_num++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
406 fprintf(fptrOUT,"%c",nref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
407 fprintf(fptrOUT,"\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
408 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
409 if(nref == nref_skip)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
410 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
411 //nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
412 //if(nref == nref_skip)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
413 // continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
414 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
415 b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
416 nref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
417 for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
418 //if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
419 if(calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
420 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
421 // from matlab:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
422 // b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
423 if(calls[qual_num] == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
424 // If REF then
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
425 Y = phred[(unsigned char) bQ[qual_num]];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
426 not_Y = 1-phred[(unsigned char) bQ[qual_num]];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
427 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
428 nref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
429 // If NREF then
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
430 Y = (1-phred[(unsigned char) bQ[qual_num]])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
431 not_Y = phred[(unsigned char) bQ[qual_num]] + 2*(1-phred[(unsigned char)bQ[qual_num]])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
432 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
433 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
434 b[k] = b[k] + logl((1-phred[(unsigned char) mQ[qual_num]])*0.5 + phred[(unsigned char) mQ[qual_num]] * (Y * mu[k] + not_Y * notmu[k]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
435 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
436 // Check if any non-references actually passed the filter
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
437 //if(!nref_count)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
438 // continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
439 z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
440 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
441 b[k] = expl(b[k] + log_pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
442 z += b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
443 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
444 if(!z) z = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
445 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
446 b[k] = b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
447 maxp = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
448 z = b[0];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
449 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
450 if( b[k] > z) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
451 maxp = k;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
452 z = b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
453 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
454
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
455
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
456 fprintf(fptrOUT,"%s:%s\t%s\t%c\t%s:%d,%c:%d,",chr,pos, ref, nref, ref, ref_count, nref, nref_count);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
457 for(k = 0; k < states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
458 fprintf(fptrOUT,"%0.10Lf,",b[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
459 fprintf(fptrOUT,"%d\n",maxp+1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
460 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
461 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
462 fclose(fptrIN);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
463 fclose(fptrOUT);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
464 free(line);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
465
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
466 free(b); free(notmu); free(log_pi);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
467 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
468
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
469 int snvmixClassify_bamfile(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
470 snvmix_pl_t snvmix_pl;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
471 int states, i, k, bam_flag_mask;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
472
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
473 snvmix_pl.params = params;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
474 snvmix_pl.tid = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
475 // TODO: change this to read range
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
476 snvmix_pl.begin = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
477 snvmix_pl.end = 0x7fffffff;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
478 snvmix_pl.in = samopen(params->inputfile, "rb", 0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
479 if (snvmix_pl.in == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
480 fprintf(stderr, "ERROR: Fail to open BAM file %s\n", params->bamfile);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
481 return 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
482 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
483 snvmix_pl.fai = fai_load(params->fastafile);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
484 if (snvmix_pl.fai == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
485 fprintf(stderr, "Fail to open BAM file %s\n", params->fastafile);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
486 return 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
487 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
488 if(params->listfile) snvmix_pl.hash = load_pos(params->listfile, snvmix_pl.in->header);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
489 snvmix_pl.depth_allocated = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
490 snvmix_pl.ref = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
491 snvmix_pl.calls = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
492 //snvmix_pl.bQ = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
493 //snvmix_pl.mQ = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
494
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
495
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
496 /* This looks ugly... but we don't want to be allocating new memory for EVERY location. */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
497 states = params->states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
498 if( ( snvmix_pl.notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.notmu\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
499 if( ( snvmix_pl.log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.log_pi\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
500 if( ( snvmix_pl.b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.b\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
501 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
502 snvmix_pl.notmu[k] = 1 - params->mu[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
503 snvmix_pl.log_pi[k] = logl(params->pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
504 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
505 initPhred(snvmix_pl.phred, PHRED_MAX);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
506
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
507 /* Init call buffer */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
508 snvmix_pl.n_buffer = params->window;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
509 if( (snvmix_pl.buffer = malloc(snvmix_pl.n_buffer * sizeof(snvmix_call_t)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate space for buffer\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
510 snvmix_pl.b_start = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
511 snvmix_pl.b_end = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
512 for(i = 0; i < snvmix_pl.n_buffer; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
513 snvmix_pl.buffer[i].tid = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
514 snvmix_pl.buffer[i].pos = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
515 snvmix_pl.buffer[i].in_use = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
516 if( (snvmix_pl.buffer[i].p = malloc(states * sizeof(long double)) ) == NULL) {perror("[snvmixClassify_bamfile] Could not allocate space for buffer.p\n"); exit(1);}
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
517 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
518
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
519 bam_flag_mask = (BAM_FUNMAP | BAM_FSECONDARY); // | BAM_FQCFAIL | BAM_FDUP);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
520 if(params->filter_chast)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
521 bam_flag_mask |= BAM_FQCFAIL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
522 if(params->filter_dup)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
523 bam_flag_mask |= BAM_FDUP;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
524
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
525 /* Call pileup with our function and bam_flag_mask */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
526 sampileup(snvmix_pl.in, bam_flag_mask, snvmixClassify_pileup_func, &snvmix_pl);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
527
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
528 free(snvmix_pl.notmu);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
529 free(snvmix_pl.log_pi);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
530 free(snvmix_pl.b);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
531 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
532
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
533 int flush_buffer(uint32_t tid, uint32_t pos, snvmix_pl_t *snvmix_pl) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
534 snvmix_call_t *s;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
535 int i, k, cnt, buff_id;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
536 cnt = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
537 #define _fptrOUT snvmix_pl->params->output
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
538 //#define _fptrOUT stderr
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
539 //fprintf(_fptrOUT, "ENTERING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
540 for(i = 0, buff_id = snvmix_pl->b_start; i < snvmix_pl->n_buffer; i++, buff_id=(buff_id+1)%snvmix_pl->params->window) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
541 //fprintf(stderr, "ITERATING in flush_buffer, buff_id = %d\n", buff_id);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
542 s = snvmix_pl->buffer + buff_id;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
543 if(pos+1 >= (s->pos + snvmix_pl->params->window) || tid != s->tid) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
544 //fprintf(stderr, "PASSED if in flush_buffer, buff_id = %d\n", buff_id);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
545 if(s->in_use) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
546 //fprintf(_fptrOUT,"%d\t",buff_id);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
547 fprintf(_fptrOUT,"%s:%d\t%c\t%c\t%c:%d,%c:%d", snvmix_pl->in->header->target_name[s->tid], s->pos, s->ref, s->nref, s->ref, s->ref_count, s->nref, s->nref_count);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
548
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
549 if(!snvmix_pl->params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
550 fprintf(_fptrOUT,",");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
551 for(k = 0; k < snvmix_pl->params->states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
552 fprintf(_fptrOUT,"%0.10Lf,",s->p[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
553 fprintf(_fptrOUT,"%d",s->maxP);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
554 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
555
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
556 fprintf(_fptrOUT,"\t%d %d", s->forward[0], s->reverse[0]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
557 fprintf(_fptrOUT,"\t%d %d", s->forward[1], s->reverse[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
558 fprintf(_fptrOUT,"\t%d %d", s->nref_center, s->nref_edges);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
559 fprintf(_fptrOUT,"\t%d", s->indel_pos);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
560 fprintf(_fptrOUT,"\t%d %d", s->c_clean[0], s->c_clean[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
561 fprintf(_fptrOUT,"\t%d %d", s->bad_pair[0], s->bad_pair[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
562 fprintf(_fptrOUT,"\t%d %d", s->c_ins[0], s->c_ins[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
563 fprintf(_fptrOUT,"\t%d %d", s->c_del[0], s->c_del[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
564 fprintf(_fptrOUT,"\t%d %d", s->c_junc[0], s->c_junc[1]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
565 fprintf(_fptrOUT,"\t%d", s->aln_unique_pos);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
566 fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->raw_cvg[0],s->raw_cvg[1],s->raw_cvg[2],s->raw_cvg[3],s->raw_cvg[4]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
567 fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->thr_cvg[0],s->thr_cvg[1],s->thr_cvg[2],s->thr_cvg[3],s->thr_cvg[4]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
568 if(snvmix_pl->params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
569 fprintf(_fptrOUT,"\t");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
570 for(k = 0; k < s->ref_count; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
571 fprintf(_fptrOUT,",");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
572 for(k = 0; k < s->nref_count; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
573 fprintf(_fptrOUT,"%c",s->nref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
574 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
575 fprintf(_fptrOUT,"\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
576 cnt++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
577 s->in_use = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
578 snvmix_pl->b_start = (buff_id+1)%snvmix_pl->params->window;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
579 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
580 } else break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
581 if(buff_id == snvmix_pl->b_end)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
582 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
583 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
584 //fprintf(_fptrOUT, "EXITING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
585 return cnt;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
586 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
587
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
588
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
589 static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
590
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
591 #define _bQ(p, x) bam1_qual(p[x].b)[p[x].qpos]
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
592 #define _mQ(p, x) p[x].b->core.qual
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
593
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
594 //FILE *fptrOUT;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
595 char nref_skip = 'N';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
596 //int *calls, ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
597 int depth = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
598 //long double *b, *notmu, *log_pi, *mu, *pi;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
599 long double Y, not_Y, z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
600 //int states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
601 int i, k, qual_num, maxp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
602 //long double *phred;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
603 int b_end_tmp = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
604 snvmix_call_t *s;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
605 snvmix_pl_t *snvmix_pl = (snvmix_pl_t *)data;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
606 param_struct *params = snvmix_pl->params;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
607 // Avoid allocating new variables, use defines to make things readable
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
608 #define _calls snvmix_pl->calls
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
609 #define _b snvmix_pl->b
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
610 #define _notmu snvmix_pl->notmu
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
611 #define _log_pi snvmix_pl->log_pi
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
612 #define _mu params->mu
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
613 #define _pi params->pi
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
614 #define _phred snvmix_pl->phred
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
615 #define _states params->states
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
616 //fprintf(stderr,"DEBUG: snvmix_pl->buffer[snvmix_pl->b_start].in_use = %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
617 //if(pos+1 == 148971)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
618 // printf("DEBUG: test 148971, start.in_use = %d\tb_start=%d\tb_end=%d\n",snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
619 //if(pos == 148971)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
620 // printf("DEBUG: test 148972, start.in_use = %d\tb_start=%d\tb_end=%d (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) = %d >= %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end, pos+1, snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
621 // If we have gone further than window indicates, or changed tid, then flush buffer
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
622 if(snvmix_pl->buffer[snvmix_pl->b_start].in_use && (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) || tid != snvmix_pl->buffer[snvmix_pl->b_start].tid ))
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
623 flush_buffer(tid, pos, snvmix_pl);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
624 b_end_tmp = snvmix_pl->b_end;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
625 s = snvmix_pl->buffer + snvmix_pl->b_end;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
626 snvmix_pl->b_end = (snvmix_pl->b_end+1)%snvmix_pl->params->window;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
627
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
628 if(snvmix_pl->hash) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
629 khint_t k_h = kh_get(64, snvmix_pl->hash, (uint64_t)tid<<32|pos);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
630 if (k_h == kh_end(snvmix_pl->hash)) return 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
631 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
632
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
633 s->in_use = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
634
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
635
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
636 if(params->full)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
637 nref_skip = -2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
638
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
639 if (snvmix_pl->fai && (int)tid != snvmix_pl->tid) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
640 free(snvmix_pl->ref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
641 snvmix_pl->ref = fai_fetch(snvmix_pl->fai, snvmix_pl->in->header->target_name[tid], &snvmix_pl->len);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
642 snvmix_pl->tid = tid;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
643 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
644
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
645 //chr = snvmix_pl->in->header->target_name[tid],
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
646 s->tid = tid;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
647 s->ref = toupper((snvmix_pl->ref && (int)pos < snvmix_pl->len)? snvmix_pl->ref[pos] : 'N');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
648 s->pos = pos + 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
649 depth = n;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
650
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
651 if(depth > snvmix_pl->depth_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
652 if( (snvmix_pl->calls = realloc(snvmix_pl->calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
653 perror("ERROR allocating space for snvmix_pl->calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
654 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
655 //calls = snvmix_pl->calls;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
656 snvmix_pl->depth_allocated = depth + START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
657 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
658
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
659 //for(i = 0; i < depth; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
660 // FIXME: watch out for deletions, do they get automatically an "n"? if so, ok
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
661 //fprintf(stderr, "DEBUG: len = %d\tref = %c\tcalls = ", snvmix_pl->len, ref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
662 s->raw_cvg[0] = s->raw_cvg[1] = s->raw_cvg[2] = s->raw_cvg[3] = s->raw_cvg[4] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
663 s->thr_cvg[0] = s->thr_cvg[1] = s->thr_cvg[2] = s->thr_cvg[3] = s->thr_cvg[4] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
664 for(i = depth; i--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
665 if(pl[i].is_del) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
666 _calls[i] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
667 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
668 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
669 _calls[i] = "nACnGnnnTnnnnnnn"[bam1_seqi(bam1_seq(pl[i].b), pl[i].qpos)];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
670 //fprintf(stderr, "%c", _calls[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
671 switch(_calls[i]) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
672 case 'A': s->raw_cvg[0]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
673 case 'C': s->raw_cvg[1]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
674 case 'G': s->raw_cvg[2]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
675 case 'T': s->raw_cvg[3]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
676 case 'n': s->raw_cvg[4]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
677 default: break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
678 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
679 // Mask calls according to quality
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
680 if( snvmixSkipCall_alt(params, _calls[i], (char) bam1_qual(pl[i].b)[pl[i].qpos], (char) pl[i].b->core.qual) )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
681 _calls[i] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
682 else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
683 switch(_calls[i]) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
684 case 'A': s->thr_cvg[0]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
685 case 'C': s->thr_cvg[1]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
686 case 'G': s->thr_cvg[2]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
687 case 'T': s->thr_cvg[3]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
688 case 'n': s->thr_cvg[4]++; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
689 default: break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
690 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
691 if(_calls[i] == s->ref)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
692 _calls[i] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
693 else if(_calls[i] == 'n')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
694 _calls[i] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
695 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
696 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
697
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
698 s->nref = snvmixFilterCalls(_calls,depth,NULL,NULL,params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
699 s->nref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
700 s->ref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
701
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
702 // FIXME: review this section, extra flags
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
703 for(i = 0; i < 2; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
704 s->forward[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
705 s->reverse[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
706 s->good_pair[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
707 s->bad_pair[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
708 s->c_clean[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
709 s->c_ins[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
710 s->c_del[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
711 s->c_junc[i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
712 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
713 s->nref_edges = 0; // FIXME: fixed 5 bp edges
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
714 s->nref_center = 0; // FIXME: fixed 5 bp edges
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
715 s->indel_pos = 0; // How many reads have a deletion at that site
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
716 s->indel_near = 0; // How many reasd have a deletion overall
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
717 s->aln_unique_pos = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
718 uint8_t cigar_ops; // 0x1 INS, 0x2 DEL , 0x4 JUNCTION
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
719 //for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
720 uint32_t debug_sort = 0x7fffffff;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
721 if(s->nref != nref_skip) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
722 k = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
723 for(qual_num = depth; qual_num--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
724 if(_calls[qual_num] == -1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
725 if(pl[qual_num].is_del)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
726 s->indel_pos++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
727 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
728 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
729 cigar_ops = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
730 for(i=0; i<pl[qual_num].b->core.n_cigar; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
731 int op = bam1_cigar(pl[qual_num].b)[i] & BAM_CIGAR_MASK;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
732 if(op == BAM_CINS)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
733 cigar_ops |= 0x1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
734 if(op == BAM_CDEL)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
735 cigar_ops |= 0x2;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
736 if(op == BAM_CREF_SKIP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
737 cigar_ops |= 0x4;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
738 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
739 if(pl[qual_num].indel) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
740 s->indel_pos++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
741 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
742 if(_calls[qual_num] == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
743 s->ref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
744 k = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
745 } else if(_calls[qual_num] == s->nref) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
746 s->nref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
747 k = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
748 if(pl[qual_num].qpos > (pl[qual_num].b->core.l_qseq - 5) || pl[qual_num].qpos < 5)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
749 s->nref_edges++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
750 else
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
751 s->nref_center++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
752 if(pl[qual_num].b->core.pos < debug_sort) { s->aln_unique_pos++; debug_sort = pl[qual_num].b->core.pos; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
753 else if(pl[qual_num].b->core.pos > debug_sort) {fprintf(stderr, "ERROR checking sort in read-position, prev = %Ld, new = %Ld\n", debug_sort, pl[qual_num].b->core.pos); exit(1);}
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
754 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
755
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
756 if(k != -1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
757 if(cigar_ops) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
758 if(cigar_ops & 0x1) s->c_ins[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
759 if(cigar_ops & 0x2) s->c_del[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
760 if(cigar_ops & 0x4) s->c_junc[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
761 } else s->c_clean[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
762
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
763 if(pl[qual_num].b->core.tid != pl[qual_num].b->core.mtid) s->bad_pair[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
764
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
765 if(pl[qual_num].b->core.flag & 0x10) s->reverse[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
766 else s->forward[k]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
767 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
768 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
769 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
770 if(!params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
771 if(s->nref == nref_skip) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
772 snvmix_pl->b_end = b_end_tmp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
773 s->in_use = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
774 return 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
775 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
776 int block = (_states / 3) * 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
777 //for(k = 0; k < _states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
778 //for(k = _states; k--; )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
779 // _b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
780 k = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
781 while( k < block ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
782 _b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
783 _b[k+1] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
784 _b[k+2] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
785 k += 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
786 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
787 if( k < _states) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
788 switch( _states - k ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
789 case 2: _b[k] = 0; k++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
790 case 1: _b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
791 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
792 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
793 //for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
794 for(qual_num = depth; qual_num--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
795 //if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
796 if(_calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
797 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
798 // from matlab:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
799 // b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
800 if(_calls[qual_num] == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
801 // If REF then
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
802 Y = _phred[(unsigned char) _bQ(pl, qual_num)];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
803 not_Y = 1-_phred[(unsigned char) _bQ(pl, qual_num)];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
804 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
805 // If NREF then
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
806 Y = (1-_phred[(unsigned char) _bQ(pl, qual_num)])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
807 not_Y = _phred[(unsigned char) _bQ(pl, qual_num)] + 2*(1-_phred[(unsigned char)_bQ(pl, qual_num)])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
808 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
809 //for(k = 0; k < _states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
810 //for(k = _states; k--; )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
811 // _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
812 k = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
813 while( k < block ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
814 _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
815 _b[k+1] = _b[k+1] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+1] + not_Y * _notmu[k+1]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
816 _b[k+2] = _b[k+2] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+2] + not_Y * _notmu[k+2]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
817 k += 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
818 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
819 if( k < _states) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
820 switch( _states - k ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
821 case 2: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k])); k++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
822 case 1: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
823 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
824 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
825 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
826 z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
827 //for(k = 0; k < _states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
828 //for(k = _states; k--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
829 // _b[k] = expl(_b[k] + _log_pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
830 // z += _b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
831 //}
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
832 k = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
833 while( k < block ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
834 _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
835 _b[k+1] = expl(_b[k+1] + _log_pi[k+1]); z += _b[k+1];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
836 _b[k+2] = expl(_b[k+2] + _log_pi[k+2]); z += _b[k+2];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
837 k += 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
838 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
839 if( k < _states ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
840 switch( _states - k ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
841 case 2: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k]; k++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
842 case 1: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
843 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
844 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
845 if(!z) z = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
846 //for(k = 0; k < _states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
847 //for(k = _states; k--; )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
848 // _b[k] = _b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
849 k = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
850 while( k < block ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
851 _b[k] = _b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
852 _b[k+1] = _b[k+1] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
853 _b[k+2] = _b[k+2] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
854 k += 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
855 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
856 if( k < _states ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
857 switch( _states - k ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
858 case 2: _b[k] = _b[k] / z; k++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
859 case 1: _b[k] = _b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
860 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
861 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
862 maxp = _states - 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
863 z = _b[maxp];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
864 //for(k = 0; k < _states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
865 for(k = _states; k--; )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
866 if( _b[k] > z) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
867 maxp = k;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
868 z = _b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
869 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
870
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
871
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
872 for(k = 0; k < _states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
873 s->p[k] = _b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
874 s->maxP = maxp+1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
875 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
876 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
877
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
878 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
879 void snvmixGetCallString
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
880 Arguments:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
881 *col : pointer to the current file-line in memory
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
882 *calls : pointer to array where we will store the final base-calls
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
883 depth : number of base reads for this position
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
884 *nref : the observed NREF value will be placed here (-1 if none was found)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
885
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
886 Function:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
887 This will parse column 5 of the pileup file, which contains the
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
888 base calls and will fill the array "calls" with:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
889 1 : when a reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
890 -1 : when an invalid value is seen ('N', 'n', '*')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
891 [ACTG] : when a non-reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
892
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
893
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
894
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
895 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
896 inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
897 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
898 int call_i = 0, call_skip_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
899 char call_skip_char[10];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
900 for(i = 0 ; call_i < depth; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
901 switch(col[i]){
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
902 case ',':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
903 case '.':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
904 calls[call_i] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
905 call_i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
906 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
907 case 'a': case 'A':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
908 case 't': case 'T':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
909 case 'g': case 'G':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
910 case 'c': case 'C':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
911 //calls[call_i] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
912 calls[call_i] = toupper(col[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
913 call_i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
914 //if(*nref == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
915 //*nref = toupper(*(col+sizeof(char)*i));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
916 if(*nref == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
917 *nref = toupper(col[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
918 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
919 case 'N':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
920 case 'n':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
921 case '*':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
922 // Not comparable values, we ignore them, but need to
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
923 // keep count to compare against the number of mapping qualities
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
924 calls[call_i] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
925 call_i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
926 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
927 case '$':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
928 // End of a read, just ignore it
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
929 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
930 case '^':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
931 // Start of read, ignore it and skip the next value (not base-related)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
932 i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
933 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
934 case '+':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
935 case '-':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
936 // Eliminate:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
937 // +2AA
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
938 // -3AAA
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
939 // Start skiping the sign
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
940 i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
941 for(call_skip_num = 0; col[i] <= '9' && col[i] >= '0'; call_skip_num++, i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
942 //call_skip_char[call_skip_num] = call;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
943 call_skip_char[call_skip_num] = col[i];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
944 //i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
945 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
946 // we have the number string in call_skip_char, lets terminate it
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
947 call_skip_char[call_skip_num] = '\0';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
948 // i has been updated to first letter, just need to jump the rest of them
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
949 i = i+atoi(call_skip_char)-1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
950 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
951 default:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
952 fprintf(stderr,"ERROR: problem reading pileup file calls (%c)\n",col[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
953 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
954 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
955 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
956 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
957 // If no nref was found, don't bother parsing the other columns, make the for fail with -1
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
958 if(!*nref)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
959 *nref = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
960 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
961
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
962 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
963 int snvmixFilterCalls
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
964 Arguments:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
965 *calls : array built by snvmixGetCallString containing
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
966 1 : when a reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
967 -1 : when an invalid value is seen ('N', 'n', '*')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
968 [ACTG] : when a non-reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
969 depth : number of calls for the current position
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
970 *bQ : base qualities observed for each call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
971 *mQ : map quality for each call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
972 *params : parameter structure, get thresholding data
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
973 Function:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
974 For each valid call read in the position this function will apply
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
975 thresholding according to the type selected (-t flag) and the bQ (-q)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
976 and mQ (-Q) thresholds provided.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
977
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
978 Any base-call that does not pass thresholds will be switched from it's
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
979 current value in *calls to -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
980
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
981 The most prevalent NREF after filtering will be determined and returned.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
982
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
983 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
984 inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
985 int qual_num, nref_counts[5];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
986 nref_counts[0] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
987 nref_counts[1] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
988 nref_counts[2] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
989 nref_counts[3] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
990 nref_counts[4] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
991 int max_nref = N;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
992
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
993 char nref = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
994
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
995 //for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
996 for(qual_num = depth; qual_num--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
997 //if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
998 if( bQ != NULL && mQ != NULL && snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
999 calls[qual_num] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1000 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1001 nref_counts[0]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1002 switch(calls[qual_num]) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1003 case 'A':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1004 nref_counts[A]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1005 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1006 case 'C':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1007 nref_counts[C]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1008 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1009 case 'G':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1010 nref_counts[G]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1011 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1012 case 'T':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1013 nref_counts[T]++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1014 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1015 case 1:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1016 case -1:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1017 nref_counts[0]--;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1018 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1019 default:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1020 fprintf(stderr,"ERROR: unknown call base\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1021 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1022 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1023 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1024 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1025 if(nref_counts[0]) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1026 max_nref = A;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1027 if(nref_counts[max_nref] < nref_counts[C])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1028 max_nref = C;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1029 if(nref_counts[max_nref] < nref_counts[G])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1030 max_nref = G;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1031 if(nref_counts[max_nref] < nref_counts[T])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1032 max_nref = T;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1033 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1034 //return -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1035 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1036 //for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1037 for(qual_num = depth; qual_num--; ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1038 if(calls[qual_num] == 1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1039 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1040 if(calls[qual_num] != base_code[max_nref])
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1041 calls[qual_num] = -1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1042 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1043 return base_code[max_nref];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1044 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1045
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1046 /*
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1047 int snvmixSkipCall
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1048 Arguments:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1049 *calls : array built by snvmixGetCallString containing
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1050 1 : when a reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1051 -1 : when an invalid value is seen ('N', 'n', '*')
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1052 [ACTG] : when a non-reference call was made
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1053 qual_num : call number that is being evaluated
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1054 *params : parameter structure, get thresholding data
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1055 *bQ : base qualities observed for each call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1056 *mQ : map quality for each call
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1057 Function:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1058 Evalates quality values in each of the filtering models
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1059
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1060 Returns 1 if the calls[qual_num] needs to be filtered out
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1061 Returns 0 otherwise
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1062 */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1063 inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1064 if(calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1065 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1066 if(params->filter_type == TYPE_mb) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1067 if(bQ[qual_num] <= params->bQ || mQ[qual_num] <= params->mQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1068 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1069 } else if(params->filter_type == TYPE_b) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1070 if(bQ[qual_num] <= params->bQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1071 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1072 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1073 if(mQ[qual_num] <= params->mQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1074 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1075 if(params->filter_type == TYPE_m) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1076 // Use mapping as surrogate
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1077 bQ[qual_num] = mQ[qual_num];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1078 // Make mapping one
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1079 mQ[qual_num] = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1080 } else if(params->filter_type == TYPE_M) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1081 // Mapping passed, make it one
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1082 mQ[qual_num] = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1083 } else if(params->filter_type == TYPE_Mb) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1084 // Nothing special here
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1085 } else if(params->filter_type == TYPE_MB || params->filter_type == TYPE_SNVMix1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1086 if(bQ[qual_num] <= params->bQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1087 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1088 if(params->filter_type == TYPE_SNVMix1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1089 bQ[qual_num] = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1090 mQ[qual_num] = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1091 } }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1092 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1093 return(0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1094 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1095
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1096 inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1097 if(call == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1098 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1099 if(params->filter_type != TYPE_b && mQ <= params->mQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1100 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1101 switch(params->filter_type) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1102 case TYPE_mb:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1103 if(bQ <= params->bQ || mQ <= params->mQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1104 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1105 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1106 case TYPE_b:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1107 if(bQ <= params->bQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1108 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1109 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1110 case TYPE_m:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1111 // Use mapping as surrogate
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1112 bQ = mQ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1113 // Make mapping one
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1114 mQ = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1115 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1116 case TYPE_M:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1117 // Mapping passed, make it one
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1118 mQ = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1119 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1120 case TYPE_Mb:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1121 // Nothing special here
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1122 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1123 case TYPE_MB:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1124 case TYPE_SNVMix1:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1125 if(bQ <= params->bQ)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1126 return(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1127 if(params->filter_type == TYPE_SNVMix1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1128 bQ = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1129 mQ = (char) PHRED_MAX;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1130 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1131 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1132 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1133 return(0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1134 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1135
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1136
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1137 void resetParams(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1138 params->input = stdin;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1139 params->output = stdout;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1140 params->inputfile = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1141 params->outputfile = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1142 params->modelfile = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1143 params->fastafile = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1144 params->listfile = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1145 params->window = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1146 params->filter_type = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1147 params->filter_dup = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1148 params->filter_chast = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1149 params->train = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1150 params->classify = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1151 params->filter = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1152 params->full = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1153 params->input_type = S_PILEUP; // 0 = processed, 1 = maq pileup, 2 = sam pileup, 3 = bam file
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1154 params->mu = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1155 params->pi = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1156 params->max_iter = 1000;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1157 params->bQ = 19;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1158 params->mQ = 19;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1159 params->debug = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1160 params->states = 3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1161 params->trainP.alpha = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1162 params->trainP.beta = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1163 params->trainP.delta = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1164 params->trainP.param_file = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1165 params->trainP.max_iter = 100;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1166 params->trainP.bQ = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1167 params->trainP.mQ = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1168 params->trainP.calls = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1169 params->window = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1170 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1171
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1172 void initSNVMix(int argc, char **argv, param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1173 char c;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1174 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1175 resetParams(params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1176 while ((c = getopt (argc, argv, "hDTCFfi:o:m:t:p:q:Q:a:b:d:M:S:r:l:w:cR")) != -1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1177 switch (c)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1178 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1179 case 'm': params->modelfile = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1180 case 'i': params->inputfile = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1181 case 'o': params->outputfile = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1182 // Bam file opts
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1183 case 'r': params->fastafile = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1184 case 'l': params->listfile = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1185 case 'T': params->train = 1; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1186 case 'C': params->classify = 1; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1187 case 'F': params->filter = 1; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1188 case 'f': params->full = 1; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1189 case 'p':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1190 if(*optarg == 'q') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1191 params->input_type = Q_PILEUP;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1192 } else if(*optarg == 'm') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1193 params->input_type = M_PILEUP;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1194 } else if(*optarg == 's') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1195 params->input_type = S_PILEUP;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1196 } else if(*optarg == 'b') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1197 params->input_type = BAM_FILE;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1198 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1199 fprintf(stderr,"ERROR: unknown pileup format %c\n",*optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1200 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1201 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1202 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1203 case 't':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1204 if(strcmp("mb",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1205 params->filter_type = TYPE_mb;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1206 else if(strcmp("m",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1207 params->filter_type = TYPE_m;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1208 else if(strcmp("b",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1209 params->filter_type = TYPE_b;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1210 else if(strcmp("M",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1211 params->filter_type = TYPE_M;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1212 else if(strcmp("Mb",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1213 params->filter_type = TYPE_Mb;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1214 else if(strcmp("MB",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1215 params->filter_type = TYPE_MB;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1216 else if(strcmp("SNVMix1",optarg) == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1217 params->filter_type = TYPE_SNVMix1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1218 else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1219 fprintf(stderr,"ERROR: filter type '%s' not recognized\n",optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1220 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1221 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1222 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1223 case 'q':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1224 params->bQ = atoi(optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1225 if( params->bQ < 0 || params->bQ >= PHRED_MAX ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1226 fprintf(stderr,"ERROR: quality threshold value Q%d out of range\n",params->bQ);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1227 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1228 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1229 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1230 case 'Q':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1231 params->mQ = atoi(optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1232 if( params->mQ < 0 || params->mQ >= PHRED_MAX ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1233 fprintf(stderr,"ERROR: mapping threshold value Q%d out of range\n",params->mQ);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1234 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1235 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1236 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1237 case 'h':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1238 usage(argv[0]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1239 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1240 case 'D': params->debug = 1; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1241 case 'M': params->trainP.param_file = optarg; break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1242 case 'S':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1243 params->states = atoi(optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1244 if( params->states < 3 ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1245 fprintf(stderr,"ERROR: state minimum is 3\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1246 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1247 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1248 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1249 case 'w':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1250 params->window = atoi(optarg);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1251 if(params->window < 1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1252 params->window = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1253 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1254 case 'c':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1255 params->filter_chast = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1256 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1257 case 'R':
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1258 params->filter_dup = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1259 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1260 default:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1261 fprintf(stderr,"Unknown option\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1262 usage(argv[0]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1263 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1264 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1265 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1266 /* Decide if we're going to train or classify */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1267 if(params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1268 params->train = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1269 params->classify = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1270 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1271 if(params->train && params->classify) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1272 fprintf(stderr,"ERROR: choose either train or classify\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1273 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1274 } else if(!params->train && !params->classify && !params->filter) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1275 fprintf(stderr,"Train/Classify/Filter not selected, picking default: Classify\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1276 params->classify = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1277 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1278
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1279 /* Set parameters */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1280 allocateParameters(params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1281 if( params->train ) setTrainingParameters(params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1282 if( params->classify ) setClassificationParameters(params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1283
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1284 /* Open input and output streams */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1285 if( params->input_type == BAM_FILE ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1286 if( params->train ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1287 fprintf(stderr, "BAM input for training not yet implemented\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1288 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1289 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1290 if( params->inputfile == NULL || strcmp(params->inputfile, "-") == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1291 fprintf(stderr, "ERROR: if '-p b' is chosen, input file has to be a bam file\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1292 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1293 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1294 if( params->fastafile == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1295 fprintf(stderr, "ERROR: if '-p b' is chosen, reference fasta file is required (-r <ref.fa>)\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1296 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1297 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1298 } else if( params->inputfile != NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1299 params->input = fopen(params->inputfile, "r");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1300 if(params->input == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1301 perror("ERROR: could not open input file");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1302 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1303 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1304 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1305 params->input = stdin;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1306 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1307 if( params->outputfile != NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1308 params->output = fopen(params->outputfile, "w");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1309 if(params->output == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1310 perror("ERROR: could not open output file");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1311 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1312 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1313 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1314 params->output = stdout;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1315 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1316 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1317
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1318 void allocateParameters(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1319
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1320 /* Allocate alpha */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1321 if( (params->trainP.alpha = malloc(params->states * sizeof(long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1322 perror("ERROR: could not allocate space for alpha\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1323 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1324 /* Allocate beta */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1325 if( (params->trainP.beta = malloc(params->states * sizeof(long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1326 perror("ERROR: could not allocate space for beta\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1327 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1328 /* Allocate delta*/
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1329 if( (params->trainP.delta = malloc(params->states * sizeof(long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1330 perror("ERROR: could not allocate space for delta\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1331 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1332 /* Allocate mu*/
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1333 if( (params->mu = malloc(params->states * sizeof(long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1334 perror("ERROR: could not allocate space for mu\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1335 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1336 /* Allocate pi*/
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1337 if( (params->pi = malloc(params->states * sizeof(long double))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1338 perror("ERROR: could not allocate space for pi\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1339 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1340 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1341
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1342 void setTrainingParameters(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1343 if( params->trainP.param_file != NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1344 readParamFile(params,'t');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1345 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1346 if(params->states != 3) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1347 fprintf(stderr, "ERROR: if state space larger than 3 requested, parameters must be provided\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1348 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1349 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1350 fprintf(stderr,"Training parameter file not given, using defaults\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1351 params->trainP.alpha[0] = 1000;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1352 params->trainP.alpha[1] = 5000;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1353 params->trainP.alpha[2] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1354 params->trainP.beta[0] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1355 params->trainP.beta[1] = 5000;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1356 params->trainP.beta[2] = 1000;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1357 params->trainP.delta[0] = 10;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1358 params->trainP.delta[1] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1359 params->trainP.delta[2] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1360 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1361 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1362
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1363 void setClassificationParameters(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1364 int k;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1365 if( params->modelfile != NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1366 readParamFile(params,'c');
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1367 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1368 if(params->states != 3) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1369 fprintf(stderr, "ERROR: if state space larger than 3 requested, model file must be provided\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1370 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1371 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1372 params->mu[0] = 0.995905287891696078261816182930;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1373 params->mu[1] = 0.499569401000925172873223800707;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1374 params->mu[2] = 0.001002216846753116409260431219;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1375 params->pi[0] = 0.672519580755555956841362785781;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1376 params->pi[1] = 0.139342327124010650907237618412;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1377 params->pi[2] = 0.188138092120433392251399595807;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1378 fprintf(stderr,"Classification parameter file not given, using for mQ35 bQ10\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1379 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1380 fprintf(stderr,"Mu and pi values:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1381 for(k = 0; k < params->states; k++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1382 fprintf(stderr,"\tmu[%d] = %Lf\tpi[%d] = %Lf\n", k, params->mu[k], k, params->pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1383 fprintf(stderr,"\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1384 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1385
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1386 void readParamFile(param_struct *params, char type) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1387 FILE *fptrPARAM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1388 char *line = NULL, *param_name, *param, *param_ptr, *filename;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1389 size_t line_len = 0, read_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1390 long double *target;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1391 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1392
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1393 if(type == 't') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1394 filename=params->trainP.param_file;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1395 } else if(type == 'c') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1396 filename=params->modelfile;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1397 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1398 fptrPARAM=fopen(filename, "r");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1399 if(fptrPARAM == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1400 fprintf(stderr, "ERROR: could not open parameter file %s\n", filename);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1401 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1402 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1403
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1404 #if defined __linux__ || defined _GNU_SOURCE
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1405 while( (read_len = getline(&line,&line_len,fptrPARAM)) != -1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1406 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1407 #ifdef __APPLE__
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1408 while( (line = fgetln(fptrPARAM,&line_len)) != NULL )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1409 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1410 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1411 line[line_len-1] = '\0';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1412 target = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1413 param_name = strtok_r(line, " ", &param_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1414 if(type == 't') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1415 if(strcmp("alpha", param_name) == 0) target = params->trainP.alpha;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1416 else if(strcmp("beta", param_name) == 0) target = params->trainP.beta;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1417 else if(strcmp("delta", param_name) == 0) target = params->trainP.delta;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1418 } else if(type == 'c') {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1419 if(strcmp("mu", param_name) == 0) target = params->mu;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1420 else if(strcmp("pi", param_name) == 0) target = params->pi;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1421
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1422 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1423 if(target == NULL) { fprintf(stderr, "Unknown parameter %s, skipping\n", param_name); continue; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1424
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1425 for(i = 0; i < params->states; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1426 param = strtok_r(NULL, " ", &param_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1427 if(param == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1428 fprintf(stderr,"ERROR: missing value #%d for %s\n", i, param_name);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1429 exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1430 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1431 target[i] = atof(param);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1432 fprintf(stderr, "DEBUG: %s[%d] = %Lf\n", param_name, i, target[i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1433 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1434 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1435 fclose(fptrPARAM);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1436 free(line);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1437 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1438
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1439 void initPhred(long double *phredTable, int elem) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1440 int i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1441 for(i = 0; i < elem; i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1442 phredTable[i] = 1-powl(10,(-(long double)i/10));
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1443 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1444 phredTable[i] = 1;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1445 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1446
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1447
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1448 void usage(char *selfname) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1449 param_struct default_params;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1450 resetParams(&default_params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1451 fprintf(stderr,"Syntax:\n\t%s\t-m <modelfile> [-i <infile>] [-o <outfile>] [-T | -C | -F] [-p < q | m | s >] [-t < mb | m | b | M | Mb | MB | SNVMix1>] [-q <#>] [-Q <#>] [-M <trainP file>] [-h]\n",selfname);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1452 fprintf(stderr,"\tRequired:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1453 fprintf(stderr,"\t-m <modelfile>\tmodel file, a line for mu and a line for pi, three\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1454 fprintf(stderr,"\t\t\tspace-separated values each, like:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1455 fprintf(stderr,"\t\t\tmu 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1456 fprintf(stderr,"\t\t\tpi 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1457 fprintf(stderr,"\tOptional:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1458 fprintf(stderr,"\t-i <infile>\tquality pileup, from pileup2matlab.pl script (def: STDIN)\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1459 fprintf(stderr,"\t-o <outfile>\twhere to put the results (def: STDOUT)\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1460 fprintf(stderr,"\t-T | -C | -F\tTrain, Classify or Filter (def: Classify)\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1461 fprintf(stderr,"\t-p q|m|s\tInput pileup format (def: s)\n\t\t\tq = quality\n\t\t\tm = MAQ\n\t\t\ts = SAMtools\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1462 fprintf(stderr,"\t-t mb|m|b|M|Mb|MB|SNVMix1\n\t\t\tFilter type (def: mb)\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1463 fprintf(stderr,"\t\t\tmb\tLowest between map and base quality\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1464 fprintf(stderr,"\t\t\tm\tFilter on map, and use as surrogate for base quality\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1465 fprintf(stderr,"\t\t\tb\tFilter on base quality, take map quality as 1\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1466 fprintf(stderr,"\t\t\tM\tFilter on map quality but use only base quality\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1467 fprintf(stderr,"\t\t\tMb\tFilter on map quality and use both map and base qualities\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1468 fprintf(stderr,"\t\t\tMB\tFilter on map quality AND base quality\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1469 fprintf(stderr,"\t\t\tSNVMix1\tFilter on base quality and map quality, afterwards consider them perfect\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1470 fprintf(stderr,"\t-q #\t\tCutoff Phred value for Base Quality, anything <= this value is ignored (def: Q%d)\n",default_params.bQ);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1471 fprintf(stderr,"\t-Q #\t\tCutoff Phred value for Map Quality, anything <= this value is ignored (def: Q%d)\n",default_params.mQ);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1472 fprintf(stderr,"\n\tTraining parameters:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1473 fprintf(stderr,"\t-M <file>\tProvide a file containing training parameters\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1474 fprintf(stderr,"\t\t\tValues are space-separated:\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1475 fprintf(stderr,"\t\t\talpha # # #\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1476 fprintf(stderr,"\t\t\tbeta # # #\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1477 fprintf(stderr,"\t\t\tdelta # # #\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1478 fprintf(stderr,"\n\t-h\t\tthis message\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1479 fprintf(stderr,"\nImplementation: Rodrigo Goya, 2009. Version %s\n",VERSION);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1480 exit(0);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1481 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1482
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1483 // Based on classify pileup, but reads the entire file to memory for training purposes.
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1484 void snvmixGetTrainSet_pileup(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1485 //MAQ:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1486 // 1 554484 C 1752 @,,.,.,, @xxx @xxx xxxxx
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1487 //SAM:
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1488 // 1 554484 C 1752 ,,.,.,, xxx xxxx
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1489
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1490 FILE *fptrIN;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1491
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1492 char *line = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1493 size_t line_len = 0, prev_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1494 int read_len = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1495 int col_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1496 long int line_num = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1497
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1498 char *col, *qual;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1499 char *col_sep = "\t", *col_str, *col_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1500 char *qual_sep = ",", *qual_str, *qual_ptr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1501 char *chr, *pos, *ref, nref, call;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1502
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1503 char *bQ, *mQ;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1504 int *calls, *tmpQ, pos_int;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1505 int depth = 0, qual_num, maxp;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1506 int depth_allocated = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1507
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1508 int trainset = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1509 int trainset_allocated = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1510
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1511 int nref_count = 0, ref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1512 int i, call_i;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1513
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1514 fptrIN = params->input;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1515
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1516 if( (params->trainP.bQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1517 perror("ERROR allocating initial space for train.bQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1518 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1519 if( (params->trainP.mQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1520 perror("ERROR allocating initial space for train.mQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1521 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1522 if( (params->trainP.calls = malloc(START_QNUM * sizeof (signed char **))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1523 perror("ERROR allocating initial space for train.calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1524 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1525 if( (params->trainP.depth = malloc(START_QNUM * sizeof (int *))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1526 perror("ERROR allocating initial space for train.depth"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1527 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1528 trainset_allocated = START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1529
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1530 if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1531 perror("ERROR allocating initial space for train.depth"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1532 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1533 depth_allocated = START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1534 #if defined __linux__ || defined _GNU_SOURCE
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1535 while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1536 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1537 #ifdef __APPLE__
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1538 while( (line = fgetln(fptrIN,&line_len)) != NULL )
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1539 #endif
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1540 {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1541 line[line_len-1] = '\0';
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1542 depth = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1543 nref = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1544 col_ptr = NULL;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1545 for(col_num = 0, col_str = line; ; col_num++, col_str = NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1546 col = strtok_r(col_str, "\t", &col_ptr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1547 if(col == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1548 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1549 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1550 if(col_num == 0) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1551 chr = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1552 } else if(col_num == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1553 pos = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1554 pos_int = strtol(pos, NULL, 10);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1555 } else if(col_num == 2) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1556 ref = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1557 } else if(col_num == 3) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1558 depth = atoi(col);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1559 if(depth > depth_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1560 if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1561 perror("ERROR allocating space for calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1562 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1563 depth_allocated = depth + START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1564 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1565 } else if(col_num == 4) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1566 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1567 col = col+sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1568 snvmixGetCallString(col, calls, depth, &nref);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1569 } else if(col_num == 5) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1570 bQ = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1571 // If it's a MAQ pileup, we need to skip the @ sign
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1572 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1573 bQ = bQ + sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1574 for(call_i = 0; call_i < depth; call_i++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1575 bQ[call_i] = bQ[call_i]-33;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1576 } else if(col_num == 6) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1577 mQ = col;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1578 // If it's a MAQ pileup, we need to skip the @ sign
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1579 if(params->input_type == M_PILEUP)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1580 mQ = mQ + sizeof(char);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1581 for(call_i = 0; call_i < depth; call_i++)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1582 mQ[call_i] = mQ[call_i]-33;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1583 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1584 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1585 if(line_num >= trainset_allocated) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1586 if( ( params->trainP.bQ = realloc(params->trainP.bQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1587 perror("ERROR reallocating space for trainP.bQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1588 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1589 if( ( params->trainP.mQ = realloc(params->trainP.mQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1590 perror("ERROR reallocating space for trainP.mQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1591 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1592 if( ( params->trainP.calls = realloc(params->trainP.calls, (line_num + START_QNUM) * sizeof (signed char *)) ) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1593 perror("ERROR reallocating space for trainP.calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1594 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1595 if( ( params->trainP.depth = realloc(params->trainP.depth, (line_num + START_QNUM) * sizeof (int)) ) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1596 perror("ERROR reallocating space for trainP.depth"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1597 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1598 trainset_allocated = line_num + START_QNUM;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1599 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1600
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1601 nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1602 nref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1603 ref_count = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1604 for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1605 if(calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1606 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1607 if(calls[qual_num] == 1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1608 ref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1609 if(calls[qual_num] == nref)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1610 nref_count++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1611 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1612 params->trainP.depth[line_num] = ref_count + nref_count;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1613
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1614 if( (params->trainP.bQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1615 perror("ERROR allocating space for trainP.bQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1616 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1617 if( (params->trainP.mQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1618 perror("ERROR allocating space for trainP.mQ"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1619 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1620 if( (params->trainP.calls[line_num] = malloc(sizeof(signed char) * params->trainP.depth[line_num])) == NULL ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1621 perror("ERROR allocating space for trainP.calls"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1622 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1623
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1624 call_i = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1625 for(qual_num = 0; qual_num < depth; qual_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1626 if(calls[qual_num] == -1)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1627 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1628
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1629 params->trainP.calls[line_num][call_i] = (signed char) calls[qual_num];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1630 params->trainP.bQ[line_num][call_i] = (unsigned char) bQ[qual_num];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1631 params->trainP.mQ[line_num][call_i] = (unsigned char) mQ[qual_num];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1632 call_i++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1633 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1634 if( params->trainP.depth[line_num] != call_i ) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1635 fprintf(stderr, "ERROR: mismatch between trainP.depth and call_i\n"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1636 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1637 line_num++;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1638 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1639 params->trainP.len = line_num;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1640 params->trainP.len = line_num;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1641 fclose(fptrIN);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1642 free(line); free(calls);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1643 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1644
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1645 // Use EM algorithm on a memory-located data set to train the parameters
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1646 void snvmixTrain_pileup(param_struct *params) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1647 int line_num = 0, call_i = 0, k = 0, states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1648 int iter = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1649 FILE *fptrOUT;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1650
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1651 long double phred[PHRED_MAX + 1];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1652 long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1653 long double **pG, **xr;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1654 long double *xrhat, *sum_pG;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1655 long double Y, not_Y, M;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1656 int N_sum;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1657 int strength;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1658
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1659 long double ll, prev_ll = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1660 //initPhred(phred, PHRED_MAX+1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1661 initPhred(phred, PHRED_MAX);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1662
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1663 states = params->states;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1664
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1665 // Initialize local values of parameters
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1666 mu = params->mu;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1667 pi = params->pi;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1668
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1669 if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1670 if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1671 if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1672 if( ( xrhat = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate xrhat\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1673 if( ( sum_pG = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate sum_pG\n"); exit(1); }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1674
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1675 snvmixGetTrainSet_pileup(params);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1676
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1677 if(params->debug) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1678 for(line_num = 0; line_num < params->trainP.len; line_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1679 fprintf(stderr, "line_num: %d", line_num);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1680 for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1681 fprintf(stderr, "\t(%d,bQ%d,mQ%d)", params->trainP.calls[line_num][call_i], params->trainP.bQ[line_num][call_i], params->trainP.mQ[line_num][call_i]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1682 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1683 fprintf(stderr, "\n\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1684 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1685 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1686 // Initialize mu and pi
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1687
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1688 fprintf(stderr, "Initializing mu\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1689 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1690 params->mu[k] = (long double) params->trainP.alpha[k] / (params->trainP.alpha[k] + params->trainP.beta[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1691 fprintf(stderr,"\talpha[%d] = %Lf,\tbeta[%d] = %Lf,\tmu[%d] = %Lf\n", k, params->trainP.alpha[k], k, params->trainP.beta[k], k, params->mu[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1692 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1693
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1694 fprintf(stderr, "Initializing pi\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1695 z = (long double) params->trainP.delta[0] + params->trainP.delta[1] + params->trainP.delta[2];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1696 if(!z) { z = 1; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1697 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1698 params->pi[k] = (long double) params->trainP.delta[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1699 fprintf(stderr,"\tdelta[%d] = %Lf,\tpi[%d] = %Lf\n", k, params->trainP.delta[k], k, params->pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1700 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1701
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1702 strength = params->trainP.len;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1703
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1704 // Initialize matrices
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1705 if( (pG = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1706 perror("ERROR allocating initial space for pG"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1707 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1708 if( (xr = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1709 perror("ERROR allocating initial space for xr"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1710 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1711 for(line_num = 0; line_num < params->trainP.len; line_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1712 // [states] cells for each line_num
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1713 if( (pG[line_num] = malloc(sizeof(long double) * states)) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1714 perror("ERROR allocating space for pG"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1715 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1716 if( (xr[line_num] = malloc(sizeof(long double) * states)) == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1717 perror("ERROR allocating space for xr"); exit(1);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1718 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1719 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1720
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1721 for(iter = 0; iter < params->trainP.max_iter; iter++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1722 // Reset values for this iteration
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1723 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1724 notmu[k] = 1 - mu[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1725 log_pi[k] = logl(pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1726
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1727 sum_pG[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1728 xrhat[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1729
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1730 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1731 N_sum = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1732 ll = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1733
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1734 // E step
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1735 for(line_num = 0; line_num < params->trainP.len; line_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1736 if(params->trainP.depth == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1737 continue;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1738 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1739 xr[line_num][k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1740 b[k] = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1741 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1742
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1743 for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1744 if(params->trainP.calls[line_num][call_i] == 1) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1745 Y = phred[params->trainP.bQ[line_num][call_i]];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1746 not_Y = 1-phred[params->trainP.bQ[line_num][call_i]];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1747 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1748 Y = (1-phred[params->trainP.bQ[line_num][call_i]])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1749 not_Y = phred[params->trainP.bQ[line_num][call_i]] + 2*(1-phred[params->trainP.bQ[line_num][call_i]])/3;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1750 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1751 M = phred[params->trainP.mQ[line_num][call_i]];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1752 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1753 b[k] = b[k] + logl( (1 - M) * 0.5 + M * (Y * mu[k] + not_Y * notmu[k]) );
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1754 xr[line_num][k] = xr[line_num][k] + Y;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1755 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1756 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1757 z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1758 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1759 b[k] = expl(b[k] + log_pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1760 z = z + b[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1761 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1762 if(!z) { z=1; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1763 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1764 pG[line_num][k] = b[k] / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1765 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1766
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1767 ll = ll + logl(z);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1768
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1769 N_sum = N_sum + params->trainP.depth[line_num];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1770
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1771 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1772 sum_pG[k] = sum_pG[k] + pG[line_num][k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1773 xrhat[k] = xrhat[k] + xr[line_num][k] * pG[line_num][k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1774 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1775 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1776
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1777 // Check log likelihood
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1778 if(iter == 0)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1779 prev_ll = ll;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1780 else if(ll <= prev_ll)
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1781 break;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1782 prev_ll = ll;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1783
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1784 // M step
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1785 z = 0;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1786 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1787 z = z + sum_pG[k] + params->trainP.delta[k];
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1788 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1789 if(!z) { z=1; }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1790 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1791 // Update pi
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1792 params->pi[k] = (sum_pG[k] + params->trainP.delta[k]) / z;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1793 // Update mu
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1794 params->mu[k] = (xrhat[k] + params->trainP.alpha[k]*strength-1) / (N_sum + params->trainP.alpha[k]*strength + params->trainP.beta[k]*strength-2);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1795 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1796 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1797
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1798 if(params->modelfile == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1799 fptrOUT = stdout;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1800 } else {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1801 fptrOUT = fopen(params->modelfile, "w");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1802 if(fptrOUT == NULL) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1803 perror("ERROR: could not open modelfile for writing, using STDOUT");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1804 fptrOUT = stdout;
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1805 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1806 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1807 fprintf(fptrOUT,"mu");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1808 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1809 fprintf(fptrOUT, " %0.30Lf", params->mu[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1810 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1811 fprintf(fptrOUT,"\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1812 fprintf(fptrOUT,"pi");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1813 for(k = 0; k < states; k++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1814 fprintf(fptrOUT, " %0.30Lf", params->pi[k]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1815 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1816 fprintf(fptrOUT,"\n");
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1817
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1818 /* free unused memory */
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1819 free(b); free(notmu); free(log_pi);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1820 free(xrhat); free(sum_pG);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1821 for(line_num = 0; line_num < params->trainP.len; line_num++) {
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1822 // free [states] cells for each line_num
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1823 free(pG[line_num]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1824 free(xr[line_num]);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1825 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1826 free(pG); free(xr);
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1827 }
74f5ea818cea Uploaded
ryanmorin
parents:
diff changeset
1828