view SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.c @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
line wrap: on
line source

/* The MIT License

   Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca>

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

/*
C Implementation of SNVMix2
*/

#define VERSION "0.12.1-rc1"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "SNVMix2.h"

#include "bam.h"

#define START_QNUM 1000

int main(int argc, char **argv) {
	
	param_struct params;// = {NULL, NULL, NULL, NULL, NULL, 0, 0, 0, 0, 0};

	initSNVMix(argc, argv, &params);

	if(params.classify || params.filter) {
		if(params.input_type == Q_PILEUP)
			snvmixClassify_qualities(&params);
		else if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
			snvmixClassify_pileup(&params);
		else if(params.input_type == BAM_FILE) {
			fprintf(stderr,"Output is:\n");
			fprintf(stderr,"\tchr:pos ref nref ref:ref_count,nref:nref_count,pAA,pAB,pBB,maxP");
			fprintf(stderr,"\tref_fwd ref_rev");
			fprintf(stderr,"\tnref_fwd nref_rev");
			fprintf(stderr,"\tnref_center nref_edges");
			fprintf(stderr,"\tindel_pos");
			fprintf(stderr,"\tref_clean nref_clean");
			fprintf(stderr,"\tref_bad_pair nref_bad_pair");
			fprintf(stderr,"\tref_ins nref_ins");
			fprintf(stderr,"\tref_del nref_del");
			fprintf(stderr,"\tref_junc nref_junc");
			fprintf(stderr,"\tnref_in_unique_pos");
			fprintf(stderr,"\traw[A,C,G,T,N]");
			fprintf(stderr,"\tthr[A,C,G,T,N]\n");
			snvmixClassify_bamfile(&params);
		}
	} else if(params.train) {
		if(params.input_type == M_PILEUP || params.input_type == S_PILEUP)
			snvmixTrain_pileup(&params);
		else if(params.input_type == Q_PILEUP) {
			fprintf(stderr,"Sorry, Training with quality-type input is not yet implemented\n");
			exit(1);
		}
	}
	return(0);
}

/*
	void snvmixClassify_qualities
	Arguments:
		*params : pointer to model parameters and command line options
	Function:
		Reads a pilep-style file that has been preprocessed to include the quality of
		calls being the reference allele and the maping quality both in decimal values.

		For each line it evaluates de model according to the parameters in *params and
		outputs the corresponding SNV prediction values.

		This function may come in handy when filtering of calls is done by a
		different program or the base-calling and maping quality information is not
		in a pileup/phred format.
	
		The file read is a tab-separated file where the columns are:
			-  target sequence (e.g. chromosome)
			-  sequence position
			-  reference base
			-  non-reference base
			-  comma separated probabilities of the call being the reference
			-  comma separated mapping qualities, one for each base call

*/
void snvmixClassify_qualities(param_struct *params) {
	FILE *fptrIN, *fptrOUT;

	char *line = NULL;
	size_t line_len = 0, prev_len = 0;
	int read_len = 0;

	int qual_num = 0, col_num = 0;

	char *col, *qual;
	char *col_sep = "\t", *col_str, *col_ptr;
	char *qual_sep = ",", *qual_str, *qual_ptr;
	char *chr, *pos, *ref, *nref;

	long double *bQ, *mQ, *tmpQ;
	int pos_int, depth = 0, maxp;
	long int depth_allocated = 0;

	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
	int i, k, states;

	states = params->states;

	// Initialize local values of parameters
	mu = params->mu;
	pi = params->pi;

	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate b\n"); exit(1); }
	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate notmu\n"); exit(1); }
	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_qualities] Could not allocate log_pi\n"); exit(1); }

	for(k = 0; k < states; k++) {
		notmu[k] = 1 - mu[k];
		log_pi[k] = logl(pi[k]);
	}
	fptrIN = params->input;
	fptrOUT = params->output;


	if( (bQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
		perror("ERROR allocating initial space for bQ"); exit(1);
	}
	if( (mQ = malloc(START_QNUM * sizeof (long double))) == NULL) {
		perror("ERROR allocating initial space for mQ"); exit(1);
	}
	depth_allocated = START_QNUM;
#if defined __linux__ || defined _GNU_SOURCE
	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
#endif
#ifdef __APPLE__
	while( (line = fgetln(fptrIN,&line_len)) != NULL )
#endif
	{
		line[line_len-1] = '\0';
		depth = 0;
		col_ptr = NULL;
		for(col_num = 0, col_str = line; ; col_num++, col_str = NULL) {
			col = strtok_r(col_str, "\t", &col_ptr);
			if(col == NULL) {
				break;
			}
			if(col_num == 0) {
				chr = col;
			} else if(col_num == 1) {
				pos = col;
				pos_int = strtol(pos, NULL, 10);
			} else if(col_num == 2) {
				ref = col;
			} else if(col_num == 3) {
				nref = col;
			} else if(col_num == 4) {
				for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
					qual = strtok_r(qual_str, ",", &qual_ptr);
					if(qual == NULL) {
						break;
					}
					if(qual_num >= depth_allocated) {
						if( (bQ = realloc(bQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
							perror("ERROR allocating bQ"); exit(1);
						}
						if( (mQ = realloc(mQ, sizeof(long double) * (depth_allocated + START_QNUM))) == NULL) {
							perror("ERROR allocating mQ"); exit(1);
						}
						depth_allocated = depth_allocated + START_QNUM;
					}
					bQ[qual_num] = atof(qual);
				}
				depth = qual_num;
			} else if(col_num == 5) {
				for(qual_num = 0, qual_str = col; ; qual_num++, qual_str = NULL) {
					qual = strtok_r(qual_str, ",", &qual_ptr);
					if(qual == NULL) {
						break;
					}
					if(qual_num >= depth_allocated) {
						fprintf(stderr, "FATAL ERROR: should not have more mapping qualities than base qualities\n");
						exit(1);
					}
					mQ[qual_num] = atof(qual);
				}
				if(depth != qual_num) {
					fprintf(stderr, "FATAL ERROR: should not have more base qualities than mapping qualities\n");
					exit(1);
				}
			}
		}


		for(k = 0; k < states; k++)
			b[k] = 0;
		
    		//b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
		for(qual_num = 0; qual_num < depth; qual_num++)
			for(k = 0; k < states; k++)
				b[k] = b[k] + logl((1- mQ[qual_num])*0.5 + mQ[qual_num] * (bQ[qual_num]*mu[k]+(1-bQ[qual_num])*notmu[k]));

		z = 0;
		for(k = 0; k < states; k++) {
			b[k] = expl(b[k] + log_pi[k]);
			z += b[k];
		}
		if(!z) { z = 1; }
		for(k = 0; k < states; k++)
			b[k] = b[k] / z;
		maxp = 0;
		z = b[0];
		for(k = 0; k < states; k++)
			if( b[k] > z) {
				maxp = k;
				z = b[k];
			}

		fprintf(fptrOUT,"%s\t%s\t%s\t%s",chr, pos, ref, nref);
		for(k = 0; k , states; k++)
			fprintf(fptrOUT,"%0.10Lf,",b[k]);
		fprintf(fptrOUT,"%d\n",maxp+1);
	}
	fclose(fptrIN);
	fclose(fptrOUT);
	free(line);

	free(b); free(notmu); free(log_pi);
}


/*
	void snvmixClassify_pileup
	Arguments:
		*params : pointer to model parameters and command line options
	Function:
		Reads a MAQ or Samtools pileup format. For required formatting we refer the user
		to the corresponding websites:
			http://maq.sourceforge.net/
			http://samtools.sourceforge.net/

		In general the format of both files can be described as a tab separated file
		where columns are:
			-  target sequence (e.g. chromosome)
			-  sequence position
			-  reference base
			-  depth
			-  stream of base calls
			-  stream of base call PHRED-scaled qualities
			-  stream of mapping PHRED-scaled qualities

		Note: when handling pileup files, care should be taken when doing copy-paste
		operations not to transform 'tabs' into streams of spaces.

		For each line it evaluates de model according to the parameters in *params and
		outputs the corresponding SNV prediction values.

		Predictions can be output either only for positions where when non-reference values
		are observed or, when run with '-f' flag, for all positions. The -f flag is useful
		when the resulting calls are being compared or joined accros different datasets
		and all predictions need to be present.
*/
void snvmixClassify_pileup(param_struct *params) {
//MAQ:
//	1       554484  C       1752    @,,.,.,, @xxx @xxx xxxxx
//SAM:
//	1       554484  C       1752    ,,.,.,,	xxx xxxx
	FILE *fptrIN, *fptrOUT;

	char *line = NULL;
	size_t line_len = 0, prev_len = 0;
	int read_len = 0;
	int col_num = 0;
	long int line_num = 0;

	char *col, *qual;
	char *col_sep = "\t", *col_str, *col_ptr;
	char *qual_sep = ",", *qual_str, *qual_ptr;
	char *chr, *pos, *ref, nref, call, nref_skip = 'N';

	char *bQ, *mQ;
	int *calls, *tmpQ;
	int pos_int, depth = 0, qual_num,  maxp;
	int depth_allocated = 0;

	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
	int nref_count = 0, ref_count = 0;
	long double Y, not_Y;
	long double phred[PHRED_MAX + 1];
	int i, call_i, k, states;

	//initPhred(phred, PHRED_MAX+1);
	initPhred(phred, PHRED_MAX);

	states = params->states;

	// Initialize local values of parameters
	mu = params->mu;
	pi = params->pi;

	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }

	for(k = 0; k < states; k++) {
		notmu[k] = 1 - mu[k];
		log_pi[k] = logl(pi[k]);
	}
	fptrIN = params->input;
	fptrOUT = params->output;
	if(params->full)
		nref_skip = -2;


	if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
		perror("ERROR allocating initial space for calls"); exit(1);
	}
	depth_allocated = START_QNUM;
#if defined __linux__ || defined _GNU_SOURCE
	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
#endif
#ifdef __APPLE__
	while( (line = fgetln(fptrIN,&line_len)) != NULL )
#endif
	{
		line[line_len-1] = '\0';
		line_num++;
		depth = 0;
		nref = 0;
		col_ptr = NULL;
		for(col_num = 0, col_str = line; nref != nref_skip ; col_num++, col_str = NULL) {
			col = strtok_r(col_str, "\t", &col_ptr);
			if(col == NULL) {
				break;
			}
			if(col_num == 0) {
				chr = col;
			} else if(col_num == 1) {
				pos = col;
			} else if(col_num == 2) {
				ref = col;
			} else if(col_num == 3) {
				depth = atoi(col);
				if(depth > depth_allocated) {
					if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
						perror("ERROR allocating space for calls"); exit(1);
					}
					depth_allocated = depth + START_QNUM;
				}
			} else if(col_num == 4) {
				if(params->input_type == M_PILEUP)
					col = col+sizeof(char);
					snvmixGetCallString(col, calls, depth, &nref);
			} else if(col_num == 5) {
				bQ = col;
				// If it's a MAQ pileup, we need to skip the @ sign
				if(params->input_type == M_PILEUP)
					bQ = bQ + sizeof(char);
				for(call_i = 0; call_i < depth; call_i++)
					bQ[call_i] = bQ[call_i]-33;
			} else if(col_num == 6) {
				mQ = col;
				// If it's a MAQ pileup, we need to skip the @ sign
				if(params->input_type == M_PILEUP)
					mQ = mQ + sizeof(char);
				for(call_i = 0; call_i < depth; call_i++)
					mQ[call_i] = mQ[call_i]-33;
			}
		}
		// If we quit the for because no nref was found, skip this too, next line
	nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
	nref_count = 0;
	ref_count = 0;
	for(qual_num = 0; qual_num < depth; qual_num++) {
		//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
		if(calls[qual_num] == -1)
			continue;
		if(calls[qual_num] == 1)
			ref_count++;
		if(calls[qual_num] == nref)
			nref_count++;
	}
if(params->filter) {
	fprintf(fptrOUT,"%s:%s\t%s:%d\t%c:%d\t", chr, pos, ref, ref_count, nref, nref_count);
	for(qual_num = 0; qual_num < ref_count; qual_num++)
		fprintf(fptrOUT,",");
	for(qual_num = 0; qual_num < nref_count; qual_num++)
		fprintf(fptrOUT,"%c",nref);
	fprintf(fptrOUT,"\n");
} else {
		if(nref == nref_skip)
			continue;
		//nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
		//if(nref == nref_skip)
		//	continue;
		for(k = 0; k < states; k++)
			b[k] = 0;
		nref_count = 0;
		for(qual_num = 0; qual_num < depth; qual_num++) {
			//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
			if(calls[qual_num] == -1)
				continue;
		// from matlab:
		// b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
			if(calls[qual_num] == 1) {
				// If REF then
				Y = phred[(unsigned char) bQ[qual_num]];
				not_Y = 1-phred[(unsigned char) bQ[qual_num]];
			} else {
				nref_count++;
				// If NREF then
				Y = (1-phred[(unsigned char) bQ[qual_num]])/3;
				not_Y = phred[(unsigned char) bQ[qual_num]] + 2*(1-phred[(unsigned char)bQ[qual_num]])/3;
			}
			for(k = 0; k < states; k++)
				b[k] = b[k] + logl((1-phred[(unsigned char) mQ[qual_num]])*0.5 + phred[(unsigned char) mQ[qual_num]] * (Y * mu[k] + not_Y * notmu[k]));
		}
		// Check if any non-references actually passed the filter
		//if(!nref_count)
		//	continue;
		z = 0;
		for(k = 0; k < states; k++) {
			b[k] = expl(b[k] + log_pi[k]);
			z += b[k];
		}
		if(!z) z = 1;
		for(k = 0; k < states; k++)
			b[k] = b[k] / z;
		maxp = 0;
		z = b[0];
		for(k = 0; k < states; k++)
			if( b[k] > z) {
				maxp = k;
				z = b[k];
			}


		fprintf(fptrOUT,"%s:%s\t%s\t%c\t%s:%d,%c:%d,",chr,pos, ref, nref, ref, ref_count,  nref, nref_count);
		for(k = 0; k < states; k++)
			fprintf(fptrOUT,"%0.10Lf,",b[k]);
		fprintf(fptrOUT,"%d\n",maxp+1);
}
	}
	fclose(fptrIN);
	fclose(fptrOUT);
	free(line);

	free(b); free(notmu); free(log_pi);
}

int snvmixClassify_bamfile(param_struct *params) {
	snvmix_pl_t snvmix_pl;
	int states, i, k, bam_flag_mask;

	snvmix_pl.params = params;
	snvmix_pl.tid = -1;
	// TODO: change this to read range
	snvmix_pl.begin = 0;
	snvmix_pl.end = 0x7fffffff;
	snvmix_pl.in = samopen(params->inputfile, "rb", 0);  
	if (snvmix_pl.in == 0) {
		fprintf(stderr, "ERROR: Fail to open BAM file %s\n", params->bamfile);
		return 1;
	}
	snvmix_pl.fai = fai_load(params->fastafile);
	if (snvmix_pl.fai == 0) {
		fprintf(stderr, "Fail to open BAM file %s\n", params->fastafile);
		return 1;
	}
	if(params->listfile) snvmix_pl.hash = load_pos(params->listfile, snvmix_pl.in->header);
	snvmix_pl.depth_allocated = 0;
	snvmix_pl.ref = NULL;
	snvmix_pl.calls = NULL;
	//snvmix_pl.bQ = NULL;
	//snvmix_pl.mQ = NULL;


	/* This looks ugly... but we don't want to be allocating new memory for EVERY location. */
	states = params->states;
	if( ( snvmix_pl.notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.notmu\n"); exit(1); }
	if( ( snvmix_pl.log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.log_pi\n"); exit(1); }
	if( ( snvmix_pl.b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate snvmix_pl.b\n"); exit(1); }
	for(k = 0; k < states; k++) {
		snvmix_pl.notmu[k] = 1 - params->mu[k];
		snvmix_pl.log_pi[k] = logl(params->pi[k]);
	}
	initPhred(snvmix_pl.phred, PHRED_MAX);

	/* Init call buffer */
	snvmix_pl.n_buffer = params->window;
	if( (snvmix_pl.buffer = malloc(snvmix_pl.n_buffer * sizeof(snvmix_call_t)) ) == NULL) { perror("[snvmixClassify_bamfile] Could not allocate space for buffer\n"); exit(1); }
	snvmix_pl.b_start = 0;
	snvmix_pl.b_end = 0;
	for(i = 0; i < snvmix_pl.n_buffer; i++) {
		snvmix_pl.buffer[i].tid = 0;
		snvmix_pl.buffer[i].pos = 0;
		snvmix_pl.buffer[i].in_use = 0;
		if( (snvmix_pl.buffer[i].p = malloc(states * sizeof(long double)) ) == NULL) {perror("[snvmixClassify_bamfile] Could not allocate space for buffer.p\n"); exit(1);}
	}
	
	bam_flag_mask = (BAM_FUNMAP | BAM_FSECONDARY); // | BAM_FQCFAIL | BAM_FDUP);
	if(params->filter_chast)
		bam_flag_mask |= BAM_FQCFAIL;
	if(params->filter_dup)
		bam_flag_mask |= BAM_FDUP;

	/* Call pileup with our function and bam_flag_mask */
	sampileup(snvmix_pl.in, bam_flag_mask, snvmixClassify_pileup_func, &snvmix_pl);

	free(snvmix_pl.notmu);
	free(snvmix_pl.log_pi);
	free(snvmix_pl.b);
}

int flush_buffer(uint32_t tid, uint32_t pos, snvmix_pl_t *snvmix_pl) {
	snvmix_call_t *s;
	int i, k, cnt, buff_id;
	cnt = 0;
	#define _fptrOUT snvmix_pl->params->output
	//#define _fptrOUT stderr
//fprintf(_fptrOUT, "ENTERING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
	for(i = 0, buff_id = snvmix_pl->b_start; i < snvmix_pl->n_buffer; i++, buff_id=(buff_id+1)%snvmix_pl->params->window) {
//fprintf(stderr, "ITERATING in flush_buffer, buff_id = %d\n", buff_id);
		s = snvmix_pl->buffer + buff_id;
		if(pos+1 >= (s->pos + snvmix_pl->params->window) || tid != s->tid) {
//fprintf(stderr, "PASSED if in flush_buffer, buff_id = %d\n", buff_id);
			if(s->in_use) {
				//fprintf(_fptrOUT,"%d\t",buff_id);
				fprintf(_fptrOUT,"%s:%d\t%c\t%c\t%c:%d,%c:%d", snvmix_pl->in->header->target_name[s->tid], s->pos, s->ref, s->nref, s->ref, s->ref_count, s->nref, s->nref_count);

				if(!snvmix_pl->params->filter) {
					fprintf(_fptrOUT,",");
					for(k = 0; k < snvmix_pl->params->states; k++)
						fprintf(_fptrOUT,"%0.10Lf,",s->p[k]);
					fprintf(_fptrOUT,"%d",s->maxP);
				}

				fprintf(_fptrOUT,"\t%d %d", s->forward[0], s->reverse[0]);
				fprintf(_fptrOUT,"\t%d %d", s->forward[1], s->reverse[1]);
				fprintf(_fptrOUT,"\t%d %d", s->nref_center, s->nref_edges);
				fprintf(_fptrOUT,"\t%d", s->indel_pos);
				fprintf(_fptrOUT,"\t%d %d", s->c_clean[0], s->c_clean[1]);
				fprintf(_fptrOUT,"\t%d %d", s->bad_pair[0],  s->bad_pair[1]);
				fprintf(_fptrOUT,"\t%d %d", s->c_ins[0], s->c_ins[1]);
				fprintf(_fptrOUT,"\t%d %d", s->c_del[0], s->c_del[1]);
				fprintf(_fptrOUT,"\t%d %d", s->c_junc[0], s->c_junc[1]);
				fprintf(_fptrOUT,"\t%d", s->aln_unique_pos);
				fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->raw_cvg[0],s->raw_cvg[1],s->raw_cvg[2],s->raw_cvg[3],s->raw_cvg[4]);
				fprintf(_fptrOUT,"\t[%d,%d,%d,%d,%d]", s->thr_cvg[0],s->thr_cvg[1],s->thr_cvg[2],s->thr_cvg[3],s->thr_cvg[4]);
				if(snvmix_pl->params->filter) {
					fprintf(_fptrOUT,"\t");
					for(k = 0; k < s->ref_count; k++)
						fprintf(_fptrOUT,",");
					for(k = 0; k < s->nref_count; k++)
						fprintf(_fptrOUT,"%c",s->nref);
				}
				fprintf(_fptrOUT,"\n");
				cnt++;
				s->in_use = 0;
				snvmix_pl->b_start = (buff_id+1)%snvmix_pl->params->window;
			}
		} else break;
		if(buff_id == snvmix_pl->b_end)
			break;
	}
//fprintf(_fptrOUT, "EXITING flush_buffer, b_start = %d\tb_end = %d\n",snvmix_pl->b_start, snvmix_pl->b_end);
	return cnt;
}


static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) {

#define _bQ(p, x) bam1_qual(p[x].b)[p[x].qpos]
#define _mQ(p, x) p[x].b->core.qual

	//FILE *fptrOUT;
	char nref_skip = 'N';
	//int *calls, ;
	int depth = 0;
	//long double *b, *notmu, *log_pi, *mu, *pi;
	long double Y, not_Y, z;
	//int states;
	int i, k, qual_num, maxp;
	//long double *phred;
	int b_end_tmp = 0;
	snvmix_call_t *s;
	snvmix_pl_t *snvmix_pl = (snvmix_pl_t *)data;
	param_struct *params = snvmix_pl->params;
	// Avoid allocating new variables, use defines to make things readable
#define	_calls snvmix_pl->calls
#define _b snvmix_pl->b
#define _notmu snvmix_pl->notmu
#define _log_pi snvmix_pl->log_pi
#define _mu params->mu
#define _pi params->pi
#define	_phred snvmix_pl->phred
#define _states params->states
	//fprintf(stderr,"DEBUG: snvmix_pl->buffer[snvmix_pl->b_start].in_use = %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use);
	//if(pos+1 == 148971)
	//	printf("DEBUG: test 148971, start.in_use = %d\tb_start=%d\tb_end=%d\n",snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end);
	//if(pos == 148971)
	//	printf("DEBUG: test 148972, start.in_use = %d\tb_start=%d\tb_end=%d (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) = %d >= %d\n", snvmix_pl->buffer[snvmix_pl->b_start].in_use, snvmix_pl->b_start,snvmix_pl->b_end, pos+1, snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window);
	// If we have gone further than window indicates, or changed tid, then flush buffer
	if(snvmix_pl->buffer[snvmix_pl->b_start].in_use && (pos+1 >= (snvmix_pl->buffer[snvmix_pl->b_start].pos + snvmix_pl->params->window) || tid != snvmix_pl->buffer[snvmix_pl->b_start].tid ))
			flush_buffer(tid, pos, snvmix_pl);
	b_end_tmp = snvmix_pl->b_end;
	s = snvmix_pl->buffer + snvmix_pl->b_end;
	snvmix_pl->b_end = (snvmix_pl->b_end+1)%snvmix_pl->params->window;

	if(snvmix_pl->hash) {
		khint_t k_h = kh_get(64, snvmix_pl->hash, (uint64_t)tid<<32|pos);
		if (k_h == kh_end(snvmix_pl->hash)) return 0;
	}

	s->in_use = 1;


	if(params->full)
		nref_skip = -2;

	if (snvmix_pl->fai && (int)tid != snvmix_pl->tid) {
		free(snvmix_pl->ref);
		snvmix_pl->ref = fai_fetch(snvmix_pl->fai, snvmix_pl->in->header->target_name[tid], &snvmix_pl->len);
		snvmix_pl->tid = tid;
	}

	//chr = snvmix_pl->in->header->target_name[tid],
	s->tid = tid;
	s->ref = toupper((snvmix_pl->ref && (int)pos < snvmix_pl->len)? snvmix_pl->ref[pos] : 'N');
	s->pos = pos + 1;
	depth = n;

	if(depth > snvmix_pl->depth_allocated) {
		if( (snvmix_pl->calls = realloc(snvmix_pl->calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
			perror("ERROR allocating space for snvmix_pl->calls"); exit(1);
		}
		//calls = snvmix_pl->calls;
		snvmix_pl->depth_allocated = depth + START_QNUM;
	}

	//for(i = 0; i < depth; i++) {
	// FIXME: watch out for deletions, do they get automatically an "n"? if so, ok
//fprintf(stderr, "DEBUG: len = %d\tref = %c\tcalls = ", snvmix_pl->len, ref);
	s->raw_cvg[0] = s->raw_cvg[1] = s->raw_cvg[2] = s->raw_cvg[3] = s->raw_cvg[4] = 0;
	s->thr_cvg[0] = s->thr_cvg[1] = s->thr_cvg[2] = s->thr_cvg[3] = s->thr_cvg[4] = 0;
	for(i = depth; i--; ) {
		if(pl[i].is_del) {
			_calls[i] = -1;
			continue;
		}
		_calls[i] = "nACnGnnnTnnnnnnn"[bam1_seqi(bam1_seq(pl[i].b), pl[i].qpos)];
//fprintf(stderr, "%c", _calls[i]);
		switch(_calls[i]) {
			case 'A': s->raw_cvg[0]++; break;
			case 'C': s->raw_cvg[1]++; break;
			case 'G': s->raw_cvg[2]++; break;
			case 'T': s->raw_cvg[3]++; break;
			case 'n': s->raw_cvg[4]++; break;
	    		default: break;
		}
		// Mask calls according to quality
		if( snvmixSkipCall_alt(params, _calls[i], (char) bam1_qual(pl[i].b)[pl[i].qpos], (char) pl[i].b->core.qual) )
                        _calls[i] = -1;
		else {
			switch(_calls[i]) {
				case 'A': s->thr_cvg[0]++; break;
				case 'C': s->thr_cvg[1]++; break;
				case 'G': s->thr_cvg[2]++; break;
				case 'T': s->thr_cvg[3]++; break;
				case 'n': s->thr_cvg[4]++; break;
	    			default: break;
	    		}
			if(_calls[i] == s->ref)
				_calls[i] = 1;
			else if(_calls[i] == 'n')
				 _calls[i] = -1;
		}
	}

	s->nref = snvmixFilterCalls(_calls,depth,NULL,NULL,params);
	s->nref_count = 0;
	s->ref_count = 0;

	// FIXME: review this section, extra flags
	for(i = 0; i < 2; i++) {
	s->forward[i] = 0;
	s->reverse[i] = 0;
	s->good_pair[i] = 0;
	s->bad_pair[i] = 0;
	s->c_clean[i] = 0;
	s->c_ins[i] = 0;
	s->c_del[i] = 0;
	s->c_junc[i] = 0;
	}
	s->nref_edges = 0;    // FIXME: fixed 5 bp edges
	s->nref_center = 0;    // FIXME: fixed 5 bp edges
	s->indel_pos = 0;     // How many reads have a deletion at that site
	s->indel_near = 0;    // How many reasd have a deletion overall
	s->aln_unique_pos = 0;
	uint8_t cigar_ops; // 0x1 INS, 0x2 DEL , 0x4 JUNCTION
	//for(qual_num = 0; qual_num < depth; qual_num++) {
	uint32_t debug_sort = 0x7fffffff;
	if(s->nref != nref_skip) {
		k = -1;
		for(qual_num = depth; qual_num--; ) {
			if(_calls[qual_num] == -1) {
				if(pl[qual_num].is_del)
					s->indel_pos++;
				continue;
			}
			cigar_ops = 0;
			for(i=0; i<pl[qual_num].b->core.n_cigar; i++) {
				int op = bam1_cigar(pl[qual_num].b)[i] & BAM_CIGAR_MASK;
				if(op == BAM_CINS)
					cigar_ops |= 0x1;
				if(op == BAM_CDEL)
					cigar_ops |= 0x2;
				if(op == BAM_CREF_SKIP)
					cigar_ops |= 0x4;
			}
			if(pl[qual_num].indel) {
				s->indel_pos++;
			}
			if(_calls[qual_num] == 1) {
				s->ref_count++;
				k = 0;
			} else if(_calls[qual_num] == s->nref) {
				s->nref_count++;
				k = 1;
				if(pl[qual_num].qpos > (pl[qual_num].b->core.l_qseq - 5) || pl[qual_num].qpos < 5)
					s->nref_edges++;
				else
					s->nref_center++;
				if(pl[qual_num].b->core.pos < debug_sort) { s->aln_unique_pos++; debug_sort = pl[qual_num].b->core.pos; }
				else if(pl[qual_num].b->core.pos > debug_sort) {fprintf(stderr, "ERROR checking sort in read-position, prev = %Ld, new = %Ld\n", debug_sort, pl[qual_num].b->core.pos); exit(1);}
			}

			if(k != -1) {
				if(cigar_ops) {
					if(cigar_ops & 0x1) s->c_ins[k]++;
					if(cigar_ops & 0x2) s->c_del[k]++;
					if(cigar_ops & 0x4) s->c_junc[k]++;
				} else s->c_clean[k]++;

				if(pl[qual_num].b->core.tid != pl[qual_num].b->core.mtid) s->bad_pair[k]++;

				if(pl[qual_num].b->core.flag & 0x10) s->reverse[k]++;
				else s->forward[k]++;
			}
		}
	}
	if(!params->filter) {
		if(s->nref == nref_skip) {
			snvmix_pl->b_end = b_end_tmp;
			s->in_use = 0;
			return 0;
		}
		int block = (_states / 3) * 3;
		//for(k = 0; k < _states; k++)
		//for(k = _states; k--; )
		//	_b[k] = 0;
		k = 0;
		while( k < block ) {
			_b[k] = 0;
			_b[k+1] = 0;
			_b[k+2] = 0;
			k += 3;
		}
		if( k < _states) {
			switch( _states - k ) {
				case 2: _b[k] = 0; k++;
				case 1: _b[k] = 0;
			}
		}
		//for(qual_num = 0; qual_num < depth; qual_num++) {
		for(qual_num = depth; qual_num--; ) {
			//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) )
			if(_calls[qual_num] == -1)
				continue;
		// from matlab:
		// b = sum(log(notm*0.5 + m.*(yr.*mur+notyr.*notmur)),2);
			if(_calls[qual_num] == 1) {
				// If REF then
				Y = _phred[(unsigned char) _bQ(pl, qual_num)];
				not_Y = 1-_phred[(unsigned char) _bQ(pl, qual_num)];
			} else {
				// If NREF then
				Y = (1-_phred[(unsigned char) _bQ(pl, qual_num)])/3;
				not_Y = _phred[(unsigned char) _bQ(pl, qual_num)] + 2*(1-_phred[(unsigned char)_bQ(pl, qual_num)])/3;
			}
			//for(k = 0; k < _states; k++)
			//for(k = _states; k--; )
			//	_b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
			k = 0;
			while( k < block ) {
				_b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
				_b[k+1] = _b[k+1] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+1] + not_Y * _notmu[k+1]));
				_b[k+2] = _b[k+2] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k+2] + not_Y * _notmu[k+2]));
				k += 3;
			}
			if( k < _states) {
				switch( _states - k ) {
					case 2: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k])); k++;
					case 1: _b[k] = _b[k] + logl((1-_phred[(unsigned char) _mQ(pl, qual_num)])*0.5 + _phred[(unsigned char) _mQ(pl, qual_num)] * (Y * _mu[k] + not_Y * _notmu[k]));
				}
			}
		}
		z = 0;
		//for(k = 0; k < _states; k++) {
		//for(k = _states; k--; ) {
		//	_b[k] = expl(_b[k] + _log_pi[k]);
		//	z += _b[k];
		//}
		k = 0;
		while( k < block ) {
			_b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
			_b[k+1] = expl(_b[k+1] + _log_pi[k+1]); z += _b[k+1];
			_b[k+2] = expl(_b[k+2] + _log_pi[k+2]); z += _b[k+2];
			k += 3;
		}
		if( k < _states ) {
			switch( _states - k ) {
				case 2: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k]; k++;
				case 1: _b[k] = expl(_b[k] + _log_pi[k]); z += _b[k];
			}
		}
		if(!z) z = 1;
		//for(k = 0; k < _states; k++)
		//for(k = _states; k--; )
		//	_b[k] = _b[k] / z;
		k = 0;
		while( k < block ) {
			_b[k] = _b[k] / z;
			_b[k+1] = _b[k+1] / z;
			_b[k+2] = _b[k+2] / z;
			k += 3;
		}
		if( k < _states ) {
			switch( _states - k ) {
				case 2: _b[k] = _b[k] / z; k++;
				case 1: _b[k] = _b[k] / z;
			}
		}		
		maxp = _states - 1;
		z = _b[maxp];
		//for(k = 0; k < _states; k++)
		for(k = _states; k--; )
			if( _b[k] > z) {
				maxp = k;
				z = _b[k];
			}


		for(k = 0; k < _states; k++)
			s->p[k] = _b[k];
		s->maxP = maxp+1;
	}
}

/*
	void snvmixGetCallString
	Arguments:
		*col : pointer to the current file-line in memory
		*calls : pointer to array where we will store the final base-calls
		depth : number of base reads for this position
		*nref : the observed NREF value will be placed here (-1 if none was found)

	Function:
		This will parse column 5 of the pileup file, which contains the
		base calls and will fill the array "calls" with:
			1 : when a reference call was made
		       -1 : when an invalid value is seen ('N', 'n', '*') 
		   [ACTG] : when a non-reference call was made

		

*/
inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref) {
	int i;
	int call_i = 0, call_skip_num = 0;
	char call_skip_char[10];
	for(i = 0 ; call_i < depth; i++) {
		switch(col[i]){
		case ',':
		case '.':
			calls[call_i] = 1;
			call_i++;
			break;
		case 'a': case 'A':
		case 't': case 'T':
		case 'g': case 'G':
		case 'c': case 'C':
			//calls[call_i] = 0;
			calls[call_i] = toupper(col[i]);
			call_i++;
			//if(*nref == 0)
				//*nref = toupper(*(col+sizeof(char)*i));
			if(*nref == 0)
				*nref = toupper(col[i]);
				break;
		case 'N':
		case 'n':
		case '*':
			// Not comparable values, we ignore them, but need to
			// keep count to compare against the number of mapping qualities
			calls[call_i] = -1;
			call_i++;
			break;
		case '$':
			// End of a read, just ignore it
			break;
		case '^':
			// Start of read, ignore it and skip the next value (not base-related)
			i++;
			break;
		case '+':
		case '-':
			// Eliminate:
			//		+2AA
			//		-3AAA
			// Start skiping the sign
			i++;
			for(call_skip_num = 0; col[i] <= '9' && col[i] >= '0'; call_skip_num++, i++) {
				//call_skip_char[call_skip_num] = call;
				call_skip_char[call_skip_num] = col[i];
				//i++;
			}
			// we have the number string in call_skip_char, lets terminate it
			call_skip_char[call_skip_num] = '\0';
			// i has been updated to first letter, just need to jump the rest of them
			i = i+atoi(call_skip_char)-1;
			break;
		default:
			fprintf(stderr,"ERROR: problem reading pileup file calls (%c)\n",col[i]);
			exit(1);
			break;
		}
	}
	// If no nref was found, don't bother parsing the other columns, make the for fail with -1
	if(!*nref)
		*nref = -1;
}

/*
	int snvmixFilterCalls
	Arguments:
		*calls : array built by snvmixGetCallString containing
			1 : when a reference call was made
		       -1 : when an invalid value is seen ('N', 'n', '*') 
		   [ACTG] : when a non-reference call was made
		depth : number of calls for the current position
		*bQ : base qualities observed for each call
		*mQ : map quality for each call
		*params : parameter structure, get thresholding data
	Function:
		For each valid call read in the position this function will apply
		thresholding according to the type selected (-t flag) and the bQ (-q)
		and mQ (-Q) thresholds provided.

		Any base-call that does not pass thresholds will be switched from it's
		current value in *calls to -1;

		The most prevalent NREF after filtering will be determined and returned.
	
*/
inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params) {
	int qual_num, nref_counts[5];
	nref_counts[0] = 0;
	nref_counts[1] = 0;
	nref_counts[2] = 0;
	nref_counts[3] = 0;
	nref_counts[4] = 0;
	int max_nref = N;

	char nref = 0;

	//for(qual_num = 0; qual_num < depth; qual_num++) {
	for(qual_num = depth; qual_num--; ) {
		//if( snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
		if( bQ != NULL && mQ != NULL && snvmixSkipCall(calls,qual_num,params,bQ,mQ) ) {
			calls[qual_num] = -1;
		} else {
			nref_counts[0]++;
			switch(calls[qual_num]) {
				case 'A':
					nref_counts[A]++;
					break;
				case 'C':
					nref_counts[C]++;
					break;
				case 'G':
					nref_counts[G]++;
					break;
				case 'T':
					nref_counts[T]++;
					break;
				case 1:
				case -1:
					nref_counts[0]--;
					break;
				default:
					fprintf(stderr,"ERROR: unknown call base\n");
					exit(1);
			}
		}
	}
	if(nref_counts[0]) {
		max_nref = A;
		if(nref_counts[max_nref] < nref_counts[C])
			max_nref = C;
		if(nref_counts[max_nref] < nref_counts[G])
			max_nref = G;
		if(nref_counts[max_nref] < nref_counts[T])
			max_nref = T;
	} else {
		//return -1;
	}
	//for(qual_num = 0; qual_num < depth; qual_num++) {
	for(qual_num = depth; qual_num--; ) {
		if(calls[qual_num] == 1)
			continue;
		if(calls[qual_num] != base_code[max_nref])
			calls[qual_num] = -1;
	}
	return base_code[max_nref];
}

/*
	int snvmixSkipCall
	Arguments:
		*calls : array built by snvmixGetCallString containing
			1 : when a reference call was made
		       -1 : when an invalid value is seen ('N', 'n', '*') 
		   [ACTG] : when a non-reference call was made
		qual_num : call number that is being evaluated
		*params : parameter structure, get thresholding data
		*bQ : base qualities observed for each call
		*mQ : map quality for each call
	Function:
		Evalates quality values in each of the filtering models

		Returns 1 if the calls[qual_num] needs to be filtered out
		Returns 0 otherwise
*/
inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ) {
	if(calls[qual_num] == -1)
		return(1);
	if(params->filter_type == TYPE_mb) {
		if(bQ[qual_num] <= params->bQ || mQ[qual_num] <= params->mQ)
			return(1);
	} else if(params->filter_type == TYPE_b) {
		if(bQ[qual_num] <= params->bQ)
			return(1);
	} else {
		if(mQ[qual_num] <= params->mQ)
			return(1);
		if(params->filter_type == TYPE_m) {
			// Use mapping as surrogate
			bQ[qual_num] = mQ[qual_num];
			// Make mapping one
			mQ[qual_num] = (char) PHRED_MAX;
		} else if(params->filter_type == TYPE_M) {
			// Mapping passed, make it one
			mQ[qual_num] = (char) PHRED_MAX;
		} else if(params->filter_type == TYPE_Mb) {
			// Nothing special here
		} else if(params->filter_type == TYPE_MB || params->filter_type == TYPE_SNVMix1) {
			if(bQ[qual_num] <= params->bQ)
				return(1);
			if(params->filter_type == TYPE_SNVMix1) {
				bQ[qual_num] = (char) PHRED_MAX;
				mQ[qual_num] = (char) PHRED_MAX;
		}	}
	}
	return(0);
}

inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ) {
	if(call == -1)
		return(1);
	if(params->filter_type != TYPE_b && mQ <= params->mQ)
		return(1);
	switch(params->filter_type) {
	case TYPE_mb:
		if(bQ <= params->bQ || mQ <= params->mQ)
			return(1);
		break;
	case TYPE_b:
		if(bQ <= params->bQ)
			return(1);
		break;
	case TYPE_m:
		// Use mapping as surrogate
		bQ = mQ;
		// Make mapping one
		mQ = (char) PHRED_MAX;
		break;
	case TYPE_M:
		// Mapping passed, make it one
		mQ = (char) PHRED_MAX;
		break;
	case TYPE_Mb:
		// Nothing special here
		break;
	case TYPE_MB:
	case TYPE_SNVMix1:
		if(bQ <= params->bQ)
			return(1);
		if(params->filter_type == TYPE_SNVMix1) {
			bQ = (char) PHRED_MAX;
			mQ = (char) PHRED_MAX;
		}
		break;
	}
	return(0);
}


void resetParams(param_struct *params) {
	params->input = stdin;
	params->output = stdout;
	params->inputfile = NULL;
	params->outputfile = NULL;
	params->modelfile = NULL;
	params->fastafile = NULL;
	params->listfile = NULL;
	params->window = 1;
	params->filter_type = 0;
	params->filter_dup = 1;
	params->filter_chast = 1;
	params->train = 0;
	params->classify = 0;
	params->filter = 0;
	params->full = 0;
	params->input_type = S_PILEUP; // 0 = processed, 1 = maq pileup, 2 = sam pileup, 3 = bam file
	params->mu = NULL;
	params->pi = NULL;
	params->max_iter = 1000;
	params->bQ = 19;
	params->mQ = 19;
	params->debug = 0;
	params->states = 3;
	params->trainP.alpha = NULL;
	params->trainP.beta = NULL;
	params->trainP.delta = NULL;
	params->trainP.param_file = NULL;
	params->trainP.max_iter = 100;
	params->trainP.bQ = NULL;
	params->trainP.mQ = NULL;
	params->trainP.calls = NULL;
	params->window = 1;
}

void initSNVMix(int argc, char **argv, param_struct *params) {
	char c;
	int i;
	resetParams(params);
	while ((c = getopt (argc, argv, "hDTCFfi:o:m:t:p:q:Q:a:b:d:M:S:r:l:w:cR")) != -1) {
		switch (c)
		{
			case 'm': params->modelfile = optarg; break;
			case 'i': params->inputfile = optarg; break;
			case 'o': params->outputfile = optarg; break;
			// Bam file opts
			case 'r': params->fastafile = optarg; break;
			case 'l': params->listfile = optarg; break;
			case 'T': params->train = 1; break;
			case 'C': params->classify = 1; break;
			case 'F': params->filter = 1; break;
			case 'f': params->full = 1; break;
			case 'p':
				if(*optarg == 'q') {
					params->input_type = Q_PILEUP;
				} else if(*optarg == 'm') {
					params->input_type = M_PILEUP;
				} else if(*optarg == 's') {
					params->input_type = S_PILEUP;
				} else if(*optarg == 'b') {
					params->input_type = BAM_FILE;
				} else {
					fprintf(stderr,"ERROR: unknown pileup format %c\n",*optarg);
					exit(1);
				}
				break;
			case 't':
				if(strcmp("mb",optarg) == 0)
					params->filter_type = TYPE_mb;
				else if(strcmp("m",optarg) == 0)
					params->filter_type = TYPE_m;
				else if(strcmp("b",optarg) == 0)
					params->filter_type = TYPE_b;
				else if(strcmp("M",optarg) == 0)
					params->filter_type = TYPE_M;
				else if(strcmp("Mb",optarg) == 0)
					params->filter_type = TYPE_Mb;
				else if(strcmp("MB",optarg) == 0)
					params->filter_type = TYPE_MB;
				else if(strcmp("SNVMix1",optarg) == 0)
					params->filter_type = TYPE_SNVMix1;
				else {
					fprintf(stderr,"ERROR: filter type '%s' not recognized\n",optarg);
					exit(1);
				}
				break;
			case 'q':
				params->bQ = atoi(optarg);
				if( params->bQ < 0 || params->bQ >= PHRED_MAX )  {
					fprintf(stderr,"ERROR: quality threshold value Q%d out of range\n",params->bQ);
					exit(1);
				}
				break;
			case 'Q':
				params->mQ = atoi(optarg);
				if( params->mQ < 0 || params->mQ >= PHRED_MAX )  {
					fprintf(stderr,"ERROR: mapping threshold value Q%d out of range\n",params->mQ);
					exit(1);
				}
				break;
			case 'h':
				usage(argv[0]);
				break;
			case 'D': params->debug = 1; break;
			case 'M': params->trainP.param_file = optarg; break;
			case 'S':
				params->states = atoi(optarg);
				if( params->states < 3 )  {
					fprintf(stderr,"ERROR: state minimum is 3\n");
					exit(1);
				}
				break;
			case 'w':
				params->window = atoi(optarg);
				if(params->window < 1)
					params->window = 1;
				break;
			case 'c':
				params->filter_chast = 0;
				break;
			case 'R':
				params->filter_dup = 0;
				break;
			default:
				fprintf(stderr,"Unknown option\n");
				usage(argv[0]);
				break;
			}
	}
	/* Decide if we're going to train or classify */
	if(params->filter) {
		params->train = 0;
		params->classify = 0;
	}
	if(params->train && params->classify) {
		fprintf(stderr,"ERROR: choose either train or classify\n");
		exit(1);
	} else if(!params->train && !params->classify && !params->filter) {
		fprintf(stderr,"Train/Classify/Filter not selected, picking default: Classify\n");
		params->classify = 1;
	}

	/* Set parameters */
	allocateParameters(params);
	if( params->train ) setTrainingParameters(params);
	if( params->classify ) setClassificationParameters(params);

	/* Open input and output streams */
	if( params->input_type == BAM_FILE ) {
		if( params->train ) {
			fprintf(stderr, "BAM input for training not yet implemented\n");
			exit(1);
		}
		if( params->inputfile == NULL || strcmp(params->inputfile, "-") == 0) {
			fprintf(stderr, "ERROR: if '-p b' is chosen, input file has to be a bam file\n");
			exit(1);
		}
		if( params->fastafile == NULL ) {
			fprintf(stderr, "ERROR: if '-p b' is chosen, reference fasta file is required (-r <ref.fa>)\n");
			exit(1);
		}
	} else if( params->inputfile != NULL) {
			params->input = fopen(params->inputfile, "r");
			if(params->input == NULL) {
				perror("ERROR: could not open input file");
				exit(1);
			}
	} else {
		params->input = stdin;
	}
	if( params->outputfile != NULL ) {
		params->output = fopen(params->outputfile, "w");
		if(params->output == NULL) {
			perror("ERROR: could not open output file");
			exit(1);
		}
	} else {
		params->output = stdout;
	}
}

void allocateParameters(param_struct *params) {

	/* Allocate alpha */
	if( (params->trainP.alpha = malloc(params->states * sizeof(long double))) == NULL) {
		perror("ERROR: could not allocate space for alpha\n"); exit(1);
	}
	/* Allocate beta */
	if( (params->trainP.beta = malloc(params->states * sizeof(long double))) == NULL) {
		perror("ERROR: could not allocate space for beta\n"); exit(1);
	}
	/* Allocate delta*/
	if( (params->trainP.delta = malloc(params->states * sizeof(long double))) == NULL) {
		perror("ERROR: could not allocate space for delta\n"); exit(1);
	}
	/* Allocate mu*/
	if( (params->mu = malloc(params->states * sizeof(long double))) == NULL) {
		perror("ERROR: could not allocate space for mu\n"); exit(1);
	}
	/* Allocate pi*/
	if( (params->pi = malloc(params->states * sizeof(long double))) == NULL) {
		perror("ERROR: could not allocate space for pi\n"); exit(1);
	}
}

void setTrainingParameters(param_struct *params) {
	if( params->trainP.param_file != NULL ) {
		readParamFile(params,'t');
	} else {
		if(params->states != 3) {
			fprintf(stderr, "ERROR: if state space larger than 3 requested, parameters must be provided\n");
			exit(1);
		}
		fprintf(stderr,"Training parameter file not given, using defaults\n");
		params->trainP.alpha[0] = 1000;
		params->trainP.alpha[1] = 5000;
		params->trainP.alpha[2] = 1;
		params->trainP.beta[0] = 1;
		params->trainP.beta[1] = 5000;
		params->trainP.beta[2] = 1000;
		params->trainP.delta[0] = 10;
		params->trainP.delta[1] = 1;
		params->trainP.delta[2] = 1;
	}
}

void setClassificationParameters(param_struct *params) {
	int k;
	if( params->modelfile != NULL ) {
		readParamFile(params,'c');
	} else {
		if(params->states != 3) {
				fprintf(stderr, "ERROR: if state space larger than 3 requested, model file must be provided\n");
				exit(1);
		}
		params->mu[0] = 0.995905287891696078261816182930;
		params->mu[1] = 0.499569401000925172873223800707;
		params->mu[2] = 0.001002216846753116409260431219;
		params->pi[0] = 0.672519580755555956841362785781;
		params->pi[1] = 0.139342327124010650907237618412;
		params->pi[2] = 0.188138092120433392251399595807;
		fprintf(stderr,"Classification parameter file not given, using for mQ35 bQ10\n");
	}
	fprintf(stderr,"Mu and pi values:\n");
	for(k = 0; k < params->states; k++)
		fprintf(stderr,"\tmu[%d] = %Lf\tpi[%d] = %Lf\n", k, params->mu[k], k, params->pi[k]);
	fprintf(stderr,"\n");
}

void readParamFile(param_struct *params, char type) {
	FILE *fptrPARAM;
	char *line = NULL, *param_name, *param, *param_ptr, *filename;
	size_t line_len = 0, read_len = 0;
	long double *target;
	int i;

	if(type == 't') {
		filename=params->trainP.param_file;
	} else if(type == 'c') {
		filename=params->modelfile;
	}
	fptrPARAM=fopen(filename, "r");
	if(fptrPARAM == NULL) {
		fprintf(stderr, "ERROR: could not open parameter file %s\n", filename);
		exit(1);
	}

#if defined __linux__ || defined _GNU_SOURCE
	while( (read_len = getline(&line,&line_len,fptrPARAM)) != -1 )
#endif
#ifdef __APPLE__
	while( (line = fgetln(fptrPARAM,&line_len)) != NULL )
#endif
	{
		line[line_len-1] = '\0';
		target = NULL;
		param_name = strtok_r(line, " ", &param_ptr);
		if(type == 't') {
			     if(strcmp("alpha", param_name) == 0) target = params->trainP.alpha;
			else if(strcmp("beta", param_name) == 0)  target = params->trainP.beta;
			else if(strcmp("delta", param_name) == 0) target = params->trainP.delta;
		} else if(type == 'c') {
			     if(strcmp("mu", param_name) == 0) target = params->mu;
			else if(strcmp("pi", param_name) == 0) target = params->pi;

		}
		if(target == NULL) { fprintf(stderr, "Unknown parameter %s, skipping\n", param_name); continue; }

		for(i = 0; i < params->states; i++) {
			param = strtok_r(NULL, " ", &param_ptr);
			if(param == NULL) {
				fprintf(stderr,"ERROR: missing value #%d for %s\n", i, param_name);
				exit(1);
			}
			target[i] = atof(param);
fprintf(stderr, "DEBUG: %s[%d] = %Lf\n", param_name, i, target[i]);
		}
	}
	fclose(fptrPARAM);
	free(line);
}

void initPhred(long double *phredTable, int elem) {
	int i;
	for(i = 0; i < elem; i++) {
		phredTable[i] = 1-powl(10,(-(long double)i/10));
	}
	phredTable[i] = 1;
}


void usage(char *selfname) {
	param_struct default_params;
	resetParams(&default_params);
	fprintf(stderr,"Syntax:\n\t%s\t-m <modelfile> [-i <infile>] [-o <outfile>] [-T | -C | -F] [-p < q | m | s >] [-t < mb | m | b | M | Mb | MB | SNVMix1>] [-q <#>] [-Q <#>] [-M <trainP file>] [-h]\n",selfname);
	fprintf(stderr,"\tRequired:\n");
	fprintf(stderr,"\t-m <modelfile>\tmodel file, a line for mu and a line for pi, three\n");
	fprintf(stderr,"\t\t\tspace-separated values each, like:\n");
	fprintf(stderr,"\t\t\tmu 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
	fprintf(stderr,"\t\t\tpi 0.xxxxxxxx 0.xxxxxxxx 0.xxxxxxxx\n");
	fprintf(stderr,"\tOptional:\n");
	fprintf(stderr,"\t-i <infile>\tquality pileup, from pileup2matlab.pl script (def: STDIN)\n");
	fprintf(stderr,"\t-o <outfile>\twhere to put the results (def: STDOUT)\n");
	fprintf(stderr,"\t-T | -C | -F\tTrain, Classify or Filter (def: Classify)\n");
	fprintf(stderr,"\t-p q|m|s\tInput pileup format (def: s)\n\t\t\tq = quality\n\t\t\tm = MAQ\n\t\t\ts = SAMtools\n");
	fprintf(stderr,"\t-t mb|m|b|M|Mb|MB|SNVMix1\n\t\t\tFilter type (def: mb)\n");
	fprintf(stderr,"\t\t\tmb\tLowest between map and base quality\n");
	fprintf(stderr,"\t\t\tm\tFilter on map, and use as surrogate for base quality\n");
	fprintf(stderr,"\t\t\tb\tFilter on base quality, take map quality as 1\n");
	fprintf(stderr,"\t\t\tM\tFilter on map quality but use only base quality\n");
	fprintf(stderr,"\t\t\tMb\tFilter on map quality and use both map and base qualities\n");
	fprintf(stderr,"\t\t\tMB\tFilter on map quality AND base quality\n");
	fprintf(stderr,"\t\t\tSNVMix1\tFilter on base quality and map quality, afterwards consider them perfect\n");
	fprintf(stderr,"\t-q #\t\tCutoff Phred value for Base Quality, anything <= this value is ignored (def: Q%d)\n",default_params.bQ);
	fprintf(stderr,"\t-Q #\t\tCutoff Phred value for Map Quality, anything <= this value is ignored (def: Q%d)\n",default_params.mQ);
	fprintf(stderr,"\n\tTraining parameters:\n");
	fprintf(stderr,"\t-M <file>\tProvide a file containing training parameters\n");
	fprintf(stderr,"\t\t\tValues are space-separated:\n");
	fprintf(stderr,"\t\t\talpha # # #\n");
	fprintf(stderr,"\t\t\tbeta # # #\n");
	fprintf(stderr,"\t\t\tdelta # # #\n");
	fprintf(stderr,"\n\t-h\t\tthis message\n");
	fprintf(stderr,"\nImplementation: Rodrigo Goya, 2009. Version %s\n",VERSION);
	exit(0);
}

// Based on classify pileup, but reads the entire file to memory for training purposes.
void snvmixGetTrainSet_pileup(param_struct *params) {
//MAQ:
//	1       554484  C       1752    @,,.,.,, @xxx @xxx xxxxx
//SAM:
//	1       554484  C       1752    ,,.,.,,	xxx xxxx

	FILE *fptrIN;

	char *line = NULL;
	size_t line_len = 0, prev_len = 0;
	int read_len = 0;
	int col_num = 0;
	long int line_num = 0;

	char *col, *qual;
	char *col_sep = "\t", *col_str, *col_ptr;
	char *qual_sep = ",", *qual_str, *qual_ptr;
	char *chr, *pos, *ref, nref, call;	

	char *bQ, *mQ;
	int *calls, *tmpQ, pos_int;
	int depth = 0, qual_num,  maxp;
	int depth_allocated = 0;

	int trainset = 0;
	int trainset_allocated = 0;

	int nref_count = 0, ref_count = 0;
	int i, call_i;

	fptrIN = params->input;

	if( (params->trainP.bQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
		perror("ERROR allocating initial space for train.bQ"); exit(1);
	}
	if( (params->trainP.mQ = malloc(START_QNUM * sizeof (unsigned char **))) == NULL) {
		perror("ERROR allocating initial space for train.mQ"); exit(1);
	}
	if( (params->trainP.calls = malloc(START_QNUM * sizeof (signed char **))) == NULL) {
		perror("ERROR allocating initial space for train.calls"); exit(1);
	}
	if( (params->trainP.depth = malloc(START_QNUM * sizeof (int *))) == NULL) {
		perror("ERROR allocating initial space for train.depth"); exit(1);
	}
	trainset_allocated = START_QNUM;

	if( (calls = malloc(START_QNUM * sizeof (int))) == NULL) {
		perror("ERROR allocating initial space for train.depth"); exit(1);
	}
	depth_allocated = START_QNUM;
#if defined __linux__ || defined _GNU_SOURCE
	while( (read_len = getline(&line,&line_len,fptrIN)) != -1 )
#endif
#ifdef __APPLE__
	while( (line = fgetln(fptrIN,&line_len)) != NULL )
#endif
	{
		line[line_len-1] = '\0';
		depth = 0;
		nref = 0;
		col_ptr = NULL;
		for(col_num = 0, col_str = line;  ; col_num++, col_str = NULL) {
			col = strtok_r(col_str, "\t", &col_ptr);
			if(col == NULL) {
				break;
			}
			if(col_num == 0) {
				chr = col;
			} else if(col_num == 1) {
				pos = col;
				pos_int = strtol(pos, NULL, 10);
			} else if(col_num == 2) {
				ref = col;
			} else if(col_num == 3) {
				depth = atoi(col);
				if(depth > depth_allocated) {
					if( (calls = realloc(calls, sizeof (int) * (depth + START_QNUM))) == NULL) {
						perror("ERROR allocating space for calls"); exit(1);
					}
					depth_allocated = depth + START_QNUM;
				}
			} else if(col_num == 4) {
				if(params->input_type == M_PILEUP)
					col = col+sizeof(char);
					snvmixGetCallString(col, calls, depth, &nref);
			} else if(col_num == 5) {
				bQ = col;
				// If it's a MAQ pileup, we need to skip the @ sign
				if(params->input_type == M_PILEUP)
					bQ = bQ + sizeof(char);
				for(call_i = 0; call_i < depth; call_i++)
					bQ[call_i] = bQ[call_i]-33;
			} else if(col_num == 6) {
				mQ = col;
				// If it's a MAQ pileup, we need to skip the @ sign
				if(params->input_type == M_PILEUP)
					mQ = mQ + sizeof(char);
				for(call_i = 0; call_i < depth; call_i++)
					mQ[call_i] = mQ[call_i]-33;
			}
		}
		if(line_num >= trainset_allocated) {
			if( ( params->trainP.bQ = realloc(params->trainP.bQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
				perror("ERROR reallocating space for trainP.bQ"); exit(1);
			}
			if( ( params->trainP.mQ = realloc(params->trainP.mQ, (line_num + START_QNUM) * sizeof (unsigned char *)) ) == NULL ) {
				perror("ERROR reallocating space for trainP.mQ"); exit(1);
			}
			if( ( params->trainP.calls = realloc(params->trainP.calls, (line_num + START_QNUM) * sizeof (signed char *)) ) == NULL) {
				perror("ERROR reallocating space for trainP.calls"); exit(1);
			}
			if( ( params->trainP.depth = realloc(params->trainP.depth, (line_num + START_QNUM) * sizeof (int)) ) == NULL) {
				perror("ERROR reallocating space for trainP.depth"); exit(1);
			}
			trainset_allocated = line_num + START_QNUM;
		}

		nref = snvmixFilterCalls(calls,depth,bQ,mQ,params);
		nref_count = 0;
		ref_count = 0;
		for(qual_num = 0; qual_num < depth; qual_num++) {
			if(calls[qual_num] == -1)
				continue;
			if(calls[qual_num] == 1)
				ref_count++;
			if(calls[qual_num] == nref)
				nref_count++;
		}
		params->trainP.depth[line_num] = ref_count + nref_count;

		if( (params->trainP.bQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
			perror("ERROR allocating space for trainP.bQ"); exit(1);
		}
		if( (params->trainP.mQ[line_num] = malloc(sizeof(unsigned char) * params->trainP.depth[line_num])) == NULL ) {
			perror("ERROR allocating space for trainP.mQ"); exit(1);
		}
		if( (params->trainP.calls[line_num] = malloc(sizeof(signed char) * params->trainP.depth[line_num])) == NULL ) {
			perror("ERROR allocating space for trainP.calls"); exit(1);
		}

		call_i = 0;
		for(qual_num = 0; qual_num < depth; qual_num++) {
			if(calls[qual_num] == -1)
				continue;
	
			params->trainP.calls[line_num][call_i] = (signed char) calls[qual_num];
			params->trainP.bQ[line_num][call_i] = (unsigned char) bQ[qual_num];
			params->trainP.mQ[line_num][call_i] = (unsigned char) mQ[qual_num];
			call_i++;
		}
		if( params->trainP.depth[line_num] != call_i ) {
			fprintf(stderr, "ERROR: mismatch between trainP.depth and call_i\n"); exit(1);
		}
		line_num++;
	}
	params->trainP.len = line_num;
	params->trainP.len = line_num;
	fclose(fptrIN);
	free(line); free(calls);
}

// Use EM algorithm on a memory-located data set to train the parameters
void snvmixTrain_pileup(param_struct *params) {
	int line_num = 0, call_i = 0, k = 0, states;
	int iter = 0;
	FILE *fptrOUT;

	long double phred[PHRED_MAX + 1];
	long double bsum = 0, msum = 0, *b, *mu, *notmu, *pi, *log_pi, z = 0;
	long double **pG, **xr;
	long double *xrhat, *sum_pG;
	long double Y, not_Y, M;
	int N_sum;
	int strength;

	long double ll, prev_ll = 0;
	//initPhred(phred, PHRED_MAX+1);
	initPhred(phred, PHRED_MAX);

	states = params->states;

	// Initialize local values of parameters
	mu = params->mu;
	pi = params->pi;

	if( ( b = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate b\n"); exit(1); }
	if( ( notmu = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate notmu\n"); exit(1); }
	if( ( log_pi = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate log_pi\n"); exit(1); }
	if( ( xrhat = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate xrhat\n"); exit(1); }
	if( ( sum_pG = malloc(states * sizeof(long double)) ) == NULL) { perror("[snvmixClassify_pileup] Could not allocate sum_pG\n"); exit(1); }

	snvmixGetTrainSet_pileup(params);

	if(params->debug) {
	for(line_num = 0; line_num < params->trainP.len; line_num++) {
		fprintf(stderr, "line_num: %d", line_num);
		for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
			fprintf(stderr, "\t(%d,bQ%d,mQ%d)", params->trainP.calls[line_num][call_i], params->trainP.bQ[line_num][call_i], params->trainP.mQ[line_num][call_i]);
		}
		fprintf(stderr, "\n\n");
	}
	}
	// Initialize mu and pi

	fprintf(stderr, "Initializing mu\n");
	for(k = 0; k < states; k++) {
		params->mu[k] = (long double) params->trainP.alpha[k] / (params->trainP.alpha[k] + params->trainP.beta[k]);
		fprintf(stderr,"\talpha[%d] = %Lf,\tbeta[%d] = %Lf,\tmu[%d] = %Lf\n", k, params->trainP.alpha[k], k, params->trainP.beta[k], k, params->mu[k]);
	}

	fprintf(stderr, "Initializing pi\n");
	z = (long double) params->trainP.delta[0] + params->trainP.delta[1] + params->trainP.delta[2];
	if(!z) { z = 1; }
	for(k = 0; k < states; k++) {
		params->pi[k] = (long double) params->trainP.delta[k]  / z;
		fprintf(stderr,"\tdelta[%d] = %Lf,\tpi[%d] = %Lf\n", k, params->trainP.delta[k], k, params->pi[k]);
	}

	strength = params->trainP.len;

	// Initialize matrices
	if( (pG = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
		perror("ERROR allocating initial space for pG"); exit(1);
	}
	if( (xr = malloc(sizeof(long double *) * params->trainP.len)) == NULL) {
		perror("ERROR allocating initial space for xr"); exit(1);
	}
	for(line_num = 0; line_num < params->trainP.len; line_num++) {
		// [states] cells for each line_num
		if( (pG[line_num] = malloc(sizeof(long double) * states)) == NULL) {
			perror("ERROR allocating space for pG"); exit(1);
		}
		if( (xr[line_num] = malloc(sizeof(long double) * states)) == NULL) {
			perror("ERROR allocating space for xr"); exit(1);
		}
	}

	for(iter = 0; iter < params->trainP.max_iter; iter++) {
		// Reset values for this iteration
		for(k = 0; k < states; k++) {
			notmu[k] = 1 - mu[k];
			log_pi[k] = logl(pi[k]);

			sum_pG[k] = 0;
			xrhat[k] = 0;

		}
		N_sum = 0;
		ll = 0;

		// E step
		for(line_num = 0; line_num < params->trainP.len; line_num++) {
			if(params->trainP.depth == 0)
				continue;
			for(k = 0; k < states; k++) {
				xr[line_num][k] = 0;
				b[k] = 0;
			}

			for(call_i = 0; call_i < params->trainP.depth[line_num]; call_i++) {
				if(params->trainP.calls[line_num][call_i] == 1) {
					Y = phred[params->trainP.bQ[line_num][call_i]];
					not_Y = 1-phred[params->trainP.bQ[line_num][call_i]];
				} else {
					Y = (1-phred[params->trainP.bQ[line_num][call_i]])/3;
					not_Y = phred[params->trainP.bQ[line_num][call_i]] + 2*(1-phred[params->trainP.bQ[line_num][call_i]])/3;
				}
				M =  phred[params->trainP.mQ[line_num][call_i]];
				for(k = 0; k < states; k++) {
					b[k] = b[k] + logl( (1 - M) * 0.5 + M * (Y * mu[k] + not_Y * notmu[k]) );
					xr[line_num][k] = xr[line_num][k] + Y;
				}
			}
			z = 0;
			for(k = 0; k < states; k++) {
				b[k] = expl(b[k] + log_pi[k]);
				z = z + b[k];
			}
			if(!z) { z=1; }
			for(k = 0; k < states; k++) {
				pG[line_num][k] = b[k] / z;
			}

			ll = ll + logl(z);

			N_sum = N_sum + params->trainP.depth[line_num];

			for(k = 0; k < states; k++) {
				sum_pG[k] = sum_pG[k] + pG[line_num][k];
				xrhat[k] = xrhat[k] + xr[line_num][k] * pG[line_num][k];
			}
		}

		// Check log likelihood
		if(iter == 0)
			prev_ll = ll;
		else if(ll <= prev_ll)
			break;
		prev_ll = ll;

		// M step
		z = 0;
		for(k = 0; k < states; k++) {
			z = z + sum_pG[k] + params->trainP.delta[k];
		}
		if(!z) { z=1; }
		for(k = 0; k < states; k++) {
			// Update pi
			params->pi[k] = (sum_pG[k] + params->trainP.delta[k]) / z;
			// Update mu
			params->mu[k] = (xrhat[k] + params->trainP.alpha[k]*strength-1) / (N_sum + params->trainP.alpha[k]*strength + params->trainP.beta[k]*strength-2);
		}
	}

	if(params->modelfile == NULL) {
		fptrOUT = stdout;
	} else {
		fptrOUT = fopen(params->modelfile, "w");
		if(fptrOUT == NULL) {
			perror("ERROR: could not open modelfile for writing, using STDOUT");
			fptrOUT = stdout;
		}
	}
	fprintf(fptrOUT,"mu");
	for(k = 0; k < states; k++) {
		fprintf(fptrOUT, " %0.30Lf", params->mu[k]);
	}
	fprintf(fptrOUT,"\n");
	fprintf(fptrOUT,"pi");
	for(k = 0; k < states; k++) {
		fprintf(fptrOUT, " %0.30Lf", params->pi[k]);
	}
	fprintf(fptrOUT,"\n");

	/* free unused memory */
	free(b); free(notmu); free(log_pi);
	free(xrhat); free(sum_pG);
	for(line_num = 0; line_num < params->trainP.len; line_num++) {
		// free [states] cells for each line_num
		free(pG[line_num]);
		free(xr[line_num]);
	}
	free(pG); free(xr);
}