diff SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h @ 0:74f5ea818cea

Uploaded
author ryanmorin
date Wed, 12 Oct 2011 19:50:38 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h	Wed Oct 12 19:50:38 2011 -0400
@@ -0,0 +1,234 @@
+/* The MIT License
+
+   Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include "sam.h"
+#include "faidx.h"
+#include "khash.h"
+
+#define TYPE_mb 0
+#define TYPE_m 1
+#define TYPE_b 2
+#define TYPE_M 3
+#define TYPE_Mb 4
+#define TYPE_MB 5
+#define TYPE_SNVMix1 6
+
+#define Q_PILEUP 0
+#define M_PILEUP 1
+#define S_PILEUP 2
+#define BAM_FILE 3
+
+#define PHRED_MAX 200
+
+#define N 0
+#define A 1
+#define G 2
+#define C 3
+#define T 4
+
+char base_code[] = {'N','A','G','C','T'};
+
+struct params_train {
+	long double *alpha;
+	long double *beta;
+	long double *delta;
+	char *param_file;
+	int max_iter;
+	unsigned char **bQ;
+	unsigned char **mQ;
+	signed char **calls;
+	char *ref;
+	int *pos;
+	int *depth;
+	int len;
+};
+
+typedef struct {
+	FILE *input;
+	FILE *output;
+	char *inputfile;
+	char *outputfile;
+	char *modelfile;
+	char *bamfile;
+	char *fastafile;
+	char *listfile;
+	int filter_type;
+	int filter_chast;
+	int filter_dup;
+	int train;
+	int classify;
+	int filter;
+	int full;
+	int input_type; // 0 = processed, 1 = maq pileup, 2 = sam pileup
+	long double *mu;
+	long double *pi;
+	int max_iter;
+	int bQ;
+	int mQ;
+	int debug;
+	//struct {
+	//	int alpha[3];
+	//	int beta[3];
+	//} train;
+	struct params_train trainP;
+	int states;
+	int window;
+} param_struct;
+
+typedef int *indel_list_t;
+KHASH_MAP_INIT_INT64(64, indel_list_t)
+
+typedef struct{
+	bool in_use;
+	uint32_t tid; // get with snvmix_pl->in->header->target_name[tid],
+	uint32_t pos;
+	char ref;
+	char nref;
+	int ref_count;
+	int nref_count;
+	long double *p;
+	int maxP;
+	// 0 = REF    ,     1 = NREF
+	int forward[2];
+	int reverse[2];
+	int good_pair[2];
+	int bad_pair[2];
+	int c_clean[2];
+	int c_ins[2];
+	int c_del[2];
+	int c_junc[2];
+	int nref_edges;    // FIXME: fixed 5 bp edges
+	int nref_center;    //
+	int indel_pos;     // How many reads have a deletion at that site
+	int indel_near;    // How many reasd have a deletion overall
+	int aln_unique_pos;
+	int full_depth;    // Full depth at this position
+	int raw_cvg[5];
+	int thr_cvg[5];
+} snvmix_call_t;
+
+typedef struct {
+	param_struct *params;
+	int begin, end;
+	samfile_t *in;
+	faidx_t *fai;
+	int tid, len;
+	char *ref;
+	/* might want to move this elsewhere */
+	khash_t(64) *hash;
+	long double *notmu, *log_pi, *b;
+	long double phred[PHRED_MAX + 1];
+	int *calls, depth_allocated;
+	snvmix_call_t *buffer;
+	int n_buffer, b_start, b_end;
+	// Extra flags
+} snvmix_pl_t;
+
+
+void updatePhred(long double *phredTable);
+void initPhred(long double *phredTable, int elem);
+
+void resetParams(param_struct *params);
+void initSNVMix(int argc , char **argv, param_struct *params);
+void usage(char *selfname);
+
+void allocateParameters(param_struct *params);
+void setTrainingParameters(param_struct *params);
+void setClassificationParameters(param_struct *params);
+void readParamFile(param_struct *params, char type);
+
+
+void snvmixClassify_qualities(param_struct *params);
+void snvmixClassify_pileup(param_struct *params);
+
+int snvmixClassify_bamfile(param_struct *params);
+static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
+
+inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref);
+
+inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params);
+inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ);
+inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ);
+
+void snvmixTrain_qualities(param_struct *params);
+void snvmixGetTrainSet_pileup(param_struct *params);
+void snvmixTrain_pileup(param_struct *params);
+
+long double normalise(long double *values, int len);
+
+char **__bam_get_lines(const char *fn, int *_n);
+static khash_t(64) *load_pos(const char *fn, bam_header_t *h)
+{
+	char **list;
+	int i, j, n, *fields, max_fields;
+	khash_t(64) *hash;
+	bam_init_header_hash(h);
+	list = __bam_get_lines(fn, &n);
+	fprintf(stderr,"got %d lines\n", n);
+	hash = kh_init(64);
+	max_fields = 0; fields = 0;
+	for (i = 0; i < n; ++i) {
+		char *str = list[i];
+		int chr, n_fields, ret;
+		khint_t k;
+		uint64_t x;
+		n_fields = ksplit_core(str, 0, &max_fields, &fields);
+		if (n_fields < 2) continue;
+		chr = bam_get_tid(h, str + fields[0]);
+		if (chr < 0) {
+			fprintf(stderr, "[load_pos] unknown reference sequence name: %s\n", str + fields[0]);
+			continue;
+		}
+		x = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1);
+		k = kh_put(64, hash, x, &ret);
+		if (ret == 0) {
+			fprintf(stderr, "[load_pos] position %s:%s has been loaded.\n", str+fields[0], str+fields[1]);
+			continue;
+		}
+		kh_val(hash, k) = 0;
+		if (n_fields > 2) {
+			// count
+			for (j = 2; j < n_fields; ++j) {
+				char *s = str + fields[j];
+				if ((*s != '+' && *s != '-') || !isdigit(s[1])) break;
+ 			}
+			if (j > 2) { // update kh_val()
+				int *q, y, z;
+				q = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int));
+				q[0] = j - 2; z = j; y = 1;
+				for (j = 2; j < z; ++j)
+					q[y++] = atoi(str + fields[j]);
+			}
+		}
+		free(str);
+	}
+	free(list); free(fields);
+	return hash;
+}