Mercurial > repos > ryanmorin > nextgen_variant_identification
diff SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h @ 0:74f5ea818cea
Uploaded
author | ryanmorin |
---|---|
date | Wed, 12 Oct 2011 19:50:38 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SNV/SNVMix2_source/SNVMix2-v0.12.1-rc1/SNVMix2.h Wed Oct 12 19:50:38 2011 -0400 @@ -0,0 +1,234 @@ +/* The MIT License + + Copyright (c) 2009, by Sohrab Shah <sshah@bccrc.ca> and Rodrigo Goya <rgoya@bcgsc.ca> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <unistd.h> +#include <stdbool.h> +#include "sam.h" +#include "faidx.h" +#include "khash.h" + +#define TYPE_mb 0 +#define TYPE_m 1 +#define TYPE_b 2 +#define TYPE_M 3 +#define TYPE_Mb 4 +#define TYPE_MB 5 +#define TYPE_SNVMix1 6 + +#define Q_PILEUP 0 +#define M_PILEUP 1 +#define S_PILEUP 2 +#define BAM_FILE 3 + +#define PHRED_MAX 200 + +#define N 0 +#define A 1 +#define G 2 +#define C 3 +#define T 4 + +char base_code[] = {'N','A','G','C','T'}; + +struct params_train { + long double *alpha; + long double *beta; + long double *delta; + char *param_file; + int max_iter; + unsigned char **bQ; + unsigned char **mQ; + signed char **calls; + char *ref; + int *pos; + int *depth; + int len; +}; + +typedef struct { + FILE *input; + FILE *output; + char *inputfile; + char *outputfile; + char *modelfile; + char *bamfile; + char *fastafile; + char *listfile; + int filter_type; + int filter_chast; + int filter_dup; + int train; + int classify; + int filter; + int full; + int input_type; // 0 = processed, 1 = maq pileup, 2 = sam pileup + long double *mu; + long double *pi; + int max_iter; + int bQ; + int mQ; + int debug; + //struct { + // int alpha[3]; + // int beta[3]; + //} train; + struct params_train trainP; + int states; + int window; +} param_struct; + +typedef int *indel_list_t; +KHASH_MAP_INIT_INT64(64, indel_list_t) + +typedef struct{ + bool in_use; + uint32_t tid; // get with snvmix_pl->in->header->target_name[tid], + uint32_t pos; + char ref; + char nref; + int ref_count; + int nref_count; + long double *p; + int maxP; + // 0 = REF , 1 = NREF + int forward[2]; + int reverse[2]; + int good_pair[2]; + int bad_pair[2]; + int c_clean[2]; + int c_ins[2]; + int c_del[2]; + int c_junc[2]; + int nref_edges; // FIXME: fixed 5 bp edges + int nref_center; // + int indel_pos; // How many reads have a deletion at that site + int indel_near; // How many reasd have a deletion overall + int aln_unique_pos; + int full_depth; // Full depth at this position + int raw_cvg[5]; + int thr_cvg[5]; +} snvmix_call_t; + +typedef struct { + param_struct *params; + int begin, end; + samfile_t *in; + faidx_t *fai; + int tid, len; + char *ref; + /* might want to move this elsewhere */ + khash_t(64) *hash; + long double *notmu, *log_pi, *b; + long double phred[PHRED_MAX + 1]; + int *calls, depth_allocated; + snvmix_call_t *buffer; + int n_buffer, b_start, b_end; + // Extra flags +} snvmix_pl_t; + + +void updatePhred(long double *phredTable); +void initPhred(long double *phredTable, int elem); + +void resetParams(param_struct *params); +void initSNVMix(int argc , char **argv, param_struct *params); +void usage(char *selfname); + +void allocateParameters(param_struct *params); +void setTrainingParameters(param_struct *params); +void setClassificationParameters(param_struct *params); +void readParamFile(param_struct *params, char type); + + +void snvmixClassify_qualities(param_struct *params); +void snvmixClassify_pileup(param_struct *params); + +int snvmixClassify_bamfile(param_struct *params); +static int snvmixClassify_pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); + +inline void snvmixGetCallString(char *col, int *calls, int depth, char *nref); + +inline int snvmixFilterCalls(int *calls, int depth, char *bQ, char *mQ, param_struct *params); +inline int snvmixSkipCall(int *calls, int qual_num, param_struct *params, char *bQ, char *mQ); +inline int snvmixSkipCall_alt(param_struct *params, int call, char bQ, char mQ); + +void snvmixTrain_qualities(param_struct *params); +void snvmixGetTrainSet_pileup(param_struct *params); +void snvmixTrain_pileup(param_struct *params); + +long double normalise(long double *values, int len); + +char **__bam_get_lines(const char *fn, int *_n); +static khash_t(64) *load_pos(const char *fn, bam_header_t *h) +{ + char **list; + int i, j, n, *fields, max_fields; + khash_t(64) *hash; + bam_init_header_hash(h); + list = __bam_get_lines(fn, &n); + fprintf(stderr,"got %d lines\n", n); + hash = kh_init(64); + max_fields = 0; fields = 0; + for (i = 0; i < n; ++i) { + char *str = list[i]; + int chr, n_fields, ret; + khint_t k; + uint64_t x; + n_fields = ksplit_core(str, 0, &max_fields, &fields); + if (n_fields < 2) continue; + chr = bam_get_tid(h, str + fields[0]); + if (chr < 0) { + fprintf(stderr, "[load_pos] unknown reference sequence name: %s\n", str + fields[0]); + continue; + } + x = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1); + k = kh_put(64, hash, x, &ret); + if (ret == 0) { + fprintf(stderr, "[load_pos] position %s:%s has been loaded.\n", str+fields[0], str+fields[1]); + continue; + } + kh_val(hash, k) = 0; + if (n_fields > 2) { + // count + for (j = 2; j < n_fields; ++j) { + char *s = str + fields[j]; + if ((*s != '+' && *s != '-') || !isdigit(s[1])) break; + } + if (j > 2) { // update kh_val() + int *q, y, z; + q = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int)); + q[0] = j - 2; z = j; y = 1; + for (j = 2; j < z; ++j) + q[y++] = atoi(str + fields[j]); + } + } + free(str); + } + free(list); free(fields); + return hash; +}