Mercurial > repos > vipints > rdiff
view rDiff/mex/get_reads.cpp @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
line wrap: on
line source
/* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */ #include <stdio.h> #include <signal.h> #include <mex.h> #include <vector> using std::vector; #include "get_reads_direct.h" #include "mex_input.h" #include "read.h" #include <sys/types.h> #define MAXLINE 10000 /* * input: * 1 bam file * 2 chromosome * 3 region start (1-based index) * 4 region end (1-based index) * 5 strand (either '+' or '-' or '0') * [6] collapse flag: if true the reads are collapsed to a coverage track * [7] subsample percentage: percentage of reads to be subsampled (in per mill) * [8] intron length filter * [9] exon length filter * [10] mismatch filter * * output: * 1 coverage * [2] intron cell array * * example call: * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30); */ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) { fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n"); return; } /* obligatory arguments * **********************/ char *fname = get_string(prhs[0]); //fprintf(stdout, "arg1: %s\n", fname); char *chr = get_string(prhs[1]); //fprintf(stdout, "arg2: %s\n", chr); int from_pos = get_int(prhs[2]); //fprintf(stdout, "arg3: %d\n", from_pos); int to_pos = get_int(prhs[3]); //fprintf(stdout, "arg4: %d\n", to_pos); char *strand = get_string(prhs[4]); //fprintf(stdout, "arg5: %s\n", strand); if (from_pos>to_pos) mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n"); if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0"); /* optional arguments * ******************/ int collapse = 0; if (nrhs>=6) collapse = get_int(prhs[5]); int subsample = 1000; if (nrhs>=7) subsample = get_int(prhs[6]); int intron_len_filter = 1e9; if (nrhs>=8) intron_len_filter = get_int(prhs[7]); int exon_len_filter = -1; if (nrhs>=9) exon_len_filter = get_int(prhs[8]); int filter_mismatch = 1e9; if (nrhs>=10) filter_mismatch = get_int(prhs[9]); /* call function to get reads * **************************/ char region[MAXLINE]; sprintf(region, "%s:%i-%i", chr, from_pos, to_pos); vector<CRead*> all_reads; get_reads_from_bam(fname, region, &all_reads, strand[0], subsample); /* filter reads * **************/ vector<CRead*> reads; for (int i=0; i<all_reads.size(); i++) { if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0) reads.push_back(all_reads[i]); } /* prepare output * **************/ int num_rows = reads.size(); int num_pos = to_pos-from_pos+1; // read coverages collapsed if (collapse) { plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]); if (num_pos>0 && mask_ret==NULL) mexErrMsgTxt("Error allocating memory\n"); for (int i=0; i<reads.size(); i++) { reads[i]->get_coverage(from_pos, to_pos, mask_ret); } } // reads not collapsed else { uint32_t nzmax = 0; // maximal number of nonzero elements int len = to_pos-from_pos+1; for (uint i=0; i<reads.size(); i++) { for (uint n = 0; n < reads[i]->block_starts.size(); n++) { uint32_t from, to; if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0) from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos; else from = 0; if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0) to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n]; else to = 0; for (int bp=from; bp<to&bp<len; bp++) { nzmax++; } } } // 1st row: row indices // 2nd row: column indices plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL); double *mask_ret = (double*) mxGetData(plhs[0]); if (nzmax>0 && mask_ret==NULL) mexErrMsgTxt("Error allocating memory\n"); uint32_t mask_ret_c = 0; // counter for (uint i=0; i<reads.size(); i++) { reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i); } if (mask_ret_c!=2*nzmax) mexErrMsgTxt("Error filling index arrays for sparse matrix\n"); } // introns if (nlhs==2) { vector<int> intron_list; for (int i=0; i<reads.size(); i++) { reads[i]->get_introns(&intron_list); } plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); for (int p = 0; p<intron_list.size(); p++) { p_intron_list[p] = intron_list[p]; } intron_list.clear(); } //// introns //if (nlhs==2) //{ // // use x = [introns{:}]' in matlab to get the array of introns // const int cnum_rows = num_rows; // plhs[1] = mxCreateCellArray(1, &cnum_rows); // mxArray *intron_cell = plhs[1]; // if (num_rows>0 && intron_cell==NULL) // mexErrMsgTxt("Error allocating memory\n"); // vector<int> intron_list; // for (int i=0; i<reads.size(); i++) // { // reads[i]->get_introns(&intron_list); // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list); // // for (int p = 0; p<intron_list.size(); p++) // { // p_intron_list[p] = intron_list[p]; // } // mxSetCell(intron_cell, i, mx_intron_list); // intron_list.clear(); // } //} for (int i=0; i<all_reads.size(); i++) delete all_reads[i]; }