diff rDiff/mex/get_reads.cpp @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rDiff/mex/get_reads.cpp	Thu Feb 14 23:38:36 2013 -0500
@@ -0,0 +1,197 @@
+/* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */
+
+#include <stdio.h>
+#include <signal.h>
+#include <mex.h>
+#include <vector>
+	using std::vector;
+#include "get_reads_direct.h"
+#include "mex_input.h"
+#include "read.h"
+#include <sys/types.h>
+#define MAXLINE 10000
+
+/*
+ * input: 
+ * 1 bam file
+ * 2 chromosome
+ * 3 region start (1-based index)
+ * 4 region end (1-based index)
+ * 5 strand (either '+' or '-' or '0')
+ * [6] collapse flag: if true the reads are collapsed to a coverage track
+ * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
+ * [8] intron length filter
+ * [9] exon length filter
+ * [10] mismatch filter
+ *
+ * output: 
+ * 1 coverage
+ * [2] intron cell array
+ *
+ * example call: 
+ * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
+ */
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
+  
+  if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) {
+    fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n");
+    return; 
+  }
+  
+  /* obligatory arguments
+   * **********************/
+  char *fname = get_string(prhs[0]);
+	//fprintf(stdout, "arg1: %s\n", fname);
+  char *chr = get_string(prhs[1]);
+	//fprintf(stdout, "arg2: %s\n", chr);
+  int from_pos = get_int(prhs[2]);
+	//fprintf(stdout, "arg3: %d\n", from_pos);
+  int to_pos = get_int(prhs[3]);
+	//fprintf(stdout, "arg4: %d\n", to_pos);
+  char *strand = get_string(prhs[4]);
+	//fprintf(stdout, "arg5: %s\n", strand);
+
+  if (from_pos>to_pos)
+     mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
+
+  if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') 
+    mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
+
+  /* optional arguments
+   * ******************/	
+  int collapse = 0;
+  if (nrhs>=6)
+    collapse = get_int(prhs[5]);
+  
+  int subsample = 1000;	
+  if (nrhs>=7)
+    subsample = get_int(prhs[6]);
+	  
+  int intron_len_filter = 1e9;
+  if (nrhs>=8)
+    intron_len_filter = get_int(prhs[7]);
+
+  int exon_len_filter = -1;
+  if (nrhs>=9)
+    exon_len_filter = get_int(prhs[8]);
+
+  int filter_mismatch = 1e9;
+  if (nrhs>=10)
+    filter_mismatch = get_int(prhs[9]);
+
+  /* call function to get reads
+   * **************************/	
+  char region[MAXLINE];
+  sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
+
+  vector<CRead*> all_reads;
+  
+  get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
+ 
+	/* filter reads
+	* **************/	
+	vector<CRead*> reads;
+	for (int i=0; i<all_reads.size(); i++) 
+	{
+		if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
+			reads.push_back(all_reads[i]);
+	}
+
+ 
+  /* prepare output
+   * **************/	
+  int num_rows = reads.size();
+  int num_pos = to_pos-from_pos+1;
+  
+  // read coverages collapsed 
+  if (collapse) {
+    plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
+    uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
+    if (num_pos>0 && mask_ret==NULL)
+      mexErrMsgTxt("Error allocating memory\n");
+    for (int i=0; i<reads.size(); i++) 
+	{
+        reads[i]->get_coverage(from_pos, to_pos, mask_ret);
+    }
+  }
+  // reads not collapsed
+  else {
+    uint32_t nzmax = 0; // maximal number of nonzero elements 
+    int len = to_pos-from_pos+1;
+    for (uint i=0; i<reads.size(); i++) {
+	for (uint n = 0; n < reads[i]->block_starts.size(); n++) {
+	  uint32_t from, to;
+	  if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
+	    from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
+	  else
+	    from = 0;
+	  if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
+	    to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
+	  else
+	    to = 0;
+	  for (int bp=from; bp<to&bp<len; bp++) {
+	    nzmax++;
+	  }
+	}
+    }
+    // 1st row: row indices
+    // 2nd row: column indices
+    plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
+    double *mask_ret = (double*) mxGetData(plhs[0]);
+    if (nzmax>0 && mask_ret==NULL)
+      mexErrMsgTxt("Error allocating memory\n");
+    uint32_t mask_ret_c = 0; // counter
+    for (uint i=0; i<reads.size(); i++) 
+	{
+		reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
+    }
+    if (mask_ret_c!=2*nzmax)
+      mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
+    
+  }
+  // introns
+  if (nlhs==2) 
+  {
+    vector<int> intron_list;
+	for (int i=0; i<reads.size(); i++) 
+	{
+		reads[i]->get_introns(&intron_list);
+	}
+
+    plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
+	uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
+	for (int p = 0; p<intron_list.size(); p++) 
+	{	
+		p_intron_list[p] = intron_list[p];
+	}
+	intron_list.clear();	
+  }
+  //// introns
+  //if (nlhs==2) 
+  //{
+  //  // use x = [introns{:}]' in matlab to get the array of introns
+  //  const int cnum_rows = num_rows;
+  //  plhs[1] = mxCreateCellArray(1, &cnum_rows);
+  //  mxArray *intron_cell = plhs[1];
+  //  if (num_rows>0 && intron_cell==NULL)
+  //    mexErrMsgTxt("Error allocating memory\n");
+  //  vector<int> intron_list;
+  //  for (int i=0; i<reads.size(); i++) 
+  //  {
+  //    reads[i]->get_introns(&intron_list);
+  //    mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);		
+  //    uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list);
+  //    
+  //    for (int p = 0; p<intron_list.size(); p++) 
+  //    {
+  //      p_intron_list[p] = intron_list[p];
+  //    }
+  //    mxSetCell(intron_cell, i, mx_intron_list);
+  //    intron_list.clear();	
+  //  }
+  //}
+	for (int i=0; i<all_reads.size(); i++)
+		delete all_reads[i];
+
+}
+