| 
0
 | 
     1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 #include <stdio.h>
 | 
| 
 | 
     4 #include <signal.h>
 | 
| 
 | 
     5 #include <mex.h>
 | 
| 
 | 
     6 #include <vector>
 | 
| 
 | 
     7 	using std::vector;
 | 
| 
 | 
     8 #include "get_reads_direct.h"
 | 
| 
 | 
     9 #include "mex_input.h"
 | 
| 
 | 
    10 #include "read.h"
 | 
| 
 | 
    11 #include <sys/types.h>
 | 
| 
 | 
    12 #define MAXLINE 10000
 | 
| 
 | 
    13 
 | 
| 
 | 
    14 /*
 | 
| 
 | 
    15  * input: 
 | 
| 
 | 
    16  * 1 bam file
 | 
| 
 | 
    17  * 2 chromosome
 | 
| 
 | 
    18  * 3 region start (1-based index)
 | 
| 
 | 
    19  * 4 region end (1-based index)
 | 
| 
 | 
    20  * 5 strand (either '+' or '-' or '0')
 | 
| 
 | 
    21  * [6] collapse flag: if true the reads are collapsed to a coverage track
 | 
| 
 | 
    22  * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
 | 
| 
 | 
    23  * [8] intron length filter
 | 
| 
 | 
    24  * [9] exon length filter
 | 
| 
 | 
    25  * [10] mismatch filter
 | 
| 
 | 
    26  *
 | 
| 
 | 
    27  * output: 
 | 
| 
 | 
    28  * 1 coverage
 | 
| 
 | 
    29  * [2] intron cell array
 | 
| 
 | 
    30  *
 | 
| 
 | 
    31  * example call: 
 | 
| 
 | 
    32  * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
 | 
| 
 | 
    33  */
 | 
| 
 | 
    34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
 | 
| 
 | 
    35   
 | 
| 
 | 
    36   if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) {
 | 
| 
 | 
    37     fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n");
 | 
| 
 | 
    38     return; 
 | 
| 
 | 
    39   }
 | 
| 
 | 
    40   
 | 
| 
 | 
    41   /* obligatory arguments
 | 
| 
 | 
    42    * **********************/
 | 
| 
 | 
    43   char *fname = get_string(prhs[0]);
 | 
| 
 | 
    44 	//fprintf(stdout, "arg1: %s\n", fname);
 | 
| 
 | 
    45   char *chr = get_string(prhs[1]);
 | 
| 
 | 
    46 	//fprintf(stdout, "arg2: %s\n", chr);
 | 
| 
 | 
    47   int from_pos = get_int(prhs[2]);
 | 
| 
 | 
    48 	//fprintf(stdout, "arg3: %d\n", from_pos);
 | 
| 
 | 
    49   int to_pos = get_int(prhs[3]);
 | 
| 
 | 
    50 	//fprintf(stdout, "arg4: %d\n", to_pos);
 | 
| 
 | 
    51   char *strand = get_string(prhs[4]);
 | 
| 
 | 
    52 	//fprintf(stdout, "arg5: %s\n", strand);
 | 
| 
 | 
    53 
 | 
| 
 | 
    54   if (from_pos>to_pos)
 | 
| 
 | 
    55      mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
 | 
| 
 | 
    56 
 | 
| 
 | 
    57   if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') 
 | 
| 
 | 
    58     mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
 | 
| 
 | 
    59 
 | 
| 
 | 
    60   /* optional arguments
 | 
| 
 | 
    61    * ******************/	
 | 
| 
 | 
    62   int collapse = 0;
 | 
| 
 | 
    63   if (nrhs>=6)
 | 
| 
 | 
    64     collapse = get_int(prhs[5]);
 | 
| 
 | 
    65   
 | 
| 
 | 
    66   int subsample = 1000;	
 | 
| 
 | 
    67   if (nrhs>=7)
 | 
| 
 | 
    68     subsample = get_int(prhs[6]);
 | 
| 
 | 
    69 	  
 | 
| 
 | 
    70   int intron_len_filter = 1e9;
 | 
| 
 | 
    71   if (nrhs>=8)
 | 
| 
 | 
    72     intron_len_filter = get_int(prhs[7]);
 | 
| 
 | 
    73 
 | 
| 
 | 
    74   int exon_len_filter = -1;
 | 
| 
 | 
    75   if (nrhs>=9)
 | 
| 
 | 
    76     exon_len_filter = get_int(prhs[8]);
 | 
| 
 | 
    77 
 | 
| 
 | 
    78   int filter_mismatch = 1e9;
 | 
| 
 | 
    79   if (nrhs>=10)
 | 
| 
 | 
    80     filter_mismatch = get_int(prhs[9]);
 | 
| 
 | 
    81 
 | 
| 
 | 
    82   /* call function to get reads
 | 
| 
 | 
    83    * **************************/	
 | 
| 
 | 
    84   char region[MAXLINE];
 | 
| 
 | 
    85   sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
 | 
| 
 | 
    86 
 | 
| 
 | 
    87   vector<CRead*> all_reads;
 | 
| 
 | 
    88   
 | 
| 
 | 
    89   get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
 | 
| 
 | 
    90  
 | 
| 
 | 
    91 	/* filter reads
 | 
| 
 | 
    92 	* **************/	
 | 
| 
 | 
    93 	vector<CRead*> reads;
 | 
| 
 | 
    94 	for (int i=0; i<all_reads.size(); i++) 
 | 
| 
 | 
    95 	{
 | 
| 
 | 
    96 		if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
 | 
| 
 | 
    97 			reads.push_back(all_reads[i]);
 | 
| 
 | 
    98 	}
 | 
| 
 | 
    99 
 | 
| 
 | 
   100  
 | 
| 
 | 
   101   /* prepare output
 | 
| 
 | 
   102    * **************/	
 | 
| 
 | 
   103   int num_rows = reads.size();
 | 
| 
 | 
   104   int num_pos = to_pos-from_pos+1;
 | 
| 
 | 
   105   
 | 
| 
 | 
   106   // read coverages collapsed 
 | 
| 
 | 
   107   if (collapse) {
 | 
| 
 | 
   108     plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
 | 
| 
 | 
   109     uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
 | 
| 
 | 
   110     if (num_pos>0 && mask_ret==NULL)
 | 
| 
 | 
   111       mexErrMsgTxt("Error allocating memory\n");
 | 
| 
 | 
   112     for (int i=0; i<reads.size(); i++) 
 | 
| 
 | 
   113 	{
 | 
| 
 | 
   114         reads[i]->get_coverage(from_pos, to_pos, mask_ret);
 | 
| 
 | 
   115     }
 | 
| 
 | 
   116   }
 | 
| 
 | 
   117   // reads not collapsed
 | 
| 
 | 
   118   else {
 | 
| 
 | 
   119     uint32_t nzmax = 0; // maximal number of nonzero elements 
 | 
| 
 | 
   120     int len = to_pos-from_pos+1;
 | 
| 
 | 
   121     for (uint i=0; i<reads.size(); i++) {
 | 
| 
 | 
   122 	for (uint n = 0; n < reads[i]->block_starts.size(); n++) {
 | 
| 
 | 
   123 	  uint32_t from, to;
 | 
| 
 | 
   124 	  if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
 | 
| 
 | 
   125 	    from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
 | 
| 
 | 
   126 	  else
 | 
| 
 | 
   127 	    from = 0;
 | 
| 
 | 
   128 	  if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
 | 
| 
 | 
   129 	    to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
 | 
| 
 | 
   130 	  else
 | 
| 
 | 
   131 	    to = 0;
 | 
| 
 | 
   132 	  for (int bp=from; bp<to&bp<len; bp++) {
 | 
| 
 | 
   133 	    nzmax++;
 | 
| 
 | 
   134 	  }
 | 
| 
 | 
   135 	}
 | 
| 
 | 
   136     }
 | 
| 
 | 
   137     // 1st row: row indices
 | 
| 
 | 
   138     // 2nd row: column indices
 | 
| 
 | 
   139     plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
 | 
| 
 | 
   140     double *mask_ret = (double*) mxGetData(plhs[0]);
 | 
| 
 | 
   141     if (nzmax>0 && mask_ret==NULL)
 | 
| 
 | 
   142       mexErrMsgTxt("Error allocating memory\n");
 | 
| 
 | 
   143     uint32_t mask_ret_c = 0; // counter
 | 
| 
 | 
   144     for (uint i=0; i<reads.size(); i++) 
 | 
| 
 | 
   145 	{
 | 
| 
 | 
   146 		reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
 | 
| 
 | 
   147     }
 | 
| 
 | 
   148     if (mask_ret_c!=2*nzmax)
 | 
| 
 | 
   149       mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
 | 
| 
 | 
   150     
 | 
| 
 | 
   151   }
 | 
| 
 | 
   152   // introns
 | 
| 
 | 
   153   if (nlhs==2) 
 | 
| 
 | 
   154   {
 | 
| 
 | 
   155     vector<int> intron_list;
 | 
| 
 | 
   156 	for (int i=0; i<reads.size(); i++) 
 | 
| 
 | 
   157 	{
 | 
| 
 | 
   158 		reads[i]->get_introns(&intron_list);
 | 
| 
 | 
   159 	}
 | 
| 
 | 
   160 
 | 
| 
 | 
   161     plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
 | 
| 
 | 
   162 	uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
 | 
| 
 | 
   163 	for (int p = 0; p<intron_list.size(); p++) 
 | 
| 
 | 
   164 	{	
 | 
| 
 | 
   165 		p_intron_list[p] = intron_list[p];
 | 
| 
 | 
   166 	}
 | 
| 
 | 
   167 	intron_list.clear();	
 | 
| 
 | 
   168   }
 | 
| 
 | 
   169   //// introns
 | 
| 
 | 
   170   //if (nlhs==2) 
 | 
| 
 | 
   171   //{
 | 
| 
 | 
   172   //  // use x = [introns{:}]' in matlab to get the array of introns
 | 
| 
 | 
   173   //  const int cnum_rows = num_rows;
 | 
| 
 | 
   174   //  plhs[1] = mxCreateCellArray(1, &cnum_rows);
 | 
| 
 | 
   175   //  mxArray *intron_cell = plhs[1];
 | 
| 
 | 
   176   //  if (num_rows>0 && intron_cell==NULL)
 | 
| 
 | 
   177   //    mexErrMsgTxt("Error allocating memory\n");
 | 
| 
 | 
   178   //  vector<int> intron_list;
 | 
| 
 | 
   179   //  for (int i=0; i<reads.size(); i++) 
 | 
| 
 | 
   180   //  {
 | 
| 
 | 
   181   //    reads[i]->get_introns(&intron_list);
 | 
| 
 | 
   182   //    mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);		
 | 
| 
 | 
   183   //    uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list);
 | 
| 
 | 
   184   //    
 | 
| 
 | 
   185   //    for (int p = 0; p<intron_list.size(); p++) 
 | 
| 
 | 
   186   //    {
 | 
| 
 | 
   187   //      p_intron_list[p] = intron_list[p];
 | 
| 
 | 
   188   //    }
 | 
| 
 | 
   189   //    mxSetCell(intron_cell, i, mx_intron_list);
 | 
| 
 | 
   190   //    intron_list.clear();	
 | 
| 
 | 
   191   //  }
 | 
| 
 | 
   192   //}
 | 
| 
 | 
   193 	for (int i=0; i<all_reads.size(); i++)
 | 
| 
 | 
   194 		delete all_reads[i];
 | 
| 
 | 
   195 
 | 
| 
 | 
   196 }
 | 
| 
 | 
   197 
 |