Mercurial > repos > vipints > rdiff
comparison rDiff/mex/get_reads.cpp @ 0:0f80a5141704
version 0.3 uploaded
| author | vipints | 
|---|---|
| date | Thu, 14 Feb 2013 23:38:36 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:0f80a5141704 | 
|---|---|
| 1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */ | |
| 2 | |
| 3 #include <stdio.h> | |
| 4 #include <signal.h> | |
| 5 #include <mex.h> | |
| 6 #include <vector> | |
| 7 using std::vector; | |
| 8 #include "get_reads_direct.h" | |
| 9 #include "mex_input.h" | |
| 10 #include "read.h" | |
| 11 #include <sys/types.h> | |
| 12 #define MAXLINE 10000 | |
| 13 | |
| 14 /* | |
| 15 * input: | |
| 16 * 1 bam file | |
| 17 * 2 chromosome | |
| 18 * 3 region start (1-based index) | |
| 19 * 4 region end (1-based index) | |
| 20 * 5 strand (either '+' or '-' or '0') | |
| 21 * [6] collapse flag: if true the reads are collapsed to a coverage track | |
| 22 * [7] subsample percentage: percentage of reads to be subsampled (in per mill) | |
| 23 * [8] intron length filter | |
| 24 * [9] exon length filter | |
| 25 * [10] mismatch filter | |
| 26 * | |
| 27 * output: | |
| 28 * 1 coverage | |
| 29 * [2] intron cell array | |
| 30 * | |
| 31 * example call: | |
| 32 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30); | |
| 33 */ | |
| 34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { | |
| 35 | |
| 36 if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) { | |
| 37 fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n"); | |
| 38 return; | |
| 39 } | |
| 40 | |
| 41 /* obligatory arguments | |
| 42 * **********************/ | |
| 43 char *fname = get_string(prhs[0]); | |
| 44 //fprintf(stdout, "arg1: %s\n", fname); | |
| 45 char *chr = get_string(prhs[1]); | |
| 46 //fprintf(stdout, "arg2: %s\n", chr); | |
| 47 int from_pos = get_int(prhs[2]); | |
| 48 //fprintf(stdout, "arg3: %d\n", from_pos); | |
| 49 int to_pos = get_int(prhs[3]); | |
| 50 //fprintf(stdout, "arg4: %d\n", to_pos); | |
| 51 char *strand = get_string(prhs[4]); | |
| 52 //fprintf(stdout, "arg5: %s\n", strand); | |
| 53 | |
| 54 if (from_pos>to_pos) | |
| 55 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n"); | |
| 56 | |
| 57 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') | |
| 58 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0"); | |
| 59 | |
| 60 /* optional arguments | |
| 61 * ******************/ | |
| 62 int collapse = 0; | |
| 63 if (nrhs>=6) | |
| 64 collapse = get_int(prhs[5]); | |
| 65 | |
| 66 int subsample = 1000; | |
| 67 if (nrhs>=7) | |
| 68 subsample = get_int(prhs[6]); | |
| 69 | |
| 70 int intron_len_filter = 1e9; | |
| 71 if (nrhs>=8) | |
| 72 intron_len_filter = get_int(prhs[7]); | |
| 73 | |
| 74 int exon_len_filter = -1; | |
| 75 if (nrhs>=9) | |
| 76 exon_len_filter = get_int(prhs[8]); | |
| 77 | |
| 78 int filter_mismatch = 1e9; | |
| 79 if (nrhs>=10) | |
| 80 filter_mismatch = get_int(prhs[9]); | |
| 81 | |
| 82 /* call function to get reads | |
| 83 * **************************/ | |
| 84 char region[MAXLINE]; | |
| 85 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos); | |
| 86 | |
| 87 vector<CRead*> all_reads; | |
| 88 | |
| 89 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample); | |
| 90 | |
| 91 /* filter reads | |
| 92 * **************/ | |
| 93 vector<CRead*> reads; | |
| 94 for (int i=0; i<all_reads.size(); i++) | |
| 95 { | |
| 96 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0) | |
| 97 reads.push_back(all_reads[i]); | |
| 98 } | |
| 99 | |
| 100 | |
| 101 /* prepare output | |
| 102 * **************/ | |
| 103 int num_rows = reads.size(); | |
| 104 int num_pos = to_pos-from_pos+1; | |
| 105 | |
| 106 // read coverages collapsed | |
| 107 if (collapse) { | |
| 108 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); | |
| 109 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]); | |
| 110 if (num_pos>0 && mask_ret==NULL) | |
| 111 mexErrMsgTxt("Error allocating memory\n"); | |
| 112 for (int i=0; i<reads.size(); i++) | |
| 113 { | |
| 114 reads[i]->get_coverage(from_pos, to_pos, mask_ret); | |
| 115 } | |
| 116 } | |
| 117 // reads not collapsed | |
| 118 else { | |
| 119 uint32_t nzmax = 0; // maximal number of nonzero elements | |
| 120 int len = to_pos-from_pos+1; | |
| 121 for (uint i=0; i<reads.size(); i++) { | |
| 122 for (uint n = 0; n < reads[i]->block_starts.size(); n++) { | |
| 123 uint32_t from, to; | |
| 124 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0) | |
| 125 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos; | |
| 126 else | |
| 127 from = 0; | |
| 128 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0) | |
| 129 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n]; | |
| 130 else | |
| 131 to = 0; | |
| 132 for (int bp=from; bp<to&bp<len; bp++) { | |
| 133 nzmax++; | |
| 134 } | |
| 135 } | |
| 136 } | |
| 137 // 1st row: row indices | |
| 138 // 2nd row: column indices | |
| 139 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL); | |
| 140 double *mask_ret = (double*) mxGetData(plhs[0]); | |
| 141 if (nzmax>0 && mask_ret==NULL) | |
| 142 mexErrMsgTxt("Error allocating memory\n"); | |
| 143 uint32_t mask_ret_c = 0; // counter | |
| 144 for (uint i=0; i<reads.size(); i++) | |
| 145 { | |
| 146 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i); | |
| 147 } | |
| 148 if (mask_ret_c!=2*nzmax) | |
| 149 mexErrMsgTxt("Error filling index arrays for sparse matrix\n"); | |
| 150 | |
| 151 } | |
| 152 // introns | |
| 153 if (nlhs==2) | |
| 154 { | |
| 155 vector<int> intron_list; | |
| 156 for (int i=0; i<reads.size(); i++) | |
| 157 { | |
| 158 reads[i]->get_introns(&intron_list); | |
| 159 } | |
| 160 | |
| 161 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); | |
| 162 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); | |
| 163 for (int p = 0; p<intron_list.size(); p++) | |
| 164 { | |
| 165 p_intron_list[p] = intron_list[p]; | |
| 166 } | |
| 167 intron_list.clear(); | |
| 168 } | |
| 169 //// introns | |
| 170 //if (nlhs==2) | |
| 171 //{ | |
| 172 // // use x = [introns{:}]' in matlab to get the array of introns | |
| 173 // const int cnum_rows = num_rows; | |
| 174 // plhs[1] = mxCreateCellArray(1, &cnum_rows); | |
| 175 // mxArray *intron_cell = plhs[1]; | |
| 176 // if (num_rows>0 && intron_cell==NULL) | |
| 177 // mexErrMsgTxt("Error allocating memory\n"); | |
| 178 // vector<int> intron_list; | |
| 179 // for (int i=0; i<reads.size(); i++) | |
| 180 // { | |
| 181 // reads[i]->get_introns(&intron_list); | |
| 182 // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); | |
| 183 // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list); | |
| 184 // | |
| 185 // for (int p = 0; p<intron_list.size(); p++) | |
| 186 // { | |
| 187 // p_intron_list[p] = intron_list[p]; | |
| 188 // } | |
| 189 // mxSetCell(intron_cell, i, mx_intron_list); | |
| 190 // intron_list.clear(); | |
| 191 // } | |
| 192 //} | |
| 193 for (int i=0; i<all_reads.size(); i++) | |
| 194 delete all_reads[i]; | |
| 195 | |
| 196 } | |
| 197 | 
