comparison rDiff/mex/get_reads.cpp @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:0f80a5141704
1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */
2
3 #include <stdio.h>
4 #include <signal.h>
5 #include <mex.h>
6 #include <vector>
7 using std::vector;
8 #include "get_reads_direct.h"
9 #include "mex_input.h"
10 #include "read.h"
11 #include <sys/types.h>
12 #define MAXLINE 10000
13
14 /*
15 * input:
16 * 1 bam file
17 * 2 chromosome
18 * 3 region start (1-based index)
19 * 4 region end (1-based index)
20 * 5 strand (either '+' or '-' or '0')
21 * [6] collapse flag: if true the reads are collapsed to a coverage track
22 * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
23 * [8] intron length filter
24 * [9] exon length filter
25 * [10] mismatch filter
26 *
27 * output:
28 * 1 coverage
29 * [2] intron cell array
30 *
31 * example call:
32 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
33 */
34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
35
36 if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) {
37 fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n");
38 return;
39 }
40
41 /* obligatory arguments
42 * **********************/
43 char *fname = get_string(prhs[0]);
44 //fprintf(stdout, "arg1: %s\n", fname);
45 char *chr = get_string(prhs[1]);
46 //fprintf(stdout, "arg2: %s\n", chr);
47 int from_pos = get_int(prhs[2]);
48 //fprintf(stdout, "arg3: %d\n", from_pos);
49 int to_pos = get_int(prhs[3]);
50 //fprintf(stdout, "arg4: %d\n", to_pos);
51 char *strand = get_string(prhs[4]);
52 //fprintf(stdout, "arg5: %s\n", strand);
53
54 if (from_pos>to_pos)
55 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
56
57 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0')
58 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
59
60 /* optional arguments
61 * ******************/
62 int collapse = 0;
63 if (nrhs>=6)
64 collapse = get_int(prhs[5]);
65
66 int subsample = 1000;
67 if (nrhs>=7)
68 subsample = get_int(prhs[6]);
69
70 int intron_len_filter = 1e9;
71 if (nrhs>=8)
72 intron_len_filter = get_int(prhs[7]);
73
74 int exon_len_filter = -1;
75 if (nrhs>=9)
76 exon_len_filter = get_int(prhs[8]);
77
78 int filter_mismatch = 1e9;
79 if (nrhs>=10)
80 filter_mismatch = get_int(prhs[9]);
81
82 /* call function to get reads
83 * **************************/
84 char region[MAXLINE];
85 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
86
87 vector<CRead*> all_reads;
88
89 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
90
91 /* filter reads
92 * **************/
93 vector<CRead*> reads;
94 for (int i=0; i<all_reads.size(); i++)
95 {
96 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
97 reads.push_back(all_reads[i]);
98 }
99
100
101 /* prepare output
102 * **************/
103 int num_rows = reads.size();
104 int num_pos = to_pos-from_pos+1;
105
106 // read coverages collapsed
107 if (collapse) {
108 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
109 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
110 if (num_pos>0 && mask_ret==NULL)
111 mexErrMsgTxt("Error allocating memory\n");
112 for (int i=0; i<reads.size(); i++)
113 {
114 reads[i]->get_coverage(from_pos, to_pos, mask_ret);
115 }
116 }
117 // reads not collapsed
118 else {
119 uint32_t nzmax = 0; // maximal number of nonzero elements
120 int len = to_pos-from_pos+1;
121 for (uint i=0; i<reads.size(); i++) {
122 for (uint n = 0; n < reads[i]->block_starts.size(); n++) {
123 uint32_t from, to;
124 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
125 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
126 else
127 from = 0;
128 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
129 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
130 else
131 to = 0;
132 for (int bp=from; bp<to&bp<len; bp++) {
133 nzmax++;
134 }
135 }
136 }
137 // 1st row: row indices
138 // 2nd row: column indices
139 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
140 double *mask_ret = (double*) mxGetData(plhs[0]);
141 if (nzmax>0 && mask_ret==NULL)
142 mexErrMsgTxt("Error allocating memory\n");
143 uint32_t mask_ret_c = 0; // counter
144 for (uint i=0; i<reads.size(); i++)
145 {
146 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
147 }
148 if (mask_ret_c!=2*nzmax)
149 mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
150
151 }
152 // introns
153 if (nlhs==2)
154 {
155 vector<int> intron_list;
156 for (int i=0; i<reads.size(); i++)
157 {
158 reads[i]->get_introns(&intron_list);
159 }
160
161 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
162 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
163 for (int p = 0; p<intron_list.size(); p++)
164 {
165 p_intron_list[p] = intron_list[p];
166 }
167 intron_list.clear();
168 }
169 //// introns
170 //if (nlhs==2)
171 //{
172 // // use x = [introns{:}]' in matlab to get the array of introns
173 // const int cnum_rows = num_rows;
174 // plhs[1] = mxCreateCellArray(1, &cnum_rows);
175 // mxArray *intron_cell = plhs[1];
176 // if (num_rows>0 && intron_cell==NULL)
177 // mexErrMsgTxt("Error allocating memory\n");
178 // vector<int> intron_list;
179 // for (int i=0; i<reads.size(); i++)
180 // {
181 // reads[i]->get_introns(&intron_list);
182 // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
183 // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list);
184 //
185 // for (int p = 0; p<intron_list.size(); p++)
186 // {
187 // p_intron_list[p] = intron_list[p];
188 // }
189 // mxSetCell(intron_cell, i, mx_intron_list);
190 // intron_list.clear();
191 // }
192 //}
193 for (int i=0; i<all_reads.size(); i++)
194 delete all_reads[i];
195
196 }
197