0
|
1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */
|
|
2
|
|
3 #include <stdio.h>
|
|
4 #include <signal.h>
|
|
5 #include <mex.h>
|
|
6 #include <vector>
|
|
7 using std::vector;
|
|
8 #include "get_reads_direct.h"
|
|
9 #include "mex_input.h"
|
|
10 #include "read.h"
|
|
11 #include <sys/types.h>
|
|
12 #define MAXLINE 10000
|
|
13
|
|
14 /*
|
|
15 * input:
|
|
16 * 1 bam file
|
|
17 * 2 chromosome
|
|
18 * 3 region start (1-based index)
|
|
19 * 4 region end (1-based index)
|
|
20 * 5 strand (either '+' or '-' or '0')
|
|
21 * [6] collapse flag: if true the reads are collapsed to a coverage track
|
|
22 * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
|
|
23 * [8] intron length filter
|
|
24 * [9] exon length filter
|
|
25 * [10] mismatch filter
|
|
26 *
|
|
27 * output:
|
|
28 * 1 coverage
|
|
29 * [2] intron cell array
|
|
30 *
|
|
31 * example call:
|
|
32 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
|
|
33 */
|
|
34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
|
|
35
|
|
36 if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) {
|
|
37 fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n");
|
|
38 return;
|
|
39 }
|
|
40
|
|
41 /* obligatory arguments
|
|
42 * **********************/
|
|
43 char *fname = get_string(prhs[0]);
|
|
44 //fprintf(stdout, "arg1: %s\n", fname);
|
|
45 char *chr = get_string(prhs[1]);
|
|
46 //fprintf(stdout, "arg2: %s\n", chr);
|
|
47 int from_pos = get_int(prhs[2]);
|
|
48 //fprintf(stdout, "arg3: %d\n", from_pos);
|
|
49 int to_pos = get_int(prhs[3]);
|
|
50 //fprintf(stdout, "arg4: %d\n", to_pos);
|
|
51 char *strand = get_string(prhs[4]);
|
|
52 //fprintf(stdout, "arg5: %s\n", strand);
|
|
53
|
|
54 if (from_pos>to_pos)
|
|
55 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
|
|
56
|
|
57 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0')
|
|
58 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
|
|
59
|
|
60 /* optional arguments
|
|
61 * ******************/
|
|
62 int collapse = 0;
|
|
63 if (nrhs>=6)
|
|
64 collapse = get_int(prhs[5]);
|
|
65
|
|
66 int subsample = 1000;
|
|
67 if (nrhs>=7)
|
|
68 subsample = get_int(prhs[6]);
|
|
69
|
|
70 int intron_len_filter = 1e9;
|
|
71 if (nrhs>=8)
|
|
72 intron_len_filter = get_int(prhs[7]);
|
|
73
|
|
74 int exon_len_filter = -1;
|
|
75 if (nrhs>=9)
|
|
76 exon_len_filter = get_int(prhs[8]);
|
|
77
|
|
78 int filter_mismatch = 1e9;
|
|
79 if (nrhs>=10)
|
|
80 filter_mismatch = get_int(prhs[9]);
|
|
81
|
|
82 /* call function to get reads
|
|
83 * **************************/
|
|
84 char region[MAXLINE];
|
|
85 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
|
|
86
|
|
87 vector<CRead*> all_reads;
|
|
88
|
|
89 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
|
|
90
|
|
91 /* filter reads
|
|
92 * **************/
|
|
93 vector<CRead*> reads;
|
|
94 for (int i=0; i<all_reads.size(); i++)
|
|
95 {
|
|
96 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
|
|
97 reads.push_back(all_reads[i]);
|
|
98 }
|
|
99
|
|
100
|
|
101 /* prepare output
|
|
102 * **************/
|
|
103 int num_rows = reads.size();
|
|
104 int num_pos = to_pos-from_pos+1;
|
|
105
|
|
106 // read coverages collapsed
|
|
107 if (collapse) {
|
|
108 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
|
|
109 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
|
|
110 if (num_pos>0 && mask_ret==NULL)
|
|
111 mexErrMsgTxt("Error allocating memory\n");
|
|
112 for (int i=0; i<reads.size(); i++)
|
|
113 {
|
|
114 reads[i]->get_coverage(from_pos, to_pos, mask_ret);
|
|
115 }
|
|
116 }
|
|
117 // reads not collapsed
|
|
118 else {
|
|
119 uint32_t nzmax = 0; // maximal number of nonzero elements
|
|
120 int len = to_pos-from_pos+1;
|
|
121 for (uint i=0; i<reads.size(); i++) {
|
|
122 for (uint n = 0; n < reads[i]->block_starts.size(); n++) {
|
|
123 uint32_t from, to;
|
|
124 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
|
|
125 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
|
|
126 else
|
|
127 from = 0;
|
|
128 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
|
|
129 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
|
|
130 else
|
|
131 to = 0;
|
|
132 for (int bp=from; bp<to&bp<len; bp++) {
|
|
133 nzmax++;
|
|
134 }
|
|
135 }
|
|
136 }
|
|
137 // 1st row: row indices
|
|
138 // 2nd row: column indices
|
|
139 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
|
|
140 double *mask_ret = (double*) mxGetData(plhs[0]);
|
|
141 if (nzmax>0 && mask_ret==NULL)
|
|
142 mexErrMsgTxt("Error allocating memory\n");
|
|
143 uint32_t mask_ret_c = 0; // counter
|
|
144 for (uint i=0; i<reads.size(); i++)
|
|
145 {
|
|
146 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
|
|
147 }
|
|
148 if (mask_ret_c!=2*nzmax)
|
|
149 mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
|
|
150
|
|
151 }
|
|
152 // introns
|
|
153 if (nlhs==2)
|
|
154 {
|
|
155 vector<int> intron_list;
|
|
156 for (int i=0; i<reads.size(); i++)
|
|
157 {
|
|
158 reads[i]->get_introns(&intron_list);
|
|
159 }
|
|
160
|
|
161 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
|
|
162 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
|
|
163 for (int p = 0; p<intron_list.size(); p++)
|
|
164 {
|
|
165 p_intron_list[p] = intron_list[p];
|
|
166 }
|
|
167 intron_list.clear();
|
|
168 }
|
|
169 //// introns
|
|
170 //if (nlhs==2)
|
|
171 //{
|
|
172 // // use x = [introns{:}]' in matlab to get the array of introns
|
|
173 // const int cnum_rows = num_rows;
|
|
174 // plhs[1] = mxCreateCellArray(1, &cnum_rows);
|
|
175 // mxArray *intron_cell = plhs[1];
|
|
176 // if (num_rows>0 && intron_cell==NULL)
|
|
177 // mexErrMsgTxt("Error allocating memory\n");
|
|
178 // vector<int> intron_list;
|
|
179 // for (int i=0; i<reads.size(); i++)
|
|
180 // {
|
|
181 // reads[i]->get_introns(&intron_list);
|
|
182 // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
|
|
183 // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list);
|
|
184 //
|
|
185 // for (int p = 0; p<intron_list.size(); p++)
|
|
186 // {
|
|
187 // p_intron_list[p] = intron_list[p];
|
|
188 // }
|
|
189 // mxSetCell(intron_cell, i, mx_intron_list);
|
|
190 // intron_list.clear();
|
|
191 // }
|
|
192 //}
|
|
193 for (int i=0; i<all_reads.size(); i++)
|
|
194 delete all_reads[i];
|
|
195
|
|
196 }
|
|
197
|