annotate rDiff/mex/get_reads.cpp @ 0:0f80a5141704

version 0.3 uploaded
author vipints
date Thu, 14 Feb 2013 23:38:36 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
2
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
3 #include <stdio.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
4 #include <signal.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
5 #include <mex.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
6 #include <vector>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
7 using std::vector;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
8 #include "get_reads_direct.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
9 #include "mex_input.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
10 #include "read.h"
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
11 #include <sys/types.h>
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
12 #define MAXLINE 10000
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
13
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
14 /*
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
15 * input:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
16 * 1 bam file
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
17 * 2 chromosome
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
18 * 3 region start (1-based index)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
19 * 4 region end (1-based index)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
20 * 5 strand (either '+' or '-' or '0')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
21 * [6] collapse flag: if true the reads are collapsed to a coverage track
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
22 * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
23 * [8] intron length filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
24 * [9] exon length filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
25 * [10] mismatch filter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
26 *
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
27 * output:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
28 * 1 coverage
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
29 * [2] intron cell array
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
30 *
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
31 * example call:
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
32 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
33 */
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
35
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
36 if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
37 fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
38 return;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
39 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
40
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
41 /* obligatory arguments
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
42 * **********************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
43 char *fname = get_string(prhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
44 //fprintf(stdout, "arg1: %s\n", fname);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
45 char *chr = get_string(prhs[1]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
46 //fprintf(stdout, "arg2: %s\n", chr);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
47 int from_pos = get_int(prhs[2]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
48 //fprintf(stdout, "arg3: %d\n", from_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
49 int to_pos = get_int(prhs[3]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
50 //fprintf(stdout, "arg4: %d\n", to_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
51 char *strand = get_string(prhs[4]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
52 //fprintf(stdout, "arg5: %s\n", strand);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
53
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
54 if (from_pos>to_pos)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
55 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
56
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
57 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0')
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
58 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
59
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
60 /* optional arguments
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
61 * ******************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
62 int collapse = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
63 if (nrhs>=6)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
64 collapse = get_int(prhs[5]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
65
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
66 int subsample = 1000;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
67 if (nrhs>=7)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
68 subsample = get_int(prhs[6]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
69
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
70 int intron_len_filter = 1e9;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
71 if (nrhs>=8)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
72 intron_len_filter = get_int(prhs[7]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
73
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
74 int exon_len_filter = -1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
75 if (nrhs>=9)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
76 exon_len_filter = get_int(prhs[8]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
77
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
78 int filter_mismatch = 1e9;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
79 if (nrhs>=10)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
80 filter_mismatch = get_int(prhs[9]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
81
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
82 /* call function to get reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
83 * **************************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
84 char region[MAXLINE];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
85 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
86
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
87 vector<CRead*> all_reads;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
88
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
89 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
90
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
91 /* filter reads
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
92 * **************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
93 vector<CRead*> reads;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
94 for (int i=0; i<all_reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
95 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
96 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
97 reads.push_back(all_reads[i]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
98 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
99
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
100
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
101 /* prepare output
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
102 * **************/
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
103 int num_rows = reads.size();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
104 int num_pos = to_pos-from_pos+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
105
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
106 // read coverages collapsed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
107 if (collapse) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
108 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
109 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
110 if (num_pos>0 && mask_ret==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
111 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
112 for (int i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
113 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
114 reads[i]->get_coverage(from_pos, to_pos, mask_ret);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
115 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
116 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
117 // reads not collapsed
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
118 else {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
119 uint32_t nzmax = 0; // maximal number of nonzero elements
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
120 int len = to_pos-from_pos+1;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
121 for (uint i=0; i<reads.size(); i++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
122 for (uint n = 0; n < reads[i]->block_starts.size(); n++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
123 uint32_t from, to;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
124 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
125 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
126 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
127 from = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
128 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
129 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
130 else
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
131 to = 0;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
132 for (int bp=from; bp<to&bp<len; bp++) {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
133 nzmax++;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
134 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
135 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
136 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
137 // 1st row: row indices
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
138 // 2nd row: column indices
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
139 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
140 double *mask_ret = (double*) mxGetData(plhs[0]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
141 if (nzmax>0 && mask_ret==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
142 mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
143 uint32_t mask_ret_c = 0; // counter
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
144 for (uint i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
145 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
146 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
147 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
148 if (mask_ret_c!=2*nzmax)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
149 mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
150
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
151 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
152 // introns
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
153 if (nlhs==2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
154 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
155 vector<int> intron_list;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
156 for (int i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
157 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
158 reads[i]->get_introns(&intron_list);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
159 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
160
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
161 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
162 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
163 for (int p = 0; p<intron_list.size(); p++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
164 {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
165 p_intron_list[p] = intron_list[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
166 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
167 intron_list.clear();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
168 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
169 //// introns
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
170 //if (nlhs==2)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
171 //{
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
172 // // use x = [introns{:}]' in matlab to get the array of introns
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
173 // const int cnum_rows = num_rows;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
174 // plhs[1] = mxCreateCellArray(1, &cnum_rows);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
175 // mxArray *intron_cell = plhs[1];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
176 // if (num_rows>0 && intron_cell==NULL)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
177 // mexErrMsgTxt("Error allocating memory\n");
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
178 // vector<int> intron_list;
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
179 // for (int i=0; i<reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
180 // {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
181 // reads[i]->get_introns(&intron_list);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
182 // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
183 // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
184 //
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
185 // for (int p = 0; p<intron_list.size(); p++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
186 // {
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
187 // p_intron_list[p] = intron_list[p];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
188 // }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
189 // mxSetCell(intron_cell, i, mx_intron_list);
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
190 // intron_list.clear();
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
191 // }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
192 //}
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
193 for (int i=0; i<all_reads.size(); i++)
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
194 delete all_reads[i];
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
195
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
196 }
0f80a5141704 version 0.3 uploaded
vipints
parents:
diff changeset
197