Mercurial > repos > vipints > rdiff
comparison rDiff/mex/get_reads.cpp @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0f80a5141704 |
---|---|
1 /* written by Jonas Behr, Regina Bohnert, Philipp Drewe and Gunnar Raetsch, FML Tuebingen, Germany, 2012 */ | |
2 | |
3 #include <stdio.h> | |
4 #include <signal.h> | |
5 #include <mex.h> | |
6 #include <vector> | |
7 using std::vector; | |
8 #include "get_reads_direct.h" | |
9 #include "mex_input.h" | |
10 #include "read.h" | |
11 #include <sys/types.h> | |
12 #define MAXLINE 10000 | |
13 | |
14 /* | |
15 * input: | |
16 * 1 bam file | |
17 * 2 chromosome | |
18 * 3 region start (1-based index) | |
19 * 4 region end (1-based index) | |
20 * 5 strand (either '+' or '-' or '0') | |
21 * [6] collapse flag: if true the reads are collapsed to a coverage track | |
22 * [7] subsample percentage: percentage of reads to be subsampled (in per mill) | |
23 * [8] intron length filter | |
24 * [9] exon length filter | |
25 * [10] mismatch filter | |
26 * | |
27 * output: | |
28 * 1 coverage | |
29 * [2] intron cell array | |
30 * | |
31 * example call: | |
32 * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30); | |
33 */ | |
34 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { | |
35 | |
36 if (nrhs<5 || nrhs>10 || (nlhs!=1 && nlhs!=2)) { | |
37 fprintf(stderr, "usage: [x [introns]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches]);\n"); | |
38 return; | |
39 } | |
40 | |
41 /* obligatory arguments | |
42 * **********************/ | |
43 char *fname = get_string(prhs[0]); | |
44 //fprintf(stdout, "arg1: %s\n", fname); | |
45 char *chr = get_string(prhs[1]); | |
46 //fprintf(stdout, "arg2: %s\n", chr); | |
47 int from_pos = get_int(prhs[2]); | |
48 //fprintf(stdout, "arg3: %d\n", from_pos); | |
49 int to_pos = get_int(prhs[3]); | |
50 //fprintf(stdout, "arg4: %d\n", to_pos); | |
51 char *strand = get_string(prhs[4]); | |
52 //fprintf(stdout, "arg5: %s\n", strand); | |
53 | |
54 if (from_pos>to_pos) | |
55 mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n"); | |
56 | |
57 if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') | |
58 mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0"); | |
59 | |
60 /* optional arguments | |
61 * ******************/ | |
62 int collapse = 0; | |
63 if (nrhs>=6) | |
64 collapse = get_int(prhs[5]); | |
65 | |
66 int subsample = 1000; | |
67 if (nrhs>=7) | |
68 subsample = get_int(prhs[6]); | |
69 | |
70 int intron_len_filter = 1e9; | |
71 if (nrhs>=8) | |
72 intron_len_filter = get_int(prhs[7]); | |
73 | |
74 int exon_len_filter = -1; | |
75 if (nrhs>=9) | |
76 exon_len_filter = get_int(prhs[8]); | |
77 | |
78 int filter_mismatch = 1e9; | |
79 if (nrhs>=10) | |
80 filter_mismatch = get_int(prhs[9]); | |
81 | |
82 /* call function to get reads | |
83 * **************************/ | |
84 char region[MAXLINE]; | |
85 sprintf(region, "%s:%i-%i", chr, from_pos, to_pos); | |
86 | |
87 vector<CRead*> all_reads; | |
88 | |
89 get_reads_from_bam(fname, region, &all_reads, strand[0], subsample); | |
90 | |
91 /* filter reads | |
92 * **************/ | |
93 vector<CRead*> reads; | |
94 for (int i=0; i<all_reads.size(); i++) | |
95 { | |
96 if (all_reads[i]->max_intron_len()<intron_len_filter && all_reads[i]->min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch && all_reads[i]->multiple_alignment_index==0) | |
97 reads.push_back(all_reads[i]); | |
98 } | |
99 | |
100 | |
101 /* prepare output | |
102 * **************/ | |
103 int num_rows = reads.size(); | |
104 int num_pos = to_pos-from_pos+1; | |
105 | |
106 // read coverages collapsed | |
107 if (collapse) { | |
108 plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); | |
109 uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]); | |
110 if (num_pos>0 && mask_ret==NULL) | |
111 mexErrMsgTxt("Error allocating memory\n"); | |
112 for (int i=0; i<reads.size(); i++) | |
113 { | |
114 reads[i]->get_coverage(from_pos, to_pos, mask_ret); | |
115 } | |
116 } | |
117 // reads not collapsed | |
118 else { | |
119 uint32_t nzmax = 0; // maximal number of nonzero elements | |
120 int len = to_pos-from_pos+1; | |
121 for (uint i=0; i<reads.size(); i++) { | |
122 for (uint n = 0; n < reads[i]->block_starts.size(); n++) { | |
123 uint32_t from, to; | |
124 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0) | |
125 from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos; | |
126 else | |
127 from = 0; | |
128 if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0) | |
129 to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n]; | |
130 else | |
131 to = 0; | |
132 for (int bp=from; bp<to&bp<len; bp++) { | |
133 nzmax++; | |
134 } | |
135 } | |
136 } | |
137 // 1st row: row indices | |
138 // 2nd row: column indices | |
139 plhs[0] = mxCreateDoubleMatrix(2, nzmax, mxREAL); | |
140 double *mask_ret = (double*) mxGetData(plhs[0]); | |
141 if (nzmax>0 && mask_ret==NULL) | |
142 mexErrMsgTxt("Error allocating memory\n"); | |
143 uint32_t mask_ret_c = 0; // counter | |
144 for (uint i=0; i<reads.size(); i++) | |
145 { | |
146 reads[i]->get_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i); | |
147 } | |
148 if (mask_ret_c!=2*nzmax) | |
149 mexErrMsgTxt("Error filling index arrays for sparse matrix\n"); | |
150 | |
151 } | |
152 // introns | |
153 if (nlhs==2) | |
154 { | |
155 vector<int> intron_list; | |
156 for (int i=0; i<reads.size(); i++) | |
157 { | |
158 reads[i]->get_introns(&intron_list); | |
159 } | |
160 | |
161 plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); | |
162 uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); | |
163 for (int p = 0; p<intron_list.size(); p++) | |
164 { | |
165 p_intron_list[p] = intron_list[p]; | |
166 } | |
167 intron_list.clear(); | |
168 } | |
169 //// introns | |
170 //if (nlhs==2) | |
171 //{ | |
172 // // use x = [introns{:}]' in matlab to get the array of introns | |
173 // const int cnum_rows = num_rows; | |
174 // plhs[1] = mxCreateCellArray(1, &cnum_rows); | |
175 // mxArray *intron_cell = plhs[1]; | |
176 // if (num_rows>0 && intron_cell==NULL) | |
177 // mexErrMsgTxt("Error allocating memory\n"); | |
178 // vector<int> intron_list; | |
179 // for (int i=0; i<reads.size(); i++) | |
180 // { | |
181 // reads[i]->get_introns(&intron_list); | |
182 // mxArray * mx_intron_list = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); | |
183 // uint32_t *p_intron_list = (uint32_t*) mxGetData(mx_intron_list); | |
184 // | |
185 // for (int p = 0; p<intron_list.size(); p++) | |
186 // { | |
187 // p_intron_list[p] = intron_list[p]; | |
188 // } | |
189 // mxSetCell(intron_cell, i, mx_intron_list); | |
190 // intron_list.clear(); | |
191 // } | |
192 //} | |
193 for (int i=0; i<all_reads.size(); i++) | |
194 delete all_reads[i]; | |
195 | |
196 } | |
197 |