comparison deseq-hts_2.0/mex/interval_overlap.cpp @ 10:2fe512c7bfdf draft

DESeq2 version 1.0.19 added to the repo
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 08:15:34 -0400
parents
children
comparison
equal deleted inserted replaced
9:e27b4f7811c2 10:2fe512c7bfdf
1 /*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 3 of the License, or
5 * (at your option) any later version.
6 *
7 * Written (W) 2010-2011 Jonas Behr
8 * Copyright (C) 2010-2011 Max Planck Society
9 */
10
11
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <errno.h>
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18 #include <ctype.h>
19 #include <sys/stat.h>
20 #include <stdlib.h>
21 #include <unistd.h>
22 #include <string.h>
23 #include <sys/mman.h>
24 #include <time.h>
25 #include <math.h>
26 #include <limits>
27 #include <mex.h>
28 #include <assert.h>
29 #include <vector>
30 using std::vector;
31 #include <algorithm>
32 using std::sort;
33 using std::min;
34 using std::max;
35
36 typedef struct {
37 int start;
38 int stop;
39 int idx;
40 int set_id;
41 } interval_t;
42
43 bool compare (interval_t i, interval_t j)
44 {
45 return (i.start<j.start);
46 }
47
48 bool overlaps(interval_t a, interval_t b)
49 {
50 int v = min(a.stop,b.stop) - max(a.start,b.start) + 1;
51 return (v >= 1);
52 }
53 bool leftOf(interval_t a, interval_t b)
54 {
55 return (a.stop < b.start);
56 }
57
58 void scan(interval_t f, vector<interval_t>* Wf, interval_t g, vector<interval_t>* Wg, vector<int>* overlap)
59 {
60 vector<interval_t>::iterator i;
61 i=Wg->begin();
62 while (i<Wg->end())
63 {
64 interval_t g2 = *i;
65 if (leftOf(g2,f))
66 {
67 Wg->erase(i);// inefficient if Wg is large
68 // this moves all elements, therefore i is not incremented
69 }
70 else if (overlaps(g2,f))
71 {
72 if (g2.set_id==1)
73 {
74 //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", g2.idx, g2.start, g2.stop, f.idx, f.start, f.stop);
75 overlap->push_back(g2.idx);
76 overlap->push_back(f.idx);
77 }
78 else if (f.set_id==1)
79 {
80 //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", f.idx, f.start, f.stop, g2.idx, g2.start, g2.stop);
81 overlap->push_back(f.idx);
82 overlap->push_back(g2.idx);
83 }
84 i++;
85 }
86 else
87 {
88 printf("never happens??\n");
89 i++;
90 }
91 }
92 if (!leftOf(f, g))
93 {
94 Wf->push_back(f);
95 //printf("push: [%i, %i] size:%i\n", f.start, f.stop, Wf->size());
96 }
97 }
98
99 /*
100 * prhs[0] first interval set starts
101 * prhs[1] first interval set stops
102 * prhs[2] second interval set starts
103 * prhs[3] second interval set stops
104 *
105 * return:
106 * plhs[0] one based index in first interval set overlapping with a interval in the second set
107 * plhs[1] corresponding index in the second set
108 *
109 */
110 void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
111 {
112 if (nrhs!=4)
113 mexErrMsgTxt("Expected 4 arguments: starts1, stops1, starts2, stops2 \n");
114 if (nlhs!=2)
115 mexErrMsgTxt("Expected 2 output arguments \n");
116
117 int num_intervals1 = mxGetNumberOfElements(prhs[0]);
118 assert(num_intervals1 == mxGetNumberOfElements(prhs[1]));
119 int num_intervals2 = mxGetNumberOfElements(prhs[2]);
120 assert(num_intervals2 == mxGetNumberOfElements(prhs[3]));
121
122 //printf("num_intervals1: %i\n", num_intervals1);
123 //printf("num_intervals2: %i\n", num_intervals2);
124
125 double* starts1 = mxGetPr(prhs[0]);
126 double* stops1 = mxGetPr(prhs[1]);
127 double* starts2 = mxGetPr(prhs[2]);
128 double* stops2 = mxGetPr(prhs[3]);
129
130 vector<interval_t> intervals1;
131 for (int i=0; i<num_intervals1; i++)
132 {
133 interval_t interval;
134 interval.start = starts1[i];
135 interval.stop = stops1[i];
136 interval.set_id = 1;
137 interval.idx = i+1;
138 intervals1.push_back(interval);
139 //printf("int1: [%i, %i] \n",intervals1[i].start, intervals1[i].stop);
140 }
141 interval_t i;
142 i.start = std::numeric_limits<int>::max();
143 i.stop = std::numeric_limits<int>::max();
144 i.set_id = std::numeric_limits<int>::max();
145 i.idx = std::numeric_limits<int>::max();
146 intervals1.push_back(i);
147
148 //printf("num_intervals1: %i\n", intervals1.size());
149 vector<interval_t> intervals2;
150 for (int i=0; i<num_intervals2; i++)
151 {
152 interval_t interval;
153 interval.start = starts2[i];
154 interval.stop = stops2[i];
155 interval.set_id = 2;
156 interval.idx = i+1;
157 intervals2.push_back(interval);
158 //printf("int2: [%i, %i] \n",intervals2[i].start, intervals2[i].stop);
159 }
160 intervals2.push_back(i);
161 //printf("num_intervals2: %i\n", intervals2.size());
162
163 sort(intervals1.begin(), intervals1.end(), compare);
164 sort(intervals2.begin(), intervals2.end(), compare);
165
166
167 vector<int> overlap;
168 vector<interval_t> Wx;
169 vector<interval_t> Wy;
170 vector<interval_t>::iterator x = intervals1.begin();
171 vector<interval_t>::iterator y = intervals2.begin();
172 while (x<intervals1.end() && y<intervals2.end())
173 {
174 //vector<interval_t>::iterator x;
175 //vector<interval_t>::iterator y;
176 //if (it1>intervals1.end())
177 // x = inf_interval();
178 //else
179 // x = it1;
180 //if (it2>intervals2.end())
181 // y = inf_interval();
182 //else
183 // y=it2;
184
185 if (x->start <= y->start)
186 {
187 scan(*x, &Wx, *y, &Wy, &overlap);
188 x++;
189 }
190 else
191 {
192 if (y<=intervals2.end())
193 {
194 scan(*y, &Wy, *x, &Wx, &overlap);
195 y++;
196 }
197 }
198 }
199
200 plhs[0] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
201 double* idx1 = mxGetPr(plhs[0]);
202
203 plhs[1] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
204 double* idx2 = mxGetPr(plhs[1]);
205
206 for (int i=0; i<overlap.size(); i+=2)
207 {
208 //printf("ov: %i [%i, %i] \n", i, overlap[i], overlap[i+1]);
209 idx1[i/2] = (double) overlap[i];
210 idx2[i/2] = (double) overlap[i+1];
211 }
212 }
213
214
215
216
217