diff deseq-hts_2.0/mex/interval_overlap.cpp @ 10:2fe512c7bfdf draft

DESeq2 version 1.0.19 added to the repo
author vipints <vipin@cbio.mskcc.org>
date Tue, 08 Oct 2013 08:15:34 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_2.0/mex/interval_overlap.cpp	Tue Oct 08 08:15:34 2013 -0400
@@ -0,0 +1,217 @@
+/*
+*  This program is free software; you can redistribute it and/or modify
+*  it under the terms of the GNU General Public License as published by
+*  the Free Software Foundation; either version 3 of the License, or
+*  (at your option) any later version.
+*
+*   Written (W) 2010-2011 Jonas Behr
+*   Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <math.h>
+#include <limits>
+#include <mex.h>
+#include <assert.h>
+#include <vector>
+  using std::vector;
+#include <algorithm>
+  using std::sort;
+  using std::min;
+  using std::max;
+
+typedef struct {
+    int start;
+    int stop;
+	int idx;
+	int set_id;
+} interval_t;
+
+bool compare (interval_t i, interval_t j) 
+{ 
+	return (i.start<j.start); 
+}
+
+bool overlaps(interval_t a, interval_t b)
+{
+	int v = min(a.stop,b.stop) - max(a.start,b.start) + 1;
+	return (v >= 1);
+}
+bool leftOf(interval_t a, interval_t b)
+{
+	return (a.stop < b.start);
+}
+
+void scan(interval_t f, vector<interval_t>* Wf, interval_t g, vector<interval_t>* Wg, vector<int>* overlap)
+{
+	vector<interval_t>::iterator i;
+	i=Wg->begin();
+	while (i<Wg->end())
+	{
+		interval_t g2 = *i;
+		if (leftOf(g2,f))
+		{
+			Wg->erase(i);// inefficient if Wg is large
+			// this moves all elements, therefore i is not incremented
+		}
+		else if (overlaps(g2,f))
+		{
+			if (g2.set_id==1)
+			{
+				//printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", g2.idx, g2.start, g2.stop, f.idx, f.start, f.stop);
+				overlap->push_back(g2.idx);
+				overlap->push_back(f.idx);
+			}
+			else if (f.set_id==1)
+			{
+				//printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", f.idx, f.start, f.stop, g2.idx, g2.start, g2.stop);
+				overlap->push_back(f.idx);
+				overlap->push_back(g2.idx);
+			}	
+			i++;
+		}
+		else
+		{
+			printf("never happens??\n");
+			i++;
+		}
+	}
+	if (!leftOf(f, g))
+	{
+		Wf->push_back(f);
+		//printf("push: [%i, %i] size:%i\n", f.start, f.stop, Wf->size());
+	}
+}
+
+/*
+ * prhs[0] first interval set starts
+ * prhs[1] first interval set stops
+ * prhs[2] second interval set starts
+ * prhs[3] second interval set stops
+ *
+ * return:
+ * plhs[0] one based index in first interval set overlapping with a interval in the second set
+ * plhs[1] corresponding index in the second set
+ *
+*/
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+	if (nrhs!=4)
+		mexErrMsgTxt("Expected 4 arguments: starts1, stops1, starts2, stops2 \n");
+	if (nlhs!=2)
+		mexErrMsgTxt("Expected 2 output arguments \n");
+
+	int num_intervals1 = mxGetNumberOfElements(prhs[0]);
+	assert(num_intervals1 == mxGetNumberOfElements(prhs[1]));
+	int num_intervals2 = mxGetNumberOfElements(prhs[2]);
+	assert(num_intervals2 == mxGetNumberOfElements(prhs[3]));
+	
+	//printf("num_intervals1: %i\n", num_intervals1);
+	//printf("num_intervals2: %i\n", num_intervals2);
+
+	double* starts1 = mxGetPr(prhs[0]);
+	double* stops1  = mxGetPr(prhs[1]);
+	double* starts2 = mxGetPr(prhs[2]);
+	double* stops2  = mxGetPr(prhs[3]);
+	
+	vector<interval_t> intervals1;
+	for (int i=0; i<num_intervals1; i++)
+	{
+		interval_t interval;
+		interval.start = starts1[i];
+		interval.stop = stops1[i];
+		interval.set_id = 1;
+		interval.idx = i+1;
+		intervals1.push_back(interval);
+		//printf("int1: [%i, %i] \n",intervals1[i].start, intervals1[i].stop);
+	}
+	interval_t i;
+	i.start = std::numeric_limits<int>::max();
+	i.stop = std::numeric_limits<int>::max();
+	i.set_id = std::numeric_limits<int>::max();
+	i.idx = std::numeric_limits<int>::max();
+	intervals1.push_back(i);
+
+	//printf("num_intervals1: %i\n", intervals1.size());
+	vector<interval_t> intervals2;
+	for (int i=0; i<num_intervals2; i++)
+	{
+		interval_t interval;
+		interval.start = starts2[i];
+		interval.stop = stops2[i];
+		interval.set_id = 2;
+		interval.idx = i+1;
+		intervals2.push_back(interval);
+		//printf("int2: [%i, %i] \n",intervals2[i].start, intervals2[i].stop);
+	}
+	intervals2.push_back(i);
+	//printf("num_intervals2: %i\n", intervals2.size());
+
+	sort(intervals1.begin(), intervals1.end(), compare);	
+	sort(intervals2.begin(), intervals2.end(), compare);	
+
+
+	vector<int> overlap;
+	vector<interval_t> Wx;
+	vector<interval_t> Wy;
+	vector<interval_t>::iterator x = intervals1.begin();
+	vector<interval_t>::iterator y = intervals2.begin();
+	while (x<intervals1.end() && y<intervals2.end())
+	{
+		//vector<interval_t>::iterator x;
+		//vector<interval_t>::iterator y;
+		//if (it1>intervals1.end())
+		//	x = inf_interval();
+		//else
+		//	x = it1;
+		//if (it2>intervals2.end())
+		//	y = inf_interval();
+		//else
+		//	y=it2;
+
+		if (x->start <= y->start)
+		{
+				scan(*x, &Wx, *y, &Wy, &overlap);
+				x++;
+		}
+		else
+		{
+			if (y<=intervals2.end())
+			{
+				scan(*y, &Wy, *x, &Wx, &overlap);
+				y++;
+			}
+		}
+	}
+
+	plhs[0] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
+    double* idx1 = mxGetPr(plhs[0]);
+
+	plhs[1] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
+    double* idx2 = mxGetPr(plhs[1]);
+
+	for (int i=0; i<overlap.size(); i+=2)
+	{
+		//printf("ov: %i [%i, %i] \n", i, overlap[i], overlap[i+1]);
+		idx1[i/2] = (double) overlap[i];
+		idx2[i/2] = (double) overlap[i+1];
+	}
+}
+
+
+
+
+