diff PsiCLASS-1.0.2/SubexonCorrelation.hpp @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PsiCLASS-1.0.2/SubexonCorrelation.hpp	Fri Mar 26 16:52:45 2021 +0000
@@ -0,0 +1,270 @@
+#ifndef _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
+#define _MOURISL_CLASSES_SUBEXONCORRELATION_HEADER
+
+#include "SubexonGraph.hpp"
+
+#include <stdio.h>
+#include <math.h>
+
+class SubexonCorrelation
+{
+private:
+	struct _subexon *lastSubexons ;
+	std::vector<FILE *> fileList ;
+	int offset ;
+	int prevSeCnt ;
+	int seCnt ;
+
+	double **correlation ;
+		
+	int OverlapSize( int s0, int e0, int s1, int e1 )
+	{
+		int s = -1, e = -1 ;
+		if ( e0 < s1 || s0 > e1 )
+			return 0 ;
+		s = s0 > s1 ? s0 : s1 ;
+		e = e0 < e1 ? e0 : e1 ;
+		return e - s + 1 ;
+	}
+public:
+	SubexonCorrelation() 
+	{
+		offset = 0 ;
+		lastSubexons = NULL ;
+		correlation = NULL ;
+		prevSeCnt = 0 ;
+	}
+
+	~SubexonCorrelation()
+	{
+		int cnt = fileList.size() ;
+		int i ;
+		for ( i = 0 ; i < cnt ; ++i )
+			fclose( fileList[i] ) ;
+		delete[] lastSubexons ;
+	}
+
+	void Initialize( char *f )
+	{
+		FILE *fpSl ;
+		fpSl = fopen( f, "r" ) ;
+		char buffer[1024] ;
+		while ( fgets( buffer, sizeof( buffer ), fpSl ) != NULL )
+		{
+			int len = strlen( buffer ) ;
+			if ( buffer[len - 1] == '\n' )
+			{
+				buffer[len - 1] = '\0' ;
+				--len ;
+
+			}
+			FILE *fp = fopen( buffer, "r" ) ;
+			fileList.push_back( fp ) ;
+		}
+
+		lastSubexons = new struct _subexon[ fileList.size() ] ;
+		int i, cnt ;
+		cnt = fileList.size() ;
+		for ( i = 0 ; i < cnt ; ++i )
+			lastSubexons[i].chrId = -1 ;
+	
+	}
+	
+	// We assume the subexons only contains the subexons we are interested in.
+	// And we assume that each time we call this function, the subexons we are interested are 
+	// sorted in order.
+	void ComputeCorrelation( struct _subexon *subexons, int cnt, Alignments &alignments )
+	{
+		int sampleCnt = fileList.size() ;
+		if ( sampleCnt <= 1 )
+			return ;
+		int i, j, k ;
+		seCnt = cnt ;
+
+		// Obtain the depth matrix.
+		double **depth ;
+		depth = new double* [sampleCnt] ; 
+		for ( i = 0 ; i < sampleCnt ; ++i )
+		{
+			depth[i] = new double[ cnt ] ; 
+			memset( depth[i], 0, sizeof( double ) * cnt ) ;
+		}
+		
+		char buffer[2048] ;
+		for ( i = 0 ; i < sampleCnt ; ++i )
+		{
+			struct _subexon se ;
+			bool useLastSubexon = false ;
+			if ( lastSubexons[i].chrId != -1 )
+			{
+				se = lastSubexons[i] ;
+				useLastSubexon = true ;
+			}
+			else
+				se.chrId = -1 ;
+
+			// Locate the subexon
+			while ( 1 )
+			{
+				if ( se.chrId < subexons[0].chrId 
+					|| ( se.chrId == subexons[0].chrId && se.end < subexons[0].start ) )
+				{
+					if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
+					{
+						buffer[0] = '\0' ;
+						break ;
+					}
+					if ( buffer[0] == '#' )
+						continue ;
+					
+					SubexonGraph::InputSubexon( buffer, alignments, se ) ;
+					--se.start ; --se.end ;
+					useLastSubexon = false ;
+					continue ;
+				}
+				break ;
+			}
+			if ( buffer[0] == '\0' )
+			{
+				lastSubexons[i] = se ;	
+				continue ;
+			}
+
+			// Process the subexon.
+			int tag = 0 ;
+			while ( 1 )
+			{
+				lastSubexons[i] = se ;
+				
+				if ( useLastSubexon == false )
+				{
+					SubexonGraph::InputSubexon( buffer, alignments, se ) ; 
+					--se.start ; --se.end ;
+				}
+
+				if ( se.chrId > subexons[cnt - 1].chrId || se.start > subexons[cnt - 1].end )
+					break ;
+
+				while ( 1 )	
+				{
+					if ( tag > cnt || subexons[tag].end >= se.start )	
+						break ;
+					++tag ;
+				}
+
+				for ( j = tag ; j < cnt && subexons[j].start <= se.end ; ++j )
+				{
+					int overlap = OverlapSize( se.start, se.end, subexons[j].start, subexons[j].end ) ; 
+					depth[i][j] += overlap * se.avgDepth ;		
+				}
+				
+				if ( fgets( buffer, sizeof( buffer ), fileList[i] ) == NULL )
+					break ;
+			}
+		}
+		// Normalize the depth
+		for ( i = 0 ; i < sampleCnt ; ++i )
+		{
+			for ( j = 0 ; j < cnt ; ++j )
+				depth[i][j] /= ( subexons[j].end - subexons[j].start + 1 ) ;
+		}
+
+		// Compute the correlation.
+		double *avg = new double[cnt] ;
+		double *var = new double[cnt] ;
+		if ( correlation != NULL )
+		{
+			for ( i = 0 ; i < prevSeCnt ; ++i )
+				delete[] correlation[i] ;
+			delete[] correlation ;
+		}
+
+		correlation = new double*[cnt] ;
+		for ( i = 0 ; i < cnt ; ++i )
+			correlation[i] = new double[cnt] ;
+		
+		memset( avg, 0, sizeof( double ) * cnt ) ;
+		memset( var, 0, sizeof( double ) * cnt ) ;
+		for ( i = 0 ; i < cnt ; ++i ) 
+		{
+			//avg[i] = 0 ;
+			//var[i] = 0 ;
+			memset( correlation[i], 0, sizeof( double ) * cnt ) ;
+			correlation[i][i] = 1 ;
+		}
+
+		for ( i = 0 ; i < sampleCnt ; ++i )
+			for ( j = 0 ; j < cnt ; ++j )
+			{
+				avg[j] += depth[i][j] ;
+				var[j] += depth[i][j] * depth[i][j] ;
+			}
+		for ( i = 0 ; i < cnt ; ++i )
+		{
+			avg[i] /= sampleCnt ;
+			var[i] = var[i] / sampleCnt - avg[i] * avg[i] ;
+		}
+		
+		for ( i = 0 ; i < sampleCnt ; ++i )
+			for ( j = 0 ; j < cnt ; ++j )
+				for ( k = j + 1 ; k < cnt ; ++k )
+				{
+					//if ( subexons[j].geneId == subexons[k].geneId )
+					correlation[j][k] += depth[i][j] * depth[i][k] ;	
+				}
+		for ( j = 0 ; j < cnt ; ++j )
+			for ( k = j + 1 ; k < cnt ; ++k )
+			{
+				if ( var[j] > 1e-6 && var[k] > 1e-6 )
+				{
+					correlation[j][k] = ( correlation[j][k] / sampleCnt - avg[j] * avg[k] ) / sqrt( var[j] * var[k] ) ;
+				}
+				else
+					correlation[j][k] = 0 ;
+				//printf( "%d %d %d\n", j, k, cnt ) ;
+				correlation[k][j] = correlation[j][k] ;
+			}
+		//printf( "%lf\n", correlation[0][1] ) ;
+		// Release the memory.
+		for ( i = 0 ; i < sampleCnt ; ++i )
+			delete[] depth[i] ;
+		delete[] depth ;
+		delete[] avg ;
+		delete[] var ;
+
+		prevSeCnt = cnt ;
+	}
+
+	double Query( int i, int j )
+	{
+		if ( fileList.size() > 1 )
+			return correlation[i][j] ;
+		else
+			return 0 ;
+	}
+
+	void Assign( const SubexonCorrelation &c )
+	{
+		int i ;
+		fileList = c.fileList ;
+		if ( fileList.size() <= 1 )
+			return ;
+
+		if ( correlation != NULL )
+		{
+			for ( i = 0 ; i < seCnt ; ++i )
+				delete[] correlation[i] ;
+			delete[] correlation ;
+		}
+		
+		seCnt = c.seCnt ;
+		correlation = new double*[seCnt] ;
+		for ( i = 0 ; i < seCnt ; ++i )
+		{
+			correlation[i] = new double[seCnt] ;
+			memcpy( correlation[i], c.correlation[i], sizeof( double ) * seCnt ) ;
+		}
+	}
+} ;
+
+#endif