diff alignCustomAmplicon/utils.cpp @ 0:d32bddcff685 draft

Uploaded
author fcaramia
date Wed, 09 Jan 2013 00:24:09 -0500
parents
children 0aaf65fbb48a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alignCustomAmplicon/utils.cpp	Wed Jan 09 00:24:09 2013 -0500
@@ -0,0 +1,498 @@
+#include <stdio.h>
+#include <string>
+#include <vector>
+
+#define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ;
+#define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ;
+using namespace std;
+
+void printMatrix (double* m, int rows, int cols)
+{
+	for(int i = 0; i < rows; i++)
+	{
+		for(int j = 0; j < cols; j++)
+		{
+			printf("%.2f ", *(m + i*cols +j));
+		}
+		printf("\n");
+	}
+
+}
+
+int levenshteinDistance(string s, string t)
+{
+	//Calculate Edit distance of 2 strings
+	if(s == t)
+		return 0;
+	int size1 = s.length()+1;
+	int size2 = t.length()+1;	
+	int d[size1][size2];
+
+	for (int i=0; i<size1; i++)d[i][0] = i;                    
+	for (int i=0; i<size2; i++)d[0][i] = i;                    
+
+	for (int i=1; i<size1; i++)
+		for (int j=1; j<size2; j++)
+	{
+		if(s[i-1] == t[j-1] )
+			d[i][j] = d[i-1][j-1]; //if equeal no operation
+		else
+			d[i][j] = min(d[i-1][j] + 1,d[i][j-1] + 1,d[i-1][j-1] + 1 ); //Addition, deletion or substitution
+		
+	}	
+
+	return d[size1-1][size2-1]; // return edit distance
+}
+
+double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
+{
+	//Do NW alignment of 2 strings...
+	int size1 = seq1.length();
+	int size2 = seq2.length();
+		
+	double scores[size1+1][size2+1];		
+	char dir[size1+1][size2+1];
+	char gaps[size1+1][size2+1];
+	
+	for(int i = 0; i <= size1; i++) 
+	{
+		scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen;
+		dir[i][0] = 'U';
+		gaps[i][0] = 'O';
+	}
+	for(int j = 0; j <= size2; j++)
+	{
+		scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen;
+		dir[0][j] = 'L';
+		gaps[0][j] = 'O';
+	}
+	
+	scores[0][0] = 0;
+	gaps[0][0] = 'N';
+	
+	if (noFrontGapPenalty)
+	{
+		for(int i = 0; i <= size1; i++) scores[i][0] = 0;
+		for(int j = 0; j <= size2; j++) scores[0][j] = 0;
+	}
+			
+	double match;
+	double del;
+	double insert;
+	
+	for(int i = 1; i <= size1; i++)
+		for(int j = 1; j <= size2; j++)
+		{
+			if (seq1[i-1] == 'N' || seq2[j-1] == 'N')
+			{
+				match = scores[i-1][j-1];
+			}
+			else
+			{
+				match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty);  //1.9f clustalw
+			}
+			del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt);	
+			insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt);
+			//debug
+			//printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert);
+			
+			if (match>del && match > insert) 
+			{
+				scores[i][j] = match;
+				dir[i][j] = 'D';
+				gaps[i][j] = 'N';
+			}
+			else if(del > insert)
+			{
+				scores[i][j] = del;
+				dir[i][j] = 'U';
+				gaps[i][j] = 'O';
+			}
+			else 
+			{
+				scores[i][j] = insert;
+				dir[i][j] = 'L';
+				gaps[i][j] = 'O';
+			}
+		}
+		
+	//debug
+	//if (debug)
+	//	printMatrix(*scores, size1+1, size2+1);
+	//printMatrix(*dir, size1+1, size2+1);
+	
+	string alignment1="";
+	string alignment2="";
+
+	
+	int cont1 = size1;
+	int cont2 = size2;
+	int max1 = size1;
+	int max2 = size2;
+	double num = scores[size1][size2];;
+ 	
+	if (noEndGapPenalty) 
+	{
+		//Search Maxes
+		
+		for(int i = 1; i <= size1; i++)
+		{
+			if(num<scores[i][size2])
+			{
+				num = scores[i][size2];
+				max1 = i;
+				max2 = size2;
+			}
+		}	
+		for(int j = 1; j <= size2; j++)
+		{
+			if(num<scores[size1][j])
+			{
+				num = scores[size1][j];
+				max1 = size1;
+				max2 = j;
+			}
+		}	
+		cont1 = max1;
+		cont2 = max2;
+	}
+	
+	while (cont1 > 0 && cont2 > 0)
+	{
+		if(dir[cont1][cont2] == 'D')
+		{
+			alignment1+= seq1[cont1-1];
+			alignment2+= seq2[cont2-1];
+			cont1--;
+			cont2--;
+			
+		}		
+		else if(dir[cont1][cont2] == 'L')
+		{
+			alignment1+= '-';
+			alignment2+= seq2[cont2-1];
+			cont2--;
+		}
+		else 
+		{
+			alignment1+= seq1[cont1-1];
+			alignment2+= '-';
+			cont1--;
+			
+		}
+	}
+
+	while (cont1 > 0)
+	{
+		alignment1+= seq1[cont1-1];
+		alignment2+= '-';
+		cont1--;
+	}
+	
+	while (cont2 > 0)
+	{
+		alignment1+= '-';
+		alignment2+= seq2[cont2-1];
+		cont2--;
+	}	
+	
+	alignment1 = string (alignment1.rbegin(),alignment1.rend());
+	alignment2 = string (alignment2.rbegin(),alignment2.rend());
+	
+	
+	if (noEndGapPenalty) 
+	{
+		//Need to fill out rest of alignment... 
+		if (max1 != size1)
+		{
+			for (int i=max1; i<size1; ++i )
+			{
+				alignment1+= seq1 [i];
+				alignment2+= '-';
+				
+			}
+		}
+	
+		if (max2 != size2) 
+		{
+			
+			for (int i=max2; i<size2; ++i )
+			{
+				alignment2+= seq2 [i];
+				alignment1+= '-';
+				
+			}
+		}
+			
+	}
+	if (debug)
+	{
+		printf("Seq1: %s\n", alignment1.c_str());
+		printf("Seq2: %s\n\n", alignment2.c_str());
+	
+	}
+	
+	//Returns
+	seq1 = alignment1;
+	seq2 = alignment2; 
+	
+	return num;
+
+}
+
+vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
+{
+	//Do NW alignment of 2 strings...
+	int size1 = a1[0].length();
+	int size2 = a2[0].length();
+	int v1 = a1.size();
+	int v2 = a2.size();
+	int combs = v1 * v2;
+
+	vector<string> msa;
+	msa.clear();	
+	vector<string> alignment1, alignment2;
+	
+	for (int i=0;i<v1;++i)
+		alignment1.push_back(string());
+	for (int i=0;i<v2;++i)
+		alignment2.push_back(string());
+
+
+
+	double scores[size1+1][size2+1];		
+	char dir[size1+1][size2+1];
+	char gaps[size1+1][size2+1];
+	
+	for(int i = 0; i <= size1; i++) 
+	{
+		scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen;
+		dir[i][0] = 'U';
+		gaps[i][0] = 'O';
+	}
+	for(int j = 0; j <= size2; j++)
+	{
+		scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen;
+		dir[0][j] = 'L';
+		gaps[0][j] = 'O';
+	}
+	
+	scores[0][0] = 0;
+	gaps[0][0] = 'N';
+	
+	if (noFrontGapPenalty)
+	{
+		for(int i = 0; i <= size1; i++) scores[i][0] = 0;
+		for(int j = 0; j <= size2; j++) scores[0][j] = 0;
+	}
+			
+	double match;
+	double del;
+	double insert;
+	
+	for(int i = 1; i <= size1; i++)
+		for(int j = 1; j <= size2; j++)
+		{
+
+			match = del = insert = 0.0f;
+			for(int z=0;z<v1;++z)
+			{
+				for(int w=0;w<v2;++w)
+				{
+					if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N')
+					{
+						match += scores[i-1][j-1];
+					}
+					else
+					{
+						match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty);
+					}
+					del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt);	
+					insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt);
+				}			
+			}
+			
+			match /= combs;
+			del /= combs;
+			insert /= combs;
+
+			if (match > del && match > insert) 
+			{
+				scores[i][j] = match;
+				dir[i][j] = 'D';
+				gaps[i][j] = 'N';
+			}
+			else if(del > insert)
+			{
+				scores[i][j] = del;
+				dir[i][j] = 'U';
+				gaps[i][j] = 'O';
+			}
+			else 
+			{
+				scores[i][j] = insert;
+				dir[i][j] = 'L';
+				gaps[i][j] = 'O';
+			}
+		}
+
+	//debug
+//	if (debug)
+//		printMatrix(*scores, size1+1, size2+1);
+	        //printMatrix(*dir, size1+1, size2+1);
+	
+	
+	int cont1 = size1;
+	int cont2 = size2;
+	int max1 = size1;
+	int max2 = size2;
+	double num = scores[size1][size2];
+ 	
+	if (noEndGapPenalty) 
+	{
+		//Search Maxes
+		
+		for(int i = 1; i <= size1; i++)
+		{
+			if(num<scores[i][size2])
+			{
+				num = scores[i][size2];
+				max1 = i;
+				max2 = size2;
+			}
+		}	
+		for(int j = 1; j <= size2; j++)
+		{
+			if(num<scores[size1][j])
+			{
+				num = scores[size1][j];
+				max1 = size1;
+				max2 = j;
+			}
+		}	
+		cont1 = max1;
+		cont2 = max2;
+	}
+	
+	while (cont1 > 0 && cont2 > 0)
+	{
+		if(dir[cont1][cont2] == 'D')
+		{
+			
+			for(int z=0;z<v1;++z)
+				alignment1[z]+= a1[z][cont1-1];
+			
+			for(int w=0;w<v2;++w)
+				alignment2[w]+= a2[w][cont2-1];
+			
+			cont1--;
+			cont2--;
+			
+		}		
+		else if(dir[cont1][cont2] == 'L')
+		{
+			for(int z=0;z<v1;++z)
+				alignment1[z]+= '-';
+			for(int w=0;w<v2;++w)
+				alignment2[w]+= a2[w][cont2-1];
+						
+			cont2--;
+		}
+		else 
+		{
+			for(int z=0;z<v1;++z)
+				alignment1[z]+= a1[z][cont1-1];
+			
+			for(int w=0;w<v2;++w)
+				alignment2[w]+= '-';
+						
+			cont1--;
+			
+		}
+	}
+
+	while (cont1 > 0)
+	{
+		for(int z=0;z<v1;++z)
+			alignment1[z]+= a1[z][cont1-1];
+		for(int w=0;w<v2;++w)
+			alignment2[w]+= '-';
+				
+		cont1--;
+	}
+	
+	while (cont2 > 0)
+	{
+		for(int z=0;z<v1;++z)
+			alignment1[z]+= '-';
+		for(int w=0;w<v2;++w)
+			alignment2[w]+= a2[w][cont2-1];
+		
+		cont2--;
+	}	
+	
+	
+	
+
+	for(int z=0;z<v1;++z)
+	{
+		alignment1[z] = string (alignment1[z].rbegin(),alignment1[z].rend());
+	}
+	for(int w=0;w<v2;++w)
+	{
+		alignment2[w] = string (alignment2[w].rbegin(),alignment2[w].rend());
+	}
+
+	
+	
+	if (noEndGapPenalty) 
+	{
+		//Need to fill out rest of alignment... 
+		if (max1 != size1)
+		{
+			for (int i=max1; i<size1; ++i )
+			{
+				for(int z=0;z<v1;++z)
+					alignment1[z]+= a1[z][i];
+				for(int w=0;w<v2;++w)
+					alignment2[w]+= '-';
+				
+			}
+		}
+	
+		if (max2 != size2) 
+		{
+			
+			for (int i=max2; i<size2; ++i )
+			{
+				for(int z=0;z<v1;++z)
+					alignment1[z]+= '-';
+				for(int w=0;w<v2;++w)
+					alignment2[w]+= a2[w][i];
+			
+			}
+		}
+			
+	}
+	
+	//returns
+	for(int z=0;z<v1;++z)
+		msa.push_back(alignment1[z]);
+	for(int w=0;w<v2;++w)
+		msa.push_back(alignment2[w]);
+	
+	if (debug)
+	{
+		for(int z=0;z<v1;++z)
+			printf("%s\n",alignment1[z].c_str());
+		for(int w=0;w<v2;++w)
+			printf("%s\n",alignment2[w].c_str());
+		
+	}
+
+	return msa;
+
+}
+
+
+