view alignCustomAmplicon/utils.cpp @ 5:0aaf65fbb48a draft default tip

Uploaded
author fcaramia
date Wed, 20 Mar 2013 00:22:08 -0400
parents d32bddcff685
children
line wrap: on
line source

#include <stdio.h>
#include <string>
#include <vector>

#define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ;
#define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ;
using namespace std;

void printMatrix (float* m, int rows, int cols)
{
	for(int i = 0; i < rows; i++)
	{
		for(int j = 0; j < cols; j++)
		{
			printf("%.2f ", *(m + i*cols +j));
		}
		printf("\n");
	}

}

int levenshteinDistance(string s, string t)
{
	//Calculate Edit distance of 2 strings
	if(s == t)
		return 0;
	int size1 = s.length()+1;
	int size2 = t.length()+1;	
	int d[size1][size2];

	for (int i=0; i<size1; i++)d[i][0] = i;                    
	for (int i=0; i<size2; i++)d[0][i] = i;                    

	for (int i=1; i<size1; i++)
		for (int j=1; j<size2; j++)
	{
		if(s[i-1] == t[j-1] )
			d[i][j] = d[i-1][j-1]; //if equeal no operation
		else
			d[i][j] = min(d[i-1][j] + 1,d[i][j-1] + 1,d[i-1][j-1] + 1 ); //Addition, deletion or substitution
		
	}	

	return d[size1-1][size2-1]; // return edit distance
}

float nw_align2 (string &seq1, string &seq2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
{
	//Do NW alignment of 2 strings...
	int size1 = seq1.length();
	int size2 = seq2.length();
		
	float scores[size1+1][size2+1];		
	char dir[size1+1][size2+1];
	char gaps[size1+1][size2+1];
	
	for(int i = 0; i <= size1; i++) 
	{
		scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen;
		dir[i][0] = 'U';
		gaps[i][0] = 'D';
	}
	for(int j = 0; j <= size2; j++)
	{
		scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen;
		dir[0][j] = 'L';
		gaps[0][j] = 'I';
	}
	
	scores[0][0] = 0;
	gaps[0][0] = 'N';
	
	if (noFrontGapPenalty)
	{
		for(int i = 0; i <= size1; i++) scores[i][0] = 0;
		for(int j = 0; j <= size2; j++) scores[0][j] = 0;
	}
			
	float match;
	float del;
	float insert;
	
	for(int i = 1; i <= size1; i++)
		for(int j = 1; j <= size2; j++)
		{
			if (seq1[i-1] == 'N' || seq2[j-1] == 'N')
			{
				match = scores[i-1][j-1];
			}
			else
			{
				match = scores[i-1][j-1] + (float)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty);  //1.9f clustalw
			}
			del = scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt);	
			insert = scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt);
			
			//debug
			//if (match == del || match == insert)
			//{
				//printf("I:%d J:%d match = %f del %f insert %f %c %c\n",i,j,match,del,insert,seq1[i-1],seq2[j-1]);
			//}
			
			if (match > del && match > insert) 
			{
				scores[i][j] = match;
				dir[i][j] = 'D';
				gaps[i][j] = 'N';
			}
			else if(match >= del && match >= insert && seq1[i-1] != seq2[j-1])
			{
				scores[i][j] = match;
				dir[i][j] = 'D';
				gaps[i][j] = 'N';
			}
			else if(del > insert)
			{
				scores[i][j] = del;
				dir[i][j] = 'U';
				gaps[i][j] = 'D';
			}
			else 
			{
				scores[i][j] = insert;
				dir[i][j] = 'L';
				gaps[i][j] = 'I';
			}
		}
		
	//debug
	//if (debug)
	//	printMatrix(*scores, size1+1, size2+1);
	//printMatrix(*dir, size1+1, size2+1);
	
	string alignment1="";
	string alignment2="";

	
	int cont1 = size1;
	int cont2 = size2;
	int max1 = size1;
	int max2 = size2;
	float num = scores[size1][size2];;
 	
	if (noEndGapPenalty) 
	{
		//Search Maxes
		
		for(int i = 1; i <= size1; i++)
		{
			if(num<scores[i][size2])
			{
				num = scores[i][size2];
				max1 = i;
				max2 = size2;
			}
		}	
		for(int j = 1; j <= size2; j++)
		{
			if(num<scores[size1][j])
			{
				num = scores[size1][j];
				max1 = size1;
				max2 = j;
			}
		}	
		cont1 = max1;
		cont2 = max2;
	}
	
	while (cont1 > 0 && cont2 > 0)
	{
		if(dir[cont1][cont2] == 'D')
		{
			alignment1+= seq1[cont1-1];
			alignment2+= seq2[cont2-1];
			cont1--;
			cont2--;
			
		}		
		else if(dir[cont1][cont2] == 'L')
		{
			alignment1+= '-';
			alignment2+= seq2[cont2-1];
			cont2--;
		}
		else 
		{
			alignment1+= seq1[cont1-1];
			alignment2+= '-';
			cont1--;
			
		}
	}

	while (cont1 > 0)
	{
		alignment1+= seq1[cont1-1];
		alignment2+= '-';
		cont1--;
	}
	
	while (cont2 > 0)
	{
		alignment1+= '-';
		alignment2+= seq2[cont2-1];
		cont2--;
	}	
	
	alignment1 = string (alignment1.rbegin(),alignment1.rend());
	alignment2 = string (alignment2.rbegin(),alignment2.rend());
	
	
	if (noEndGapPenalty) 
	{
		//Need to fill out rest of alignment... 
		if (max1 != size1)
		{
			for (int i=max1; i<size1; ++i )
			{
				alignment1+= seq1 [i];
				alignment2+= '-';
				
			}
		}
	
		if (max2 != size2) 
		{
			
			for (int i=max2; i<size2; ++i )
			{
				alignment2+= seq2 [i];
				alignment1+= '-';
				
			}
		}
			
	}
	if (debug)
	{
		printf("Seq1: %s\n", alignment1.c_str());
		printf("Seq2: %s\n\n", alignment2.c_str());
	
	}
	
	//Returns
	seq1 = alignment1;
	seq2 = alignment2; 
	
	return num;

}

vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug)
{
	//Do NW alignment of 2 strings...
	int size1 = a1[0].length();
	int size2 = a2[0].length();
	int v1 = a1.size();
	int v2 = a2.size();
	int combs = v1 * v2;

	vector<string> msa;
	msa.clear();	
	vector<string> alignment1, alignment2;
	
	for (int i=0;i<v1;++i)
		alignment1.push_back(string());
	for (int i=0;i<v2;++i)
		alignment2.push_back(string());



	float scores[size1+1][size2+1];		
	char dir[size1+1][size2+1];
	char gaps[size1+1][size2+1];
	
	for(int i = 0; i <= size1; i++) 
	{
		scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen;
		dir[i][0] = 'U';
		gaps[i][0] = 'D';
	}
	for(int j = 0; j <= size2; j++)
	{
		scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen;
		dir[0][j] = 'L';
		gaps[0][j] = 'I';
	}
	
	scores[0][0] = 0;
	gaps[0][0] = 'N';
	
	if (noFrontGapPenalty)
	{
		for(int i = 0; i <= size1; i++) scores[i][0] = 0;
		for(int j = 0; j <= size2; j++) scores[0][j] = 0;
	}
			
	float match;
	float del;
	float insert;
	bool missmatch;
	
	for(int i = 1; i <= size1; i++)
		for(int j = 1; j <= size2; j++)
		{

			match = del = insert = 0.0f;
			missmatch = true;
			for(int z=0;z<v1;++z)
			{
				for(int w=0;w<v2;++w)
				{
					if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N')
					{
						match += scores[i-1][j-1];
					}
					else
					{
						match += scores[i-1][j-1] + (float)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty);
						if(a1[z][i-1] == a2[w][j-1])
						{
							missmatch = false;
						}
					}
					del += scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt);	
					insert += scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt);
				}			
			}
			
			match /= combs;
			del /= combs;
			insert /= combs;

			if (match > del && match > insert) 
			{
				scores[i][j] = match;
				dir[i][j] = 'D';
				gaps[i][j] = 'N';
			}
			else if(match >= del && match >= insert && missmatch)
			{
				scores[i][j] = match;
				dir[i][j] = 'D';
				gaps[i][j] = 'N';
			}
			else if(del > insert)
			{
				scores[i][j] = del;
				dir[i][j] = 'U';
				gaps[i][j] = 'D';
			}
			else 
			{
				scores[i][j] = insert;
				dir[i][j] = 'L';
				gaps[i][j] = 'I';
			}
		}

	//debug
//	if (debug)
//		printMatrix(*scores, size1+1, size2+1);
	        //printMatrix(*dir, size1+1, size2+1);
	
	
	int cont1 = size1;
	int cont2 = size2;
	int max1 = size1;
	int max2 = size2;
	float num = scores[size1][size2];
 	
	if (noEndGapPenalty) 
	{
		//Search Maxes
		
		for(int i = 1; i <= size1; i++)
		{
			if(num<scores[i][size2])
			{
				num = scores[i][size2];
				max1 = i;
				max2 = size2;
			}
		}	
		for(int j = 1; j <= size2; j++)
		{
			if(num<scores[size1][j])
			{
				num = scores[size1][j];
				max1 = size1;
				max2 = j;
			}
		}	
		cont1 = max1;
		cont2 = max2;
	}
	
	while (cont1 > 0 && cont2 > 0)
	{
		if(dir[cont1][cont2] == 'D')
		{
			
			for(int z=0;z<v1;++z)
				alignment1[z]+= a1[z][cont1-1];
			
			for(int w=0;w<v2;++w)
				alignment2[w]+= a2[w][cont2-1];
			
			cont1--;
			cont2--;
			
		}		
		else if(dir[cont1][cont2] == 'L')
		{
			for(int z=0;z<v1;++z)
				alignment1[z]+= '-';
			for(int w=0;w<v2;++w)
				alignment2[w]+= a2[w][cont2-1];
						
			cont2--;
		}
		else 
		{
			for(int z=0;z<v1;++z)
				alignment1[z]+= a1[z][cont1-1];
			
			for(int w=0;w<v2;++w)
				alignment2[w]+= '-';
						
			cont1--;
			
		}
	}

	while (cont1 > 0)
	{
		for(int z=0;z<v1;++z)
			alignment1[z]+= a1[z][cont1-1];
		for(int w=0;w<v2;++w)
			alignment2[w]+= '-';
				
		cont1--;
	}
	
	while (cont2 > 0)
	{
		for(int z=0;z<v1;++z)
			alignment1[z]+= '-';
		for(int w=0;w<v2;++w)
			alignment2[w]+= a2[w][cont2-1];
		
		cont2--;
	}	
	
	
	

	for(int z=0;z<v1;++z)
	{
		alignment1[z] = string (alignment1[z].rbegin(),alignment1[z].rend());
	}
	for(int w=0;w<v2;++w)
	{
		alignment2[w] = string (alignment2[w].rbegin(),alignment2[w].rend());
	}

	
	
	if (noEndGapPenalty) 
	{
		//Need to fill out rest of alignment... 
		if (max1 != size1)
		{
			for (int i=max1; i<size1; ++i )
			{
				for(int z=0;z<v1;++z)
					alignment1[z]+= a1[z][i];
				for(int w=0;w<v2;++w)
					alignment2[w]+= '-';
				
			}
		}
	
		if (max2 != size2) 
		{
			
			for (int i=max2; i<size2; ++i )
			{
				for(int z=0;z<v1;++z)
					alignment1[z]+= '-';
				for(int w=0;w<v2;++w)
					alignment2[w]+= a2[w][i];
			
			}
		}
			
	}
	
	//returns
	for(int z=0;z<v1;++z)
		msa.push_back(alignment1[z]);
	for(int w=0;w<v2;++w)
		msa.push_back(alignment2[w]);
	
	if (debug)
	{
		for(int z=0;z<v1;++z)
			printf("%s\n",alignment1[z].c_str());
		for(int w=0;w<v2;++w)
			printf("%s\n",alignment2[w].c_str());
		
	}

	return msa;

}