Mercurial > repos > fcaramia > custom_amplicon_alignment
view alignCustomAmplicon/utils.cpp @ 0:d32bddcff685 draft
Uploaded
author | fcaramia |
---|---|
date | Wed, 09 Jan 2013 00:24:09 -0500 |
parents | |
children | 0aaf65fbb48a |
line wrap: on
line source
#include <stdio.h> #include <string> #include <vector> #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; using namespace std; void printMatrix (double* m, int rows, int cols) { for(int i = 0; i < rows; i++) { for(int j = 0; j < cols; j++) { printf("%.2f ", *(m + i*cols +j)); } printf("\n"); } } int levenshteinDistance(string s, string t) { //Calculate Edit distance of 2 strings if(s == t) return 0; int size1 = s.length()+1; int size2 = t.length()+1; int d[size1][size2]; for (int i=0; i<size1; i++)d[i][0] = i; for (int i=0; i<size2; i++)d[0][i] = i; for (int i=1; i<size1; i++) for (int j=1; j<size2; j++) { if(s[i-1] == t[j-1] ) d[i][j] = d[i-1][j-1]; //if equeal no operation else d[i][j] = min(d[i-1][j] + 1,d[i][j-1] + 1,d[i-1][j-1] + 1 ); //Addition, deletion or substitution } return d[size1-1][size2-1]; // return edit distance } double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) { //Do NW alignment of 2 strings... int size1 = seq1.length(); int size2 = seq2.length(); double scores[size1+1][size2+1]; char dir[size1+1][size2+1]; char gaps[size1+1][size2+1]; for(int i = 0; i <= size1; i++) { scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; dir[i][0] = 'U'; gaps[i][0] = 'O'; } for(int j = 0; j <= size2; j++) { scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; dir[0][j] = 'L'; gaps[0][j] = 'O'; } scores[0][0] = 0; gaps[0][0] = 'N'; if (noFrontGapPenalty) { for(int i = 0; i <= size1; i++) scores[i][0] = 0; for(int j = 0; j <= size2; j++) scores[0][j] = 0; } double match; double del; double insert; for(int i = 1; i <= size1; i++) for(int j = 1; j <= size2; j++) { if (seq1[i-1] == 'N' || seq2[j-1] == 'N') { match = scores[i-1][j-1]; } else { match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw } del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); //debug //printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert); if (match>del && match > insert) { scores[i][j] = match; dir[i][j] = 'D'; gaps[i][j] = 'N'; } else if(del > insert) { scores[i][j] = del; dir[i][j] = 'U'; gaps[i][j] = 'O'; } else { scores[i][j] = insert; dir[i][j] = 'L'; gaps[i][j] = 'O'; } } //debug //if (debug) // printMatrix(*scores, size1+1, size2+1); //printMatrix(*dir, size1+1, size2+1); string alignment1=""; string alignment2=""; int cont1 = size1; int cont2 = size2; int max1 = size1; int max2 = size2; double num = scores[size1][size2];; if (noEndGapPenalty) { //Search Maxes for(int i = 1; i <= size1; i++) { if(num<scores[i][size2]) { num = scores[i][size2]; max1 = i; max2 = size2; } } for(int j = 1; j <= size2; j++) { if(num<scores[size1][j]) { num = scores[size1][j]; max1 = size1; max2 = j; } } cont1 = max1; cont2 = max2; } while (cont1 > 0 && cont2 > 0) { if(dir[cont1][cont2] == 'D') { alignment1+= seq1[cont1-1]; alignment2+= seq2[cont2-1]; cont1--; cont2--; } else if(dir[cont1][cont2] == 'L') { alignment1+= '-'; alignment2+= seq2[cont2-1]; cont2--; } else { alignment1+= seq1[cont1-1]; alignment2+= '-'; cont1--; } } while (cont1 > 0) { alignment1+= seq1[cont1-1]; alignment2+= '-'; cont1--; } while (cont2 > 0) { alignment1+= '-'; alignment2+= seq2[cont2-1]; cont2--; } alignment1 = string (alignment1.rbegin(),alignment1.rend()); alignment2 = string (alignment2.rbegin(),alignment2.rend()); if (noEndGapPenalty) { //Need to fill out rest of alignment... if (max1 != size1) { for (int i=max1; i<size1; ++i ) { alignment1+= seq1 [i]; alignment2+= '-'; } } if (max2 != size2) { for (int i=max2; i<size2; ++i ) { alignment2+= seq2 [i]; alignment1+= '-'; } } } if (debug) { printf("Seq1: %s\n", alignment1.c_str()); printf("Seq2: %s\n\n", alignment2.c_str()); } //Returns seq1 = alignment1; seq2 = alignment2; return num; } vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) { //Do NW alignment of 2 strings... int size1 = a1[0].length(); int size2 = a2[0].length(); int v1 = a1.size(); int v2 = a2.size(); int combs = v1 * v2; vector<string> msa; msa.clear(); vector<string> alignment1, alignment2; for (int i=0;i<v1;++i) alignment1.push_back(string()); for (int i=0;i<v2;++i) alignment2.push_back(string()); double scores[size1+1][size2+1]; char dir[size1+1][size2+1]; char gaps[size1+1][size2+1]; for(int i = 0; i <= size1; i++) { scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; dir[i][0] = 'U'; gaps[i][0] = 'O'; } for(int j = 0; j <= size2; j++) { scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; dir[0][j] = 'L'; gaps[0][j] = 'O'; } scores[0][0] = 0; gaps[0][0] = 'N'; if (noFrontGapPenalty) { for(int i = 0; i <= size1; i++) scores[i][0] = 0; for(int j = 0; j <= size2; j++) scores[0][j] = 0; } double match; double del; double insert; for(int i = 1; i <= size1; i++) for(int j = 1; j <= size2; j++) { match = del = insert = 0.0f; for(int z=0;z<v1;++z) { for(int w=0;w<v2;++w) { if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') { match += scores[i-1][j-1]; } else { match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); } del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); } } match /= combs; del /= combs; insert /= combs; if (match > del && match > insert) { scores[i][j] = match; dir[i][j] = 'D'; gaps[i][j] = 'N'; } else if(del > insert) { scores[i][j] = del; dir[i][j] = 'U'; gaps[i][j] = 'O'; } else { scores[i][j] = insert; dir[i][j] = 'L'; gaps[i][j] = 'O'; } } //debug // if (debug) // printMatrix(*scores, size1+1, size2+1); //printMatrix(*dir, size1+1, size2+1); int cont1 = size1; int cont2 = size2; int max1 = size1; int max2 = size2; double num = scores[size1][size2]; if (noEndGapPenalty) { //Search Maxes for(int i = 1; i <= size1; i++) { if(num<scores[i][size2]) { num = scores[i][size2]; max1 = i; max2 = size2; } } for(int j = 1; j <= size2; j++) { if(num<scores[size1][j]) { num = scores[size1][j]; max1 = size1; max2 = j; } } cont1 = max1; cont2 = max2; } while (cont1 > 0 && cont2 > 0) { if(dir[cont1][cont2] == 'D') { for(int z=0;z<v1;++z) alignment1[z]+= a1[z][cont1-1]; for(int w=0;w<v2;++w) alignment2[w]+= a2[w][cont2-1]; cont1--; cont2--; } else if(dir[cont1][cont2] == 'L') { for(int z=0;z<v1;++z) alignment1[z]+= '-'; for(int w=0;w<v2;++w) alignment2[w]+= a2[w][cont2-1]; cont2--; } else { for(int z=0;z<v1;++z) alignment1[z]+= a1[z][cont1-1]; for(int w=0;w<v2;++w) alignment2[w]+= '-'; cont1--; } } while (cont1 > 0) { for(int z=0;z<v1;++z) alignment1[z]+= a1[z][cont1-1]; for(int w=0;w<v2;++w) alignment2[w]+= '-'; cont1--; } while (cont2 > 0) { for(int z=0;z<v1;++z) alignment1[z]+= '-'; for(int w=0;w<v2;++w) alignment2[w]+= a2[w][cont2-1]; cont2--; } for(int z=0;z<v1;++z) { alignment1[z] = string (alignment1[z].rbegin(),alignment1[z].rend()); } for(int w=0;w<v2;++w) { alignment2[w] = string (alignment2[w].rbegin(),alignment2[w].rend()); } if (noEndGapPenalty) { //Need to fill out rest of alignment... if (max1 != size1) { for (int i=max1; i<size1; ++i ) { for(int z=0;z<v1;++z) alignment1[z]+= a1[z][i]; for(int w=0;w<v2;++w) alignment2[w]+= '-'; } } if (max2 != size2) { for (int i=max2; i<size2; ++i ) { for(int z=0;z<v1;++z) alignment1[z]+= '-'; for(int w=0;w<v2;++w) alignment2[w]+= a2[w][i]; } } } //returns for(int z=0;z<v1;++z) msa.push_back(alignment1[z]); for(int w=0;w<v2;++w) msa.push_back(alignment2[w]); if (debug) { for(int z=0;z<v1;++z) printf("%s\n",alignment1[z].c_str()); for(int w=0;w<v2;++w) printf("%s\n",alignment2[w].c_str()); } return msa; }