Mercurial > repos > fcaramia > custom_amplicon_alignment
comparison alignCustomAmplicon/utils.cpp @ 5:0aaf65fbb48a draft default tip
Uploaded
| author | fcaramia |
|---|---|
| date | Wed, 20 Mar 2013 00:22:08 -0400 |
| parents | d32bddcff685 |
| children |
comparison
equal
deleted
inserted
replaced
| 4:22f35f830f48 | 5:0aaf65fbb48a |
|---|---|
| 4 | 4 |
| 5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; | 5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; |
| 6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; | 6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; |
| 7 using namespace std; | 7 using namespace std; |
| 8 | 8 |
| 9 void printMatrix (double* m, int rows, int cols) | 9 void printMatrix (float* m, int rows, int cols) |
| 10 { | 10 { |
| 11 for(int i = 0; i < rows; i++) | 11 for(int i = 0; i < rows; i++) |
| 12 { | 12 { |
| 13 for(int j = 0; j < cols; j++) | 13 for(int j = 0; j < cols; j++) |
| 14 { | 14 { |
| 42 } | 42 } |
| 43 | 43 |
| 44 return d[size1-1][size2-1]; // return edit distance | 44 return d[size1-1][size2-1]; // return edit distance |
| 45 } | 45 } |
| 46 | 46 |
| 47 double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | 47 float nw_align2 (string &seq1, string &seq2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) |
| 48 { | 48 { |
| 49 //Do NW alignment of 2 strings... | 49 //Do NW alignment of 2 strings... |
| 50 int size1 = seq1.length(); | 50 int size1 = seq1.length(); |
| 51 int size2 = seq2.length(); | 51 int size2 = seq2.length(); |
| 52 | 52 |
| 53 double scores[size1+1][size2+1]; | 53 float scores[size1+1][size2+1]; |
| 54 char dir[size1+1][size2+1]; | 54 char dir[size1+1][size2+1]; |
| 55 char gaps[size1+1][size2+1]; | 55 char gaps[size1+1][size2+1]; |
| 56 | 56 |
| 57 for(int i = 0; i <= size1; i++) | 57 for(int i = 0; i <= size1; i++) |
| 58 { | 58 { |
| 59 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | 59 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen; |
| 60 dir[i][0] = 'U'; | 60 dir[i][0] = 'U'; |
| 61 gaps[i][0] = 'O'; | 61 gaps[i][0] = 'D'; |
| 62 } | 62 } |
| 63 for(int j = 0; j <= size2; j++) | 63 for(int j = 0; j <= size2; j++) |
| 64 { | 64 { |
| 65 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | 65 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen; |
| 66 dir[0][j] = 'L'; | 66 dir[0][j] = 'L'; |
| 67 gaps[0][j] = 'O'; | 67 gaps[0][j] = 'I'; |
| 68 } | 68 } |
| 69 | 69 |
| 70 scores[0][0] = 0; | 70 scores[0][0] = 0; |
| 71 gaps[0][0] = 'N'; | 71 gaps[0][0] = 'N'; |
| 72 | 72 |
| 74 { | 74 { |
| 75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | 75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; |
| 76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | 76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; |
| 77 } | 77 } |
| 78 | 78 |
| 79 double match; | 79 float match; |
| 80 double del; | 80 float del; |
| 81 double insert; | 81 float insert; |
| 82 | 82 |
| 83 for(int i = 1; i <= size1; i++) | 83 for(int i = 1; i <= size1; i++) |
| 84 for(int j = 1; j <= size2; j++) | 84 for(int j = 1; j <= size2; j++) |
| 85 { | 85 { |
| 86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') | 86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') |
| 87 { | 87 { |
| 88 match = scores[i-1][j-1]; | 88 match = scores[i-1][j-1]; |
| 89 } | 89 } |
| 90 else | 90 else |
| 91 { | 91 { |
| 92 match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw | 92 match = scores[i-1][j-1] + (float)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw |
| 93 } | 93 } |
| 94 del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | 94 del = scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt); |
| 95 insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | 95 insert = scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt); |
| 96 | |
| 96 //debug | 97 //debug |
| 97 //printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert); | 98 //if (match == del || match == insert) |
| 98 | 99 //{ |
| 99 if (match>del && match > insert) | 100 //printf("I:%d J:%d match = %f del %f insert %f %c %c\n",i,j,match,del,insert,seq1[i-1],seq2[j-1]); |
| 101 //} | |
| 102 | |
| 103 if (match > del && match > insert) | |
| 100 { | 104 { |
| 101 scores[i][j] = match; | 105 scores[i][j] = match; |
| 102 dir[i][j] = 'D'; | 106 dir[i][j] = 'D'; |
| 103 gaps[i][j] = 'N'; | 107 gaps[i][j] = 'N'; |
| 104 } | 108 } |
| 109 else if(match >= del && match >= insert && seq1[i-1] != seq2[j-1]) | |
| 110 { | |
| 111 scores[i][j] = match; | |
| 112 dir[i][j] = 'D'; | |
| 113 gaps[i][j] = 'N'; | |
| 114 } | |
| 105 else if(del > insert) | 115 else if(del > insert) |
| 106 { | 116 { |
| 107 scores[i][j] = del; | 117 scores[i][j] = del; |
| 108 dir[i][j] = 'U'; | 118 dir[i][j] = 'U'; |
| 109 gaps[i][j] = 'O'; | 119 gaps[i][j] = 'D'; |
| 110 } | 120 } |
| 111 else | 121 else |
| 112 { | 122 { |
| 113 scores[i][j] = insert; | 123 scores[i][j] = insert; |
| 114 dir[i][j] = 'L'; | 124 dir[i][j] = 'L'; |
| 115 gaps[i][j] = 'O'; | 125 gaps[i][j] = 'I'; |
| 116 } | 126 } |
| 117 } | 127 } |
| 118 | 128 |
| 119 //debug | 129 //debug |
| 120 //if (debug) | 130 //if (debug) |
| 127 | 137 |
| 128 int cont1 = size1; | 138 int cont1 = size1; |
| 129 int cont2 = size2; | 139 int cont2 = size2; |
| 130 int max1 = size1; | 140 int max1 = size1; |
| 131 int max2 = size2; | 141 int max2 = size2; |
| 132 double num = scores[size1][size2];; | 142 float num = scores[size1][size2];; |
| 133 | 143 |
| 134 if (noEndGapPenalty) | 144 if (noEndGapPenalty) |
| 135 { | 145 { |
| 136 //Search Maxes | 146 //Search Maxes |
| 137 | 147 |
| 238 | 248 |
| 239 return num; | 249 return num; |
| 240 | 250 |
| 241 } | 251 } |
| 242 | 252 |
| 243 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | 253 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) |
| 244 { | 254 { |
| 245 //Do NW alignment of 2 strings... | 255 //Do NW alignment of 2 strings... |
| 246 int size1 = a1[0].length(); | 256 int size1 = a1[0].length(); |
| 247 int size2 = a2[0].length(); | 257 int size2 = a2[0].length(); |
| 248 int v1 = a1.size(); | 258 int v1 = a1.size(); |
| 258 for (int i=0;i<v2;++i) | 268 for (int i=0;i<v2;++i) |
| 259 alignment2.push_back(string()); | 269 alignment2.push_back(string()); |
| 260 | 270 |
| 261 | 271 |
| 262 | 272 |
| 263 double scores[size1+1][size2+1]; | 273 float scores[size1+1][size2+1]; |
| 264 char dir[size1+1][size2+1]; | 274 char dir[size1+1][size2+1]; |
| 265 char gaps[size1+1][size2+1]; | 275 char gaps[size1+1][size2+1]; |
| 266 | 276 |
| 267 for(int i = 0; i <= size1; i++) | 277 for(int i = 0; i <= size1; i++) |
| 268 { | 278 { |
| 269 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | 279 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen; |
| 270 dir[i][0] = 'U'; | 280 dir[i][0] = 'U'; |
| 271 gaps[i][0] = 'O'; | 281 gaps[i][0] = 'D'; |
| 272 } | 282 } |
| 273 for(int j = 0; j <= size2; j++) | 283 for(int j = 0; j <= size2; j++) |
| 274 { | 284 { |
| 275 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | 285 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen; |
| 276 dir[0][j] = 'L'; | 286 dir[0][j] = 'L'; |
| 277 gaps[0][j] = 'O'; | 287 gaps[0][j] = 'I'; |
| 278 } | 288 } |
| 279 | 289 |
| 280 scores[0][0] = 0; | 290 scores[0][0] = 0; |
| 281 gaps[0][0] = 'N'; | 291 gaps[0][0] = 'N'; |
| 282 | 292 |
| 284 { | 294 { |
| 285 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | 295 for(int i = 0; i <= size1; i++) scores[i][0] = 0; |
| 286 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | 296 for(int j = 0; j <= size2; j++) scores[0][j] = 0; |
| 287 } | 297 } |
| 288 | 298 |
| 289 double match; | 299 float match; |
| 290 double del; | 300 float del; |
| 291 double insert; | 301 float insert; |
| 302 bool missmatch; | |
| 292 | 303 |
| 293 for(int i = 1; i <= size1; i++) | 304 for(int i = 1; i <= size1; i++) |
| 294 for(int j = 1; j <= size2; j++) | 305 for(int j = 1; j <= size2; j++) |
| 295 { | 306 { |
| 296 | 307 |
| 297 match = del = insert = 0.0f; | 308 match = del = insert = 0.0f; |
| 309 missmatch = true; | |
| 298 for(int z=0;z<v1;++z) | 310 for(int z=0;z<v1;++z) |
| 299 { | 311 { |
| 300 for(int w=0;w<v2;++w) | 312 for(int w=0;w<v2;++w) |
| 301 { | 313 { |
| 302 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') | 314 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') |
| 303 { | 315 { |
| 304 match += scores[i-1][j-1]; | 316 match += scores[i-1][j-1]; |
| 305 } | 317 } |
| 306 else | 318 else |
| 307 { | 319 { |
| 308 match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); | 320 match += scores[i-1][j-1] + (float)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); |
| 321 if(a1[z][i-1] == a2[w][j-1]) | |
| 322 { | |
| 323 missmatch = false; | |
| 324 } | |
| 309 } | 325 } |
| 310 del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | 326 del += scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt); |
| 311 insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | 327 insert += scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt); |
| 312 } | 328 } |
| 313 } | 329 } |
| 314 | 330 |
| 315 match /= combs; | 331 match /= combs; |
| 316 del /= combs; | 332 del /= combs; |
| 320 { | 336 { |
| 321 scores[i][j] = match; | 337 scores[i][j] = match; |
| 322 dir[i][j] = 'D'; | 338 dir[i][j] = 'D'; |
| 323 gaps[i][j] = 'N'; | 339 gaps[i][j] = 'N'; |
| 324 } | 340 } |
| 341 else if(match >= del && match >= insert && missmatch) | |
| 342 { | |
| 343 scores[i][j] = match; | |
| 344 dir[i][j] = 'D'; | |
| 345 gaps[i][j] = 'N'; | |
| 346 } | |
| 325 else if(del > insert) | 347 else if(del > insert) |
| 326 { | 348 { |
| 327 scores[i][j] = del; | 349 scores[i][j] = del; |
| 328 dir[i][j] = 'U'; | 350 dir[i][j] = 'U'; |
| 329 gaps[i][j] = 'O'; | 351 gaps[i][j] = 'D'; |
| 330 } | 352 } |
| 331 else | 353 else |
| 332 { | 354 { |
| 333 scores[i][j] = insert; | 355 scores[i][j] = insert; |
| 334 dir[i][j] = 'L'; | 356 dir[i][j] = 'L'; |
| 335 gaps[i][j] = 'O'; | 357 gaps[i][j] = 'I'; |
| 336 } | 358 } |
| 337 } | 359 } |
| 338 | 360 |
| 339 //debug | 361 //debug |
| 340 // if (debug) | 362 // if (debug) |
| 344 | 366 |
| 345 int cont1 = size1; | 367 int cont1 = size1; |
| 346 int cont2 = size2; | 368 int cont2 = size2; |
| 347 int max1 = size1; | 369 int max1 = size1; |
| 348 int max2 = size2; | 370 int max2 = size2; |
| 349 double num = scores[size1][size2]; | 371 float num = scores[size1][size2]; |
| 350 | 372 |
| 351 if (noEndGapPenalty) | 373 if (noEndGapPenalty) |
| 352 { | 374 { |
| 353 //Search Maxes | 375 //Search Maxes |
| 354 | 376 |