Mercurial > repos > fcaramia > custom_amplicon_alignment
comparison alignCustomAmplicon/utils.cpp @ 5:0aaf65fbb48a draft default tip
Uploaded
author | fcaramia |
---|---|
date | Wed, 20 Mar 2013 00:22:08 -0400 |
parents | d32bddcff685 |
children |
comparison
equal
deleted
inserted
replaced
4:22f35f830f48 | 5:0aaf65fbb48a |
---|---|
4 | 4 |
5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; | 5 #define max(a,b,c) a > b ? ( a > c ? a : c ) : ( b > c ? b : c ) ; |
6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; | 6 #define min(a,b,c) a < b ? ( a < c ? a : c ) : ( b < c ? b : c ) ; |
7 using namespace std; | 7 using namespace std; |
8 | 8 |
9 void printMatrix (double* m, int rows, int cols) | 9 void printMatrix (float* m, int rows, int cols) |
10 { | 10 { |
11 for(int i = 0; i < rows; i++) | 11 for(int i = 0; i < rows; i++) |
12 { | 12 { |
13 for(int j = 0; j < cols; j++) | 13 for(int j = 0; j < cols; j++) |
14 { | 14 { |
42 } | 42 } |
43 | 43 |
44 return d[size1-1][size2-1]; // return edit distance | 44 return d[size1-1][size2-1]; // return edit distance |
45 } | 45 } |
46 | 46 |
47 double nw_align2 (string &seq1, string &seq2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | 47 float nw_align2 (string &seq1, string &seq2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) |
48 { | 48 { |
49 //Do NW alignment of 2 strings... | 49 //Do NW alignment of 2 strings... |
50 int size1 = seq1.length(); | 50 int size1 = seq1.length(); |
51 int size2 = seq2.length(); | 51 int size2 = seq2.length(); |
52 | 52 |
53 double scores[size1+1][size2+1]; | 53 float scores[size1+1][size2+1]; |
54 char dir[size1+1][size2+1]; | 54 char dir[size1+1][size2+1]; |
55 char gaps[size1+1][size2+1]; | 55 char gaps[size1+1][size2+1]; |
56 | 56 |
57 for(int i = 0; i <= size1; i++) | 57 for(int i = 0; i <= size1; i++) |
58 { | 58 { |
59 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | 59 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen; |
60 dir[i][0] = 'U'; | 60 dir[i][0] = 'U'; |
61 gaps[i][0] = 'O'; | 61 gaps[i][0] = 'D'; |
62 } | 62 } |
63 for(int j = 0; j <= size2; j++) | 63 for(int j = 0; j <= size2; j++) |
64 { | 64 { |
65 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | 65 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen; |
66 dir[0][j] = 'L'; | 66 dir[0][j] = 'L'; |
67 gaps[0][j] = 'O'; | 67 gaps[0][j] = 'I'; |
68 } | 68 } |
69 | 69 |
70 scores[0][0] = 0; | 70 scores[0][0] = 0; |
71 gaps[0][0] = 'N'; | 71 gaps[0][0] = 'N'; |
72 | 72 |
74 { | 74 { |
75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | 75 for(int i = 0; i <= size1; i++) scores[i][0] = 0; |
76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | 76 for(int j = 0; j <= size2; j++) scores[0][j] = 0; |
77 } | 77 } |
78 | 78 |
79 double match; | 79 float match; |
80 double del; | 80 float del; |
81 double insert; | 81 float insert; |
82 | 82 |
83 for(int i = 1; i <= size1; i++) | 83 for(int i = 1; i <= size1; i++) |
84 for(int j = 1; j <= size2; j++) | 84 for(int j = 1; j <= size2; j++) |
85 { | 85 { |
86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') | 86 if (seq1[i-1] == 'N' || seq2[j-1] == 'N') |
87 { | 87 { |
88 match = scores[i-1][j-1]; | 88 match = scores[i-1][j-1]; |
89 } | 89 } |
90 else | 90 else |
91 { | 91 { |
92 match = scores[i-1][j-1] + (double)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw | 92 match = scores[i-1][j-1] + (float)(seq1[i-1] == seq2[j-1]? matchScore : missmatch_penalty); //1.9f clustalw |
93 } | 93 } |
94 del = scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | 94 del = scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt); |
95 insert = scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | 95 insert = scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt); |
96 | |
96 //debug | 97 //debug |
97 //printf("I:%d J:%d match = %f del %f insert:%f\n",i,j,match,del,insert); | 98 //if (match == del || match == insert) |
98 | 99 //{ |
99 if (match>del && match > insert) | 100 //printf("I:%d J:%d match = %f del %f insert %f %c %c\n",i,j,match,del,insert,seq1[i-1],seq2[j-1]); |
101 //} | |
102 | |
103 if (match > del && match > insert) | |
100 { | 104 { |
101 scores[i][j] = match; | 105 scores[i][j] = match; |
102 dir[i][j] = 'D'; | 106 dir[i][j] = 'D'; |
103 gaps[i][j] = 'N'; | 107 gaps[i][j] = 'N'; |
104 } | 108 } |
109 else if(match >= del && match >= insert && seq1[i-1] != seq2[j-1]) | |
110 { | |
111 scores[i][j] = match; | |
112 dir[i][j] = 'D'; | |
113 gaps[i][j] = 'N'; | |
114 } | |
105 else if(del > insert) | 115 else if(del > insert) |
106 { | 116 { |
107 scores[i][j] = del; | 117 scores[i][j] = del; |
108 dir[i][j] = 'U'; | 118 dir[i][j] = 'U'; |
109 gaps[i][j] = 'O'; | 119 gaps[i][j] = 'D'; |
110 } | 120 } |
111 else | 121 else |
112 { | 122 { |
113 scores[i][j] = insert; | 123 scores[i][j] = insert; |
114 dir[i][j] = 'L'; | 124 dir[i][j] = 'L'; |
115 gaps[i][j] = 'O'; | 125 gaps[i][j] = 'I'; |
116 } | 126 } |
117 } | 127 } |
118 | 128 |
119 //debug | 129 //debug |
120 //if (debug) | 130 //if (debug) |
127 | 137 |
128 int cont1 = size1; | 138 int cont1 = size1; |
129 int cont2 = size2; | 139 int cont2 = size2; |
130 int max1 = size1; | 140 int max1 = size1; |
131 int max2 = size2; | 141 int max2 = size2; |
132 double num = scores[size1][size2];; | 142 float num = scores[size1][size2];; |
133 | 143 |
134 if (noEndGapPenalty) | 144 if (noEndGapPenalty) |
135 { | 145 { |
136 //Search Maxes | 146 //Search Maxes |
137 | 147 |
238 | 248 |
239 return num; | 249 return num; |
240 | 250 |
241 } | 251 } |
242 | 252 |
243 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, double matchScore, double missmatch_penalty, double gapPenOpen, double gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) | 253 vector <string> nw_alignAlignments (vector<string> a1, vector<string> a2, float matchScore, float missmatch_penalty, float gapPenOpen, float gapPenExt, bool noFrontGapPenalty, bool noEndGapPenalty, bool debug) |
244 { | 254 { |
245 //Do NW alignment of 2 strings... | 255 //Do NW alignment of 2 strings... |
246 int size1 = a1[0].length(); | 256 int size1 = a1[0].length(); |
247 int size2 = a2[0].length(); | 257 int size2 = a2[0].length(); |
248 int v1 = a1.size(); | 258 int v1 = a1.size(); |
258 for (int i=0;i<v2;++i) | 268 for (int i=0;i<v2;++i) |
259 alignment2.push_back(string()); | 269 alignment2.push_back(string()); |
260 | 270 |
261 | 271 |
262 | 272 |
263 double scores[size1+1][size2+1]; | 273 float scores[size1+1][size2+1]; |
264 char dir[size1+1][size2+1]; | 274 char dir[size1+1][size2+1]; |
265 char gaps[size1+1][size2+1]; | 275 char gaps[size1+1][size2+1]; |
266 | 276 |
267 for(int i = 0; i <= size1; i++) | 277 for(int i = 0; i <= size1; i++) |
268 { | 278 { |
269 scores[i][0] = (double)(i-1)*gapPenExt + gapPenOpen; | 279 scores[i][0] = (float)(i-1)*gapPenExt + gapPenOpen; |
270 dir[i][0] = 'U'; | 280 dir[i][0] = 'U'; |
271 gaps[i][0] = 'O'; | 281 gaps[i][0] = 'D'; |
272 } | 282 } |
273 for(int j = 0; j <= size2; j++) | 283 for(int j = 0; j <= size2; j++) |
274 { | 284 { |
275 scores[0][j] = (double)(j-1)*gapPenExt + gapPenOpen; | 285 scores[0][j] = (float)(j-1)*gapPenExt + gapPenOpen; |
276 dir[0][j] = 'L'; | 286 dir[0][j] = 'L'; |
277 gaps[0][j] = 'O'; | 287 gaps[0][j] = 'I'; |
278 } | 288 } |
279 | 289 |
280 scores[0][0] = 0; | 290 scores[0][0] = 0; |
281 gaps[0][0] = 'N'; | 291 gaps[0][0] = 'N'; |
282 | 292 |
284 { | 294 { |
285 for(int i = 0; i <= size1; i++) scores[i][0] = 0; | 295 for(int i = 0; i <= size1; i++) scores[i][0] = 0; |
286 for(int j = 0; j <= size2; j++) scores[0][j] = 0; | 296 for(int j = 0; j <= size2; j++) scores[0][j] = 0; |
287 } | 297 } |
288 | 298 |
289 double match; | 299 float match; |
290 double del; | 300 float del; |
291 double insert; | 301 float insert; |
302 bool missmatch; | |
292 | 303 |
293 for(int i = 1; i <= size1; i++) | 304 for(int i = 1; i <= size1; i++) |
294 for(int j = 1; j <= size2; j++) | 305 for(int j = 1; j <= size2; j++) |
295 { | 306 { |
296 | 307 |
297 match = del = insert = 0.0f; | 308 match = del = insert = 0.0f; |
309 missmatch = true; | |
298 for(int z=0;z<v1;++z) | 310 for(int z=0;z<v1;++z) |
299 { | 311 { |
300 for(int w=0;w<v2;++w) | 312 for(int w=0;w<v2;++w) |
301 { | 313 { |
302 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') | 314 if (a1[z][i-1] == 'N' || a2[w][j-1] == 'N') |
303 { | 315 { |
304 match += scores[i-1][j-1]; | 316 match += scores[i-1][j-1]; |
305 } | 317 } |
306 else | 318 else |
307 { | 319 { |
308 match += scores[i-1][j-1] + (double)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); | 320 match += scores[i-1][j-1] + (float)(a1[z][i-1] == a2[w][j-1]? matchScore:missmatch_penalty); |
321 if(a1[z][i-1] == a2[w][j-1]) | |
322 { | |
323 missmatch = false; | |
324 } | |
309 } | 325 } |
310 del += scores[i-1][j] + (gaps[i-1][j] == 'N'? gapPenOpen:gapPenExt); | 326 del += scores[i-1][j] + (gaps[i-1][j] != 'D'? gapPenOpen:gapPenExt); |
311 insert += scores[i][j-1] + (gaps[i][j-1] == 'N'? gapPenOpen:gapPenExt); | 327 insert += scores[i][j-1] + (gaps[i][j-1] != 'I'? gapPenOpen:gapPenExt); |
312 } | 328 } |
313 } | 329 } |
314 | 330 |
315 match /= combs; | 331 match /= combs; |
316 del /= combs; | 332 del /= combs; |
320 { | 336 { |
321 scores[i][j] = match; | 337 scores[i][j] = match; |
322 dir[i][j] = 'D'; | 338 dir[i][j] = 'D'; |
323 gaps[i][j] = 'N'; | 339 gaps[i][j] = 'N'; |
324 } | 340 } |
341 else if(match >= del && match >= insert && missmatch) | |
342 { | |
343 scores[i][j] = match; | |
344 dir[i][j] = 'D'; | |
345 gaps[i][j] = 'N'; | |
346 } | |
325 else if(del > insert) | 347 else if(del > insert) |
326 { | 348 { |
327 scores[i][j] = del; | 349 scores[i][j] = del; |
328 dir[i][j] = 'U'; | 350 dir[i][j] = 'U'; |
329 gaps[i][j] = 'O'; | 351 gaps[i][j] = 'D'; |
330 } | 352 } |
331 else | 353 else |
332 { | 354 { |
333 scores[i][j] = insert; | 355 scores[i][j] = insert; |
334 dir[i][j] = 'L'; | 356 dir[i][j] = 'L'; |
335 gaps[i][j] = 'O'; | 357 gaps[i][j] = 'I'; |
336 } | 358 } |
337 } | 359 } |
338 | 360 |
339 //debug | 361 //debug |
340 // if (debug) | 362 // if (debug) |
344 | 366 |
345 int cont1 = size1; | 367 int cont1 = size1; |
346 int cont2 = size2; | 368 int cont2 = size2; |
347 int max1 = size1; | 369 int max1 = size1; |
348 int max2 = size2; | 370 int max2 = size2; |
349 double num = scores[size1][size2]; | 371 float num = scores[size1][size2]; |
350 | 372 |
351 if (noEndGapPenalty) | 373 if (noEndGapPenalty) |
352 { | 374 { |
353 //Search Maxes | 375 //Search Maxes |
354 | 376 |