Mercurial > repos > padge > trimal
comparison trimal_repo/source/utils.cpp @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
| author | padge |
|---|---|
| date | Fri, 25 Mar 2022 17:10:43 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b15a3147e604 |
|---|---|
| 1 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
| 2 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
| 3 | |
| 4 trimAl v1.4: a tool for automated alignment trimming in large-scale | |
| 5 phylogenetics analyses. | |
| 6 | |
| 7 readAl v1.4: a tool for automated alignment conversion among different | |
| 8 formats. | |
| 9 | |
| 10 2009-2015 Capella-Gutierrez S. and Gabaldon, T. | |
| 11 [scapella, tgabaldon]@crg.es | |
| 12 | |
| 13 This file is part of trimAl/readAl. | |
| 14 | |
| 15 trimAl/readAl are free software: you can redistribute it and/or modify | |
| 16 it under the terms of the GNU General Public License as published by | |
| 17 the Free Software Foundation, the last available version. | |
| 18 | |
| 19 trimAl/readAl are distributed in the hope that it will be useful, | |
| 20 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| 22 GNU General Public License for more details. | |
| 23 | |
| 24 You should have received a copy of the GNU General Public License | |
| 25 along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>. | |
| 26 | |
| 27 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** | |
| 28 ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 29 | |
| 30 #include "utils.h" | |
| 31 #include "values.h" | |
| 32 #include "defines.h" | |
| 33 | |
| 34 /*++++++++++++++++++++++++++++++++++++++++++++++++ | |
| 35 | void utils::initlVect(int *, int, int) | | |
| 36 | This method is used to initialize all | | |
| 37 | positions of a vector with a given value. | | |
| 38 ++++++++++++++++++++++++++++++++++++++++++++++++*/ | |
| 39 | |
| 40 void utils::initlVect(int *vector, int tam, int valor) { | |
| 41 | |
| 42 for(int i = 0; i < tam; i++) vector[i] = valor; | |
| 43 | |
| 44 } | |
| 45 | |
| 46 void utils::initlVect(float *vector, int tam, float valor) { | |
| 47 | |
| 48 for(int i = 0; i < tam; i++) vector[i] = valor; | |
| 49 | |
| 50 } | |
| 51 | |
| 52 | |
| 53 /*+++++++++++++++++++++++++++++++++++++++++++++ | |
| 54 | void utils::copyVect(int *, int *, int) | | |
| 55 | This method copies integer vector 1 to | | |
| 56 | integer vector 2. | | |
| 57 +++++++++++++++++++++++++++++++++++++++++++++*/ | |
| 58 | |
| 59 void utils::copyVect(int *vect1, int *vect2, int tam) { | |
| 60 | |
| 61 for(int i = 0; i < tam; i++) vect2[i] = vect1[i]; | |
| 62 | |
| 63 } | |
| 64 | |
| 65 | |
| 66 /*+++++++++++++++++++++++++++++++++++++++++++++++ | |
| 67 | void utils::copyVect(float *, float *, int) | | |
| 68 | This method copies float vector 1 to | | |
| 69 | float vector 2. | | |
| 70 +++++++++++++++++++++++++++++++++++++++++++++++*/ | |
| 71 | |
| 72 void utils::copyVect(float *vect1, float *vect2, int tam) { | |
| 73 | |
| 74 for(int i = 0; i < tam; i++) vect2[i] = vect1[i]; | |
| 75 | |
| 76 } | |
| 77 | |
| 78 | |
| 79 /*+++++++++++++++++++++++++++++++++++++++++ | |
| 80 | int utils::roundToInf(double) | | |
| 81 | This method rounds a double number | | |
| 82 | to the inferior integer. | | |
| 83 +++++++++++++++++++++++++++++++++++++++++*/ | |
| 84 | |
| 85 int utils::roundToInf(double number) { | |
| 86 | |
| 87 return ((int) number); | |
| 88 } | |
| 89 | |
| 90 | |
| 91 /*+++++++++++++++++++++++++++++++++++++++++ | |
| 92 | int utils::roundInt(double) | | |
| 93 | This method rounds a double number | | |
| 94 | to an integer. | | |
| 95 +++++++++++++++++++++++++++++++++++++++++*/ | |
| 96 | |
| 97 int utils::roundInt(double number) { | |
| 98 | |
| 99 return ((int) ((double) number + 0.5)); | |
| 100 } | |
| 101 | |
| 102 | |
| 103 /*+++++++++++++++++++++++++++++++++++++++++ | |
| 104 | int utils::roundToSup(double) | | |
| 105 | This method rounds a double number | | |
| 106 | to the greater integer. | | |
| 107 +++++++++++++++++++++++++++++++++++++++++*/ | |
| 108 | |
| 109 int utils::roundToSup(double number) { | |
| 110 | |
| 111 return ((int) ((double) number + 1.0)); | |
| 112 } | |
| 113 | |
| 114 | |
| 115 /*+++++++++++++++++++++++++++++++++++++++++ | |
| 116 | int utils::max(int, int) | | |
| 117 | This method returns the maximum | | |
| 118 | value of the two given arguments. | | |
| 119 +++++++++++++++++++++++++++++++++++++++++*/ | |
| 120 | |
| 121 int utils::max(int x, int y) { | |
| 122 | |
| 123 if(x > y) return x; | |
| 124 else return y; | |
| 125 } | |
| 126 | |
| 127 float utils::max(float x, float y) { | |
| 128 | |
| 129 if(x > y) return x; | |
| 130 else return y; | |
| 131 } | |
| 132 | |
| 133 double utils::max(double x, double y) { | |
| 134 | |
| 135 if(x > y) return x; | |
| 136 else return y; | |
| 137 } | |
| 138 | |
| 139 int utils::min(int x, int y) { | |
| 140 | |
| 141 if(x < y) return x; | |
| 142 else return y; | |
| 143 } | |
| 144 | |
| 145 float utils::min(float x, float y) { | |
| 146 | |
| 147 if(x < y) return x; | |
| 148 else return y; | |
| 149 } | |
| 150 | |
| 151 double utils::min(double x, double y) { | |
| 152 | |
| 153 if(x < y) return x; | |
| 154 else return y; | |
| 155 } | |
| 156 | |
| 157 /*+++++++++++++++++++++++++++++++++++++++++++ | |
| 158 | bool utils::isNumber(char *) | | |
| 159 | This method checks if the given | | |
| 160 | string is a float number. | | |
| 161 +++++++++++++++++++++++++++++++++++++++++++*/ | |
| 162 | |
| 163 bool utils::isNumber(char *num){ | |
| 164 | |
| 165 int tam = strlen(num); | |
| 166 int i, flt = 1, expn = 1, sgn = 1; | |
| 167 | |
| 168 for(i = 0; i < tam; i++) { | |
| 169 if(num[i] == '.' && flt) | |
| 170 flt = 0; | |
| 171 | |
| 172 else if(((num[i] == 'e') ||(num[i] == 'E')) && expn) | |
| 173 expn = 0; | |
| 174 | |
| 175 else if(((num[i] == '+') ||(num[i] == '-')) && sgn) { | |
| 176 if(!expn) sgn = 0; | |
| 177 } | |
| 178 else if(num[i] > '9' || num[i] < '0') | |
| 179 return false; | |
| 180 } | |
| 181 | |
| 182 return true; | |
| 183 | |
| 184 } | |
| 185 | |
| 186 | |
| 187 /*+++++++++++++++++++++++++++++++++++++++++++ | |
| 188 | bool utils::compare(char *, char *) | | |
| 189 | This method compares the two strings | | |
| 190 | given, and returns true if the two | | |
| 191 | strings are equal. | | |
| 192 +++++++++++++++++++++++++++++++++++++++++++*/ | |
| 193 | |
| 194 bool utils::compare(char *a, char *b){ | |
| 195 | |
| 196 return(!strcmp(a,b)); | |
| 197 } | |
| 198 | |
| 199 | |
| 200 /*++++++++++++++++++++++++++++++++++++++++++ | |
| 201 | void utils::removeSpaces(char *, char *) | | |
| 202 | This method removes spaces in the | | |
| 203 | input string and put the result in | | |
| 204 | the output string. | | |
| 205 ++++++++++++++++++++++++++++++++++++++++++*/ | |
| 206 | |
| 207 void utils::removeSpaces(char *in, char *out){ | |
| 208 | |
| 209 unsigned int i, j = 0; | |
| 210 | |
| 211 for(i = 0; i < strlen(in); i++){ | |
| 212 | |
| 213 if(in[i] != ' ' && in[i] != '\t'){ | |
| 214 out[j] = in[i]; | |
| 215 j++; | |
| 216 } | |
| 217 } | |
| 218 out[j] = '\0'; | |
| 219 } | |
| 220 | |
| 221 | |
| 222 /*++++++++++++++++++++++++++++++++++++++++++ | |
| 223 | void utils::quicksort(float *, int, int) | | |
| 224 | This method sorts the vector using | | |
| 225 | the quicksort method. | | |
| 226 ++++++++++++++++++++++++++++++++++++++++++*/ | |
| 227 | |
| 228 void utils::quicksort(float *vect, int ini, int fin) { | |
| 229 | |
| 230 float elem_div; | |
| 231 int i, j; | |
| 232 | |
| 233 if ((ini >= fin) || (fin < 0)) | |
| 234 return; | |
| 235 | |
| 236 elem_div = vect[fin]; | |
| 237 i = ini - 1; | |
| 238 j = fin; | |
| 239 | |
| 240 while (1) { | |
| 241 | |
| 242 while (vect[++i] < elem_div) | |
| 243 if(i == fin) | |
| 244 break; | |
| 245 | |
| 246 while (vect[--j] > elem_div) | |
| 247 if(j == 0) | |
| 248 break; | |
| 249 | |
| 250 if(i < j) | |
| 251 swap(&vect[i], &vect[j]); | |
| 252 else | |
| 253 break; | |
| 254 } | |
| 255 | |
| 256 swap(&vect[i], &vect[fin]); | |
| 257 | |
| 258 quicksort(vect, ini, i - 1); | |
| 259 quicksort(vect, i + 1, fin); | |
| 260 } | |
| 261 | |
| 262 | |
| 263 /*++++++++++++++++++++++++++++++++++++++++ | |
| 264 | void utils::swap(float *, float *) | | |
| 265 | This method swaps the values in a | | |
| 266 | and b. | | |
| 267 ++++++++++++++++++++++++++++++++++++++++*/ | |
| 268 | |
| 269 void utils::swap(float *a, float *b){ | |
| 270 | |
| 271 float temp; | |
| 272 | |
| 273 temp = *a; | |
| 274 *a = *b; | |
| 275 *b = temp; | |
| 276 | |
| 277 } | |
| 278 | |
| 279 /*++++++++++++++++++++++++++++++++++++++++++ | |
| 280 | void utils::quicksort(int *, int, int) | | |
| 281 | This method sorts the vector using | | |
| 282 | the quicksort method. | | |
| 283 ++++++++++++++++++++++++++++++++++++++++++*/ | |
| 284 | |
| 285 void utils::quicksort(int *vect, int ini, int fin) { | |
| 286 | |
| 287 int i, j, elem_div; | |
| 288 | |
| 289 if ((ini >= fin) || (fin < 0)) | |
| 290 return; | |
| 291 | |
| 292 elem_div = vect[fin]; | |
| 293 i = ini - 1; | |
| 294 j = fin; | |
| 295 | |
| 296 while (1) { | |
| 297 | |
| 298 while (vect[++i] < elem_div) | |
| 299 if(i == fin) | |
| 300 break; | |
| 301 | |
| 302 while (vect[--j] > elem_div) | |
| 303 if(j == 0) | |
| 304 break; | |
| 305 | |
| 306 if(i < j) | |
| 307 swap(&vect[i], &vect[j]); | |
| 308 else | |
| 309 break; | |
| 310 } | |
| 311 | |
| 312 swap(&vect[i], &vect[fin]); | |
| 313 | |
| 314 quicksort(vect, ini, i - 1); | |
| 315 quicksort(vect, i + 1, fin); | |
| 316 } | |
| 317 | |
| 318 | |
| 319 /*++++++++++++++++++++++++++++++++++++++++ | |
| 320 | void utils::swap(int *, int *) | | |
| 321 | This method swaps the values in a | | |
| 322 | and b. | | |
| 323 ++++++++++++++++++++++++++++++++++++++++*/ | |
| 324 | |
| 325 void utils::swap(int *a, int *b) { | |
| 326 | |
| 327 int temp; | |
| 328 | |
| 329 temp = *a; | |
| 330 *a = *b; | |
| 331 *b = temp; | |
| 332 | |
| 333 } | |
| 334 | |
| 335 | |
| 336 void utils::quicksort(int **vect, int ini, int fin) { | |
| 337 | |
| 338 float elem_div; | |
| 339 int i, j; | |
| 340 | |
| 341 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 342 if ((ini >= fin) || (fin < 0)) | |
| 343 return; | |
| 344 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 345 | |
| 346 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 347 elem_div = vect[fin][0]; | |
| 348 i = ini - 1; | |
| 349 j = fin; | |
| 350 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 351 | |
| 352 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 353 while (true) { | |
| 354 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 355 while (vect[++i][0] < elem_div) if(i == fin) break; | |
| 356 while (vect[--j][0] > elem_div) if(j == 0) break; | |
| 357 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 358 | |
| 359 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 360 if(i < j) swap(&vect[i], &vect[j]); | |
| 361 else break; | |
| 362 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 363 } | |
| 364 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 365 | |
| 366 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 367 swap(&vect[i], &vect[fin]); | |
| 368 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 369 quicksort(vect, ini, i - 1); | |
| 370 quicksort(vect, i + 1, fin); | |
| 371 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 372 } | |
| 373 | |
| 374 void utils::swap(int **a, int **b){ | |
| 375 | |
| 376 int *temp; | |
| 377 | |
| 378 temp = *a; | |
| 379 *a = *b; | |
| 380 *b = temp; | |
| 381 } | |
| 382 | |
| 383 /* ***************************************************************************** | |
| 384 * | |
| 385 * START - Refactored code | |
| 386 * | |
| 387 * ************************************************************************** */ | |
| 388 | |
| 389 bool utils::checkFile(ifstream &file) { | |
| 390 /* Check if a given file exists and its size is greater than 0 */ | |
| 391 long begin, end; | |
| 392 | |
| 393 /* Check whether input file exists or not */ | |
| 394 if(!file) | |
| 395 return false; | |
| 396 | |
| 397 /* Check input file sizes. A valid file should have a size grater than 0 */ | |
| 398 begin = file.tellg(); | |
| 399 file.seekg(0, ios::end); | |
| 400 end = file.tellg(); | |
| 401 file.seekg(0, ios::beg); | |
| 402 /* Compare difference between file start and end. | |
| 403 * Depending on result, return True or False */ | |
| 404 if(!(end - begin)) | |
| 405 return false; | |
| 406 return true; | |
| 407 } | |
| 408 | |
| 409 char* utils::readLine(ifstream &file) { | |
| 410 /* Read a new line from current input stream. This function is better than | |
| 411 * standard one since cares of operative system compability. It is useful | |
| 412 * as well because remove tabs and blank spaces at lines beginning/ending */ | |
| 413 | |
| 414 int state; | |
| 415 char c = ' '; | |
| 416 string nline; | |
| 417 static char *line = NULL; | |
| 418 | |
| 419 /* Check it the end of the file has been reached or not */ | |
| 420 if(file.eof()) | |
| 421 return NULL; | |
| 422 | |
| 423 /* Store first line found. For -Windows & MacOS compatibility- carriage return | |
| 424 * is considered as well as a new line character */ | |
| 425 for( ; (c != '\n') && ((!file.eof())); file.read(&c, 1)) | |
| 426 if ((c != '\r')) | |
| 427 nline.resize(nline.size() + 1, c); | |
| 428 | |
| 429 /* Remove blank spaces & tabs from the beginning of the line */ | |
| 430 state = nline.find(" ", 0); | |
| 431 while(state != (int) string::npos && state == 0) { | |
| 432 nline.erase(state, 1); | |
| 433 state = nline.find(" ", state); | |
| 434 } | |
| 435 | |
| 436 state = nline.find("\t", 0); | |
| 437 while(state != (int) string::npos && state == 0) { | |
| 438 nline.erase(state, 1); | |
| 439 state = nline.find("\t", state); | |
| 440 } | |
| 441 | |
| 442 /* If there is nothing to return, give back a NULL pointer ... */ | |
| 443 if(nline.size() == 0) | |
| 444 return NULL; | |
| 445 | |
| 446 /* Otherwise, initialize the appropiate data structure, | |
| 447 * dump the data and return it */ | |
| 448 line = new char[nline.size() + 1]; | |
| 449 strcpy(line, nline.c_str()); | |
| 450 return line; | |
| 451 } | |
| 452 | |
| 453 char* utils::trimLine(string nline) { | |
| 454 /* This function is used to remove comments inbetween a biological sequence. | |
| 455 * Remove all content surrounded by ("") or ([]). It wans as well when a | |
| 456 * mismatch for these flags is found */ | |
| 457 | |
| 458 int pos, next; | |
| 459 static char *line; | |
| 460 | |
| 461 /* Set-up lower and upper limit to look for comments inside of input string */ | |
| 462 pos = -1; | |
| 463 | |
| 464 /* Identify comments inside of input sequence and remove it */ | |
| 465 while(true) { | |
| 466 pos = nline.find("\"", (pos + 1)); | |
| 467 | |
| 468 /* When there is not any more a comment inside of sequence, | |
| 469 * go out from this loop */ | |
| 470 if(pos == (int) string::npos) | |
| 471 break; | |
| 472 | |
| 473 /* Look for closing flag */ | |
| 474 next = nline.rfind("\"", nline.size()); | |
| 475 | |
| 476 /* If a pair of comments flags '"' is found, remove everything inbetween */ | |
| 477 if((int) nline.find("\"", (pos + 1)) == next) { | |
| 478 nline.erase(pos, (next - pos + 1)); | |
| 479 pos = -1; | |
| 480 } | |
| 481 | |
| 482 /* If there is only one flag '"' for comments inside of sequence, | |
| 483 * user should be warned about that */ | |
| 484 if (pos == next) { | |
| 485 cerr << endl << "ERROR: Possible (\") mismatch for comments" << endl; | |
| 486 return NULL; | |
| 487 } | |
| 488 } | |
| 489 | |
| 490 /* Look for other kind of comments, in this case those with [] */ | |
| 491 while(true) { | |
| 492 pos = -1; | |
| 493 next = -1; | |
| 494 | |
| 495 /* Search for last opened bracket. It is supposed to be the first one for | |
| 496 * being close */ | |
| 497 while((pos = nline.find("[", (pos + 1))) != (int) string::npos) | |
| 498 next = pos; | |
| 499 | |
| 500 /* If no opening bracket has been found. | |
| 501 * Check if there is any closing one */ | |
| 502 if (next == -1) { | |
| 503 /* There are not any bracket in input string */ | |
| 504 if ((int) nline.find("]", 0) == (int) string::npos) | |
| 505 break; | |
| 506 /* Otherwise, warn about the error */ | |
| 507 cerr << endl << "ERROR: Brackets (]) mismatch found" << endl; | |
| 508 return NULL; | |
| 509 } | |
| 510 | |
| 511 /* Look for closest closing bracket to the opening one found */ | |
| 512 pos = nline.find("]", (next + 1)); | |
| 513 | |
| 514 /* If no closing bracket has been found. Warn about the mismatch */ | |
| 515 if (pos == (int) string::npos) { | |
| 516 cerr << endl << "ERROR: Brackets ([) mismatch found" << endl; | |
| 517 return NULL; | |
| 518 } | |
| 519 | |
| 520 /* When both brackets have been found, remove comments inbetween them */ | |
| 521 nline.erase(next, (pos - next + 1)); | |
| 522 } | |
| 523 | |
| 524 /* Check if after removing all comments from input string there is still part | |
| 525 * of sequences or not */ | |
| 526 if(nline.size() == 0) | |
| 527 return NULL; | |
| 528 | |
| 529 /* Initialize and store resulting sequence into an appropiate structure */ | |
| 530 line = new char[nline.size() + 1]; | |
| 531 strcpy(line, nline.c_str()); | |
| 532 | |
| 533 return line; | |
| 534 } | |
| 535 /* ***************************************************************************** | |
| 536 * | |
| 537 * END - Refactored code | |
| 538 * | |
| 539 * ************************************************************************** */ | |
| 540 string utils::getReverse(string toReverse) { | |
| 541 | |
| 542 string line; | |
| 543 int i; | |
| 544 | |
| 545 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 546 for(i = toReverse.size() - 1; i >= 0; i--) | |
| 547 line += toReverse[i]; | |
| 548 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 549 | |
| 550 return line; | |
| 551 } | |
| 552 | |
| 553 string utils::removeCharacter(char c, string line) { | |
| 554 | |
| 555 int pos; | |
| 556 | |
| 557 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 558 pos = line.find(c, 0); | |
| 559 while(pos != (int) string::npos) { | |
| 560 line.erase(pos, 1); | |
| 561 pos = line.find(c, pos); | |
| 562 } | |
| 563 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 564 | |
| 565 return line; | |
| 566 } | |
| 567 | |
| 568 int utils::checkTypeAlignment(int seqNumber, int residNumber, string *sequences) { | |
| 569 | |
| 570 int i, j, k, l, hitDNA, hitRNA, degenerate, gDNA, gRNA, extDNA, extRNA; | |
| 571 float ratioDNA, ratioRNA; | |
| 572 /* Standard tables */ | |
| 573 char listRNA[11] = "AGCUNagcun"; | |
| 574 char listDNA[11] = "AGCTNagctn"; | |
| 575 | |
| 576 /* Degenerate Nucleotides codes */ | |
| 577 char degeneratedCodes[21] = "MmRrWwSsYyKkVvHhDdBb"; | |
| 578 | |
| 579 /* For each sequences, this method locks at the 100 letters (excluding gaps). | |
| 580 * The method is able to distinguish between pure DNA/RNA nucleotides or those | |
| 581 * containing degenerate Nucleotide letters */ | |
| 582 for(i = 0, gDNA = 0, gRNA = 0, extDNA = 0, extRNA = 0; i < seqNumber; i++) { | |
| 583 | |
| 584 /* Looks at the 100 letters (excluding gaps) while doesn's get the sequence's end */ | |
| 585 /* When there are less than a 100 characters, break the loop before reaching that limit */ | |
| 586 residNumber = (int) sequences[i].size(); | |
| 587 //~ for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber && k < 100; j++) | |
| 588 for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber; j++) | |
| 589 if(sequences[i][j] != '-' && sequences[i][j] != '.' && sequences[i][j] != '?') { | |
| 590 k++; | |
| 591 | |
| 592 /* Recognizes between DNA and RNA. */ | |
| 593 for(l = 0; l < (int) strlen(listDNA); l++) | |
| 594 if(listDNA[l] == sequences[i][j]) | |
| 595 hitDNA++; | |
| 596 | |
| 597 for(l = 0; l < (int) strlen(listRNA); l++) | |
| 598 if(listRNA[l] == sequences[i][j]) | |
| 599 hitRNA++; | |
| 600 | |
| 601 for(l = 0; l < (int) strlen(degeneratedCodes); l++) | |
| 602 if(degeneratedCodes[l] == sequences[i][j]) | |
| 603 degenerate++; | |
| 604 } | |
| 605 | |
| 606 /* If input sequences have less than 95% of nucleotides, even when residues | |
| 607 * are treated with degenerated codes, consider the input file as containing | |
| 608 * amino-acidic sequences. */ | |
| 609 ratioDNA = float(degenerate + hitDNA)/k; | |
| 610 ratioRNA = float(degenerate + hitRNA)/k; | |
| 611 | |
| 612 if(ratioDNA < 0.95 && ratioRNA < 0.95) | |
| 613 return AAType; | |
| 614 | |
| 615 /* Identify precisely if nucleotides sequences are DNA/RNA strict or | |
| 616 * any degenerate code has been used in the sequence */ | |
| 617 else if(hitRNA > hitDNA && degenerate == 0) | |
| 618 gRNA++; | |
| 619 else if(hitRNA > hitDNA && degenerate != 0) | |
| 620 extRNA++; | |
| 621 else if(hitRNA < hitDNA && degenerate == 0) | |
| 622 gDNA++; | |
| 623 else if(hitRNA < hitDNA && degenerate != 0) | |
| 624 extDNA++; | |
| 625 } | |
| 626 /* Return the datatype with greater values, considering always degenerate | |
| 627 * codes */ | |
| 628 if (extDNA != 0 && extDNA > extRNA) | |
| 629 return DNADeg; | |
| 630 else if (extRNA != 0 && extDNA < extRNA) | |
| 631 return RNADeg; | |
| 632 else if(gRNA > gDNA) | |
| 633 return RNAType; | |
| 634 else | |
| 635 return DNAType; | |
| 636 } | |
| 637 | |
| 638 int* utils::readNumbers_StartEnd(string line) { | |
| 639 | |
| 640 int comma, nElems = 0; | |
| 641 static int *numbers; | |
| 642 | |
| 643 comma = -1; | |
| 644 while((comma = line.find(",", comma + 1)) != (int) string::npos) | |
| 645 nElems += 2; | |
| 646 | |
| 647 //~ If there is more than two numbers separated by a comma, return NULL | |
| 648 if(nElems != 2) | |
| 649 return NULL; | |
| 650 | |
| 651 numbers = new int[2]; | |
| 652 comma = line.find(",", 0); | |
| 653 numbers[0] = atoi(line.substr(0, comma).c_str()); | |
| 654 numbers[1] = atoi(line.substr(comma+1).c_str()); | |
| 655 | |
| 656 return numbers; | |
| 657 } | |
| 658 | |
| 659 | |
| 660 int* utils::readNumbers(string line) { | |
| 661 | |
| 662 int i, comma, separ, init, nElems = 0; | |
| 663 static int *numbers; | |
| 664 | |
| 665 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 666 comma = -1; | |
| 667 while((comma = line.find(",", comma + 1)) != (int) string::npos) | |
| 668 nElems += 2; | |
| 669 nElems += 2; | |
| 670 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 671 | |
| 672 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 673 numbers = new int[nElems + 1]; | |
| 674 numbers[0] = nElems; | |
| 675 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 676 | |
| 677 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 678 init = 0; | |
| 679 i = 1; | |
| 680 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 681 | |
| 682 do { | |
| 683 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 684 comma = line.find(",", init); | |
| 685 separ = line.find("-", init); | |
| 686 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 687 | |
| 688 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 689 if(((separ < comma) || (comma == (int) string::npos)) && (separ != (int) string::npos)) { | |
| 690 numbers[i++] = atoi(line.substr(init, separ - init).c_str()); | |
| 691 numbers[i++] = atoi(line.substr(separ+1, comma - separ - 1).c_str()); | |
| 692 init = comma + 1; | |
| 693 } | |
| 694 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 695 | |
| 696 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 697 else if((separ > comma) || (separ == (int) string::npos)) { | |
| 698 numbers[i++] = atoi(line.substr(init, comma - init).c_str()); | |
| 699 numbers[i++] = atoi(line.substr(init, comma - init).c_str()); | |
| 700 init = comma + 1; | |
| 701 } | |
| 702 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 703 | |
| 704 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 705 if(numbers[i-2] < 0) | |
| 706 return NULL; | |
| 707 if(numbers[i-1] < numbers[i-2]) | |
| 708 return NULL; | |
| 709 if(comma == (int) string::npos) | |
| 710 break; | |
| 711 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 712 } while(true); | |
| 713 | |
| 714 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 715 return numbers; | |
| 716 /* ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 717 } | |
| 718 | |
| 719 | |
| 720 char utils::determineColor(char res, string column) { | |
| 721 | |
| 722 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 723 if(toupper(res) == 'G') | |
| 724 return 'o'; | |
| 725 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 726 else if(toupper(res) == 'P') | |
| 727 return 'y'; | |
| 728 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 729 | |
| 730 else if(res != '-') { | |
| 731 switch(toupper(res)) { | |
| 732 | |
| 733 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 734 /* (W, L, V, I, M, F): {50%, p}{60%, wlvimafcyhp} */ | |
| 735 case 87: case 76: case 86: case 73: case 77: case 70: | |
| 736 if(lookForPattern(column, "p", 0.5)) return 'b'; | |
| 737 else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; | |
| 738 else return 'w'; | |
| 739 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 740 | |
| 741 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 742 /* (A): {50%, p}{60%, wlvimafcyhp}{85% t,s,g} */ | |
| 743 case 65: | |
| 744 if(lookForPattern(column, "p", 0.5)) return 'b'; | |
| 745 else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; | |
| 746 else if(lookForPattern(column, "t", 0.85)) return 'b'; | |
| 747 else if(lookForPattern(column, "s", 0.85)) return 'b'; | |
| 748 else if(lookForPattern(column, "g", 0.85)) return 'b'; | |
| 749 else return 'w'; | |
| 750 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 751 | |
| 752 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 753 /* BLUE: (C): {50%, p}{60%, wlvimafcyhp}{85% s} | |
| 754 * PINK: (C): {85%, c} | |
| 755 */ | |
| 756 case 67: | |
| 757 if(lookForPattern(column, "p", 0.5)) return 'b'; | |
| 758 else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; | |
| 759 else if(lookForPattern(column, "s", 0.85)) return 'b'; | |
| 760 else if(lookForPattern(column, "c", 0.85)) return 'p'; | |
| 761 else return 'w'; | |
| 762 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 763 | |
| 764 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 765 /* (K, R): {60%, kr}{85%, q} */ | |
| 766 case 75: case 82: | |
| 767 if(lookForPattern(column, "kr", 0.6)) return 'r'; | |
| 768 else if(lookForPattern(column, "q", 0.85)) return 'r'; | |
| 769 else return 'w'; | |
| 770 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 771 | |
| 772 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 773 /* (T): {50%, ts}{60%, wlvimafcyhp } */ | |
| 774 case 84: | |
| 775 if(lookForPattern(column, "ts", 0.5)) return 'g'; | |
| 776 else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'g'; | |
| 777 else return 'w'; | |
| 778 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 779 | |
| 780 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 781 /* (S): {50%, ts}{80%, wlvimafcyhp } */ | |
| 782 case 83: | |
| 783 if(lookForPattern(column, "ts", 0.5)) return 'g'; | |
| 784 else if(lookForPattern(column, "wlvimafcyhp", 0.8)) return 'g'; | |
| 785 else return 'w'; | |
| 786 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 787 | |
| 788 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 789 /* (N): {50%, n}{85%, d } */ | |
| 790 case 78: | |
| 791 if(lookForPattern(column, "n", 0.5)) return 'g'; | |
| 792 else if(lookForPattern(column, "d", 0.85)) return 'g'; | |
| 793 else return 'w'; | |
| 794 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 795 | |
| 796 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 797 /* (Q): {50%, qe}{60%, kr} */ | |
| 798 case 81: | |
| 799 if(lookForPattern(column, "qe", 0.5)) return 'g'; | |
| 800 else if(lookForPattern(column, "kr", 0.6)) return 'g'; | |
| 801 else return 'w'; | |
| 802 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 803 | |
| 804 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 805 /* (D): {50%, de, n} */ | |
| 806 case 68: | |
| 807 if(lookForPattern(column, "de", 0.5)) return 'm'; | |
| 808 else if(lookForPattern(column, "n", 0.5)) return 'm'; | |
| 809 else return 'w'; | |
| 810 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 811 | |
| 812 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 813 /* (E): {50%, de,qe} */ | |
| 814 case 69: | |
| 815 if(lookForPattern(column, "de", 0.5)) return 'm'; | |
| 816 else if(lookForPattern(column, "qe", 0.5)) return 'm'; | |
| 817 else return 'w'; | |
| 818 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 819 | |
| 820 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 821 /* (H,Y): {50%, p}{60%, wlvimafcyhp} */ | |
| 822 case 72: case 89: | |
| 823 if(lookForPattern(column, "p", 0.5)) return 'c'; | |
| 824 else if(lookForPattern(column, "wlvimafcyhp", 0.5)) return 'c'; | |
| 825 else return 'w'; | |
| 826 /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ | |
| 827 } | |
| 828 } | |
| 829 return 'w'; | |
| 830 } | |
| 831 | |
| 832 | |
| 833 bool utils::lookForPattern(string column, string dataset, float level) { | |
| 834 | |
| 835 float count = 0; | |
| 836 int i, j; | |
| 837 | |
| 838 for(i = 0; i < (int) column.size(); i++) { | |
| 839 for(j = 0; j < (int) dataset.size(); j++) { | |
| 840 if(toupper(column[i]) == toupper(dataset[j])) { | |
| 841 count++; break; | |
| 842 } | |
| 843 } | |
| 844 } | |
| 845 | |
| 846 if((count/column.size()) >= level) | |
| 847 return true; | |
| 848 else return false; | |
| 849 } | |
| 850 |
