Mercurial > repos > padge > trimal
diff trimal_repo/source/utils.cpp @ 0:b15a3147e604 draft
"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"
author | padge |
---|---|
date | Fri, 25 Mar 2022 17:10:43 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/trimal_repo/source/utils.cpp Fri Mar 25 17:10:43 2022 +0000 @@ -0,0 +1,850 @@ +/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** + ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** + + trimAl v1.4: a tool for automated alignment trimming in large-scale + phylogenetics analyses. + + readAl v1.4: a tool for automated alignment conversion among different + formats. + + 2009-2015 Capella-Gutierrez S. and Gabaldon, T. + [scapella, tgabaldon]@crg.es + + This file is part of trimAl/readAl. + + trimAl/readAl are free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, the last available version. + + trimAl/readAl are distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>. + +***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** +***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + +#include "utils.h" +#include "values.h" +#include "defines.h" + +/*++++++++++++++++++++++++++++++++++++++++++++++++ +| void utils::initVect(int *, int, int) | +| This method is used to initializate all | +| positions of a vector with a given value. | +++++++++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::initlVect(int *vector, int tam, int valor) { + + for(int i = 0; i < tam; i++) vector[i] = valor; + +} + +void utils::initlVect(float *vector, int tam, float valor) { + + for(int i = 0; i < tam; i++) vector[i] = valor; + +} + + +/*+++++++++++++++++++++++++++++++++++++++++++++ +| void utils::copyVect(int *, int *, int) | +| This method copies integer vector 1 to | +| integer vector 2. | ++++++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::copyVect(int *vect1, int *vect2, int tam) { + + for(int i = 0; i < tam; i++) vect2[i] = vect1[i]; + +} + + +/*+++++++++++++++++++++++++++++++++++++++++++++++ +| void utils::copyVect(float *, float *, float) | +| This method copies float vector 1 to | +| float vector 2. | ++++++++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::copyVect(float *vect1, float *vect2, int tam) { + + for(int i = 0; i < tam; i++) vect2[i] = vect1[i]; + +} + + +/*+++++++++++++++++++++++++++++++++++++++++ +| int utils::roundToInf(double) | +| This method rounds a double number | +| to the inferior integer. | ++++++++++++++++++++++++++++++++++++++++++*/ + +int utils::roundToInf(double number) { + + return ((int) number); +} + + +/*+++++++++++++++++++++++++++++++++++++++++ +| int utils::roundInt(double) | +| This method rounds a double number | +| to a integer. | ++++++++++++++++++++++++++++++++++++++++++*/ + +int utils::roundInt(double number) { + + return ((int) ((double) number + 0.5)); +} + + +/*+++++++++++++++++++++++++++++++++++++++++ +| int utils::roundToSup(double) | +| This method rounds a double number | +| to the greater integer. | ++++++++++++++++++++++++++++++++++++++++++*/ + +int utils::roundToSup(double number) { + + return ((int) ((double) number + 1.0)); +} + + +/*+++++++++++++++++++++++++++++++++++++++++ +| int utils::max(int, int) | +| This method returns the maximum | +| value of the two given arguments. | ++++++++++++++++++++++++++++++++++++++++++*/ + +int utils::max(int x, int y) { + + if(x > y) return x; + else return y; +} + +float utils::max(float x, float y) { + + if(x > y) return x; + else return y; +} + +double utils::max(double x, double y) { + + if(x > y) return x; + else return y; +} + +int utils::min(int x, int y) { + + if(x < y) return x; + else return y; +} + +float utils::min(float x, float y) { + + if(x < y) return x; + else return y; +} + +double utils::min(double x, double y) { + + if(x < y) return x; + else return y; +} + +/*+++++++++++++++++++++++++++++++++++++++++++ +| bool utils::isNumber(char *) | +| This method checks if the given | +| string is a float number. | ++++++++++++++++++++++++++++++++++++++++++++*/ + +bool utils::isNumber(char *num){ + + int tam = strlen(num); + int i, flt = 1, expn = 1, sgn = 1; + + for(i = 0; i < tam; i++) { + if(num[i] == '.' && flt) + flt = 0; + + else if(((num[i] == 'e') ||(num[i] == 'E')) && expn) + expn = 0; + + else if(((num[i] == '+') ||(num[i] == '-')) && sgn) { + if(!expn) sgn = 0; + } + else if(num[i] > '9' || num[i] < '0') + return false; + } + + return true; + +} + + +/*+++++++++++++++++++++++++++++++++++++++++++ +| bool utils::compare(char *, char *) | +| This method compares the two strings | +| given, and returns true if the two | +| strings are equal. | ++++++++++++++++++++++++++++++++++++++++++++*/ + +bool utils::compare(char *a, char *b){ + + return(!strcmp(a,b)); +} + + +/*++++++++++++++++++++++++++++++++++++++++++ +| void utils::removeSpaces(char *, char *) | +| This method removes spaces in the | +| input string and put the result in | +| the output string. | +++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::removeSpaces(char *in, char *out){ + + unsigned int i, j = 0; + + for(i = 0; i < strlen(in); i++){ + + if(in[i] != ' ' && in[i] != '\t'){ + out[j] = in[i]; + j++; + } + } + out[j] = '\0'; +} + + +/*++++++++++++++++++++++++++++++++++++++++++ +| void utils::quicksort(float *, int, int) | +| This method sorts the vector using | +| the quicksort method. | +++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::quicksort(float *vect, int ini, int fin) { + + float elem_div; + int i, j; + + if ((ini >= fin) || (fin < 0)) + return; + + elem_div = vect[fin]; + i = ini - 1; + j = fin; + + while (1) { + + while (vect[++i] < elem_div) + if(i == fin) + break; + + while (vect[--j] > elem_div) + if(j == 0) + break; + + if(i < j) + swap(&vect[i], &vect[j]); + else + break; + } + + swap(&vect[i], &vect[fin]); + + quicksort(vect, ini, i - 1); + quicksort(vect, i + 1, fin); +} + + +/*++++++++++++++++++++++++++++++++++++++++ +| void utils::swap(float *, float *) | +| This method swaps the values in a | +| and b. | +++++++++++++++++++++++++++++++++++++++++*/ + +void utils::swap(float *a, float *b){ + + float temp; + + temp = *a; + *a = *b; + *b = temp; + +} + +/*++++++++++++++++++++++++++++++++++++++++++ +| void utils::quicksort(float *, int, int) | +| This method sorts the vector using | +| the quicksort method. | +++++++++++++++++++++++++++++++++++++++++++*/ + +void utils::quicksort(int *vect, int ini, int fin) { + + int i, j, elem_div; + + if ((ini >= fin) || (fin < 0)) + return; + + elem_div = vect[fin]; + i = ini - 1; + j = fin; + + while (1) { + + while (vect[++i] < elem_div) + if(i == fin) + break; + + while (vect[--j] > elem_div) + if(j == 0) + break; + + if(i < j) + swap(&vect[i], &vect[j]); + else + break; + } + + swap(&vect[i], &vect[fin]); + + quicksort(vect, ini, i - 1); + quicksort(vect, i + 1, fin); +} + + +/*++++++++++++++++++++++++++++++++++++++++ +| void utils::swap(float *, float *) | +| This method swaps the values in a | +| and b. | +++++++++++++++++++++++++++++++++++++++++*/ + +void utils::swap(int *a, int *b) { + + int temp; + + temp = *a; + *a = *b; + *b = temp; + +} + + +void utils::quicksort(int **vect, int ini, int fin) { + + float elem_div; + int i, j; + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + if ((ini >= fin) || (fin < 0)) + return; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + elem_div = vect[fin][0]; + i = ini - 1; + j = fin; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + while (true) { + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + while (vect[++i][0] < elem_div) if(i == fin) break; + while (vect[--j][0] > elem_div) if(j == 0) break; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + if(i < j) swap(&vect[i], &vect[j]); + else break; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + } + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + swap(&vect[i], &vect[fin]); + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + quicksort(vect, ini, i - 1); + quicksort(vect, i + 1, fin); + /* ***** ***** ***** ***** ***** ***** ***** ***** */ +} + +void utils::swap(int **a, int **b){ + + int *temp; + + temp = *a; + *a = *b; + *b = temp; +} + +/* ***************************************************************************** + * + * START - Refactored code + * + * ************************************************************************** */ + +bool utils::checkFile(ifstream &file) { + /* Check if a given file exists and its size is greater than 0 */ + long begin, end; + + /* Check whether input file exists or not */ + if(!file) + return false; + + /* Check input file sizes. A valid file should have a size grater than 0 */ + begin = file.tellg(); + file.seekg(0, ios::end); + end = file.tellg(); + file.seekg(0, ios::beg); + /* Compare difference between file start and end. + * Depending on result, return True or False */ + if(!(end - begin)) + return false; + return true; +} + +char* utils::readLine(ifstream &file) { + /* Read a new line from current input stream. This function is better than + * standard one since cares of operative system compability. It is useful + * as well because remove tabs and blank spaces at lines beginning/ending */ + + int state; + char c = ' '; + string nline; + static char *line = NULL; + + /* Check it the end of the file has been reached or not */ + if(file.eof()) + return NULL; + + /* Store first line found. For -Windows & MacOS compatibility- carriage return + * is considered as well as a new line character */ + for( ; (c != '\n') && ((!file.eof())); file.read(&c, 1)) + if ((c != '\r')) + nline.resize(nline.size() + 1, c); + + /* Remove blank spaces & tabs from the beginning of the line */ + state = nline.find(" ", 0); + while(state != (int) string::npos && state == 0) { + nline.erase(state, 1); + state = nline.find(" ", state); + } + + state = nline.find("\t", 0); + while(state != (int) string::npos && state == 0) { + nline.erase(state, 1); + state = nline.find("\t", state); + } + + /* If there is nothing to return, give back a NULL pointer ... */ + if(nline.size() == 0) + return NULL; + + /* Otherwise, initialize the appropiate data structure, + * dump the data and return it */ + line = new char[nline.size() + 1]; + strcpy(line, nline.c_str()); + return line; +} + +char* utils::trimLine(string nline) { + /* This function is used to remove comments inbetween a biological sequence. + * Remove all content surrounded by ("") or ([]). It wans as well when a + * mismatch for these flags is found */ + + int pos, next; + static char *line; + + /* Set-up lower and upper limit to look for comments inside of input string */ + pos = -1; + + /* Identify comments inside of input sequence and remove it */ + while(true) { + pos = nline.find("\"", (pos + 1)); + + /* When there is not any more a comment inside of sequence, + * go out from this loop */ + if(pos == (int) string::npos) + break; + + /* Look for closing flag */ + next = nline.rfind("\"", nline.size()); + + /* If a pair of comments flags '"' is found, remove everything inbetween */ + if((int) nline.find("\"", (pos + 1)) == next) { + nline.erase(pos, (next - pos + 1)); + pos = -1; + } + + /* If there is only one flag '"' for comments inside of sequence, + * user should be warned about that */ + if (pos == next) { + cerr << endl << "ERROR: Possible (\") mismatch for comments" << endl; + return NULL; + } + } + + /* Look for other kind of comments, in this case those with [] */ + while(true) { + pos = -1; + next = -1; + + /* Search for last opened bracket. It is supposed to be the first one for + * being close */ + while((pos = nline.find("[", (pos + 1))) != (int) string::npos) + next = pos; + + /* If no opening bracket has been found. + * Check if there is any closing one */ + if (next == -1) { + /* There are not any bracket in input string */ + if ((int) nline.find("]", 0) == (int) string::npos) + break; + /* Otherwise, warn about the error */ + cerr << endl << "ERROR: Brackets (]) mismatch found" << endl; + return NULL; + } + + /* Look for closest closing bracket to the opening one found */ + pos = nline.find("]", (next + 1)); + + /* If no closing bracket has been found. Warn about the mismatch */ + if (pos == (int) string::npos) { + cerr << endl << "ERROR: Brackets ([) mismatch found" << endl; + return NULL; + } + + /* When both brackets have been found, remove comments inbetween them */ + nline.erase(next, (pos - next + 1)); + } + + /* Check if after removing all comments from input string there is still part + * of sequences or not */ + if(nline.size() == 0) + return NULL; + + /* Initialize and store resulting sequence into an appropiate structure */ + line = new char[nline.size() + 1]; + strcpy(line, nline.c_str()); + + return line; +} +/* ***************************************************************************** + * + * END - Refactored code + * + * ************************************************************************** */ +string utils::getReverse(string toReverse) { + + string line; + int i; + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + for(i = toReverse.size() - 1; i >= 0; i--) + line += toReverse[i]; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + return line; +} + +string utils::removeCharacter(char c, string line) { + + int pos; + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + pos = line.find(c, 0); + while(pos != (int) string::npos) { + line.erase(pos, 1); + pos = line.find(c, pos); + } + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + return line; +} + +int utils::checkTypeAlignment(int seqNumber, int residNumber, string *sequences) { + + int i, j, k, l, hitDNA, hitRNA, degenerate, gDNA, gRNA, extDNA, extRNA; + float ratioDNA, ratioRNA; + /* Standard tables */ + char listRNA[11] = "AGCUNagcun"; + char listDNA[11] = "AGCTNagctn"; + + /* Degenerate Nucleotides codes */ + char degeneratedCodes[21] = "MmRrWwSsYyKkVvHhDdBb"; + + /* For each sequences, this method locks at the 100 letters (excluding gaps). + * The method is able to distinguish between pure DNA/RNA nucleotides or those + * containing degenerate Nucleotide letters */ + for(i = 0, gDNA = 0, gRNA = 0, extDNA = 0, extRNA = 0; i < seqNumber; i++) { + + /* Looks at the 100 letters (excluding gaps) while doesn's get the sequence's end */ + /* When there are less than a 100 characters, break the loop before reaching that limit */ + residNumber = (int) sequences[i].size(); + //~ for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber && k < 100; j++) + for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber; j++) + if(sequences[i][j] != '-' && sequences[i][j] != '.' && sequences[i][j] != '?') { + k++; + + /* Recognizes between DNA and RNA. */ + for(l = 0; l < (int) strlen(listDNA); l++) + if(listDNA[l] == sequences[i][j]) + hitDNA++; + + for(l = 0; l < (int) strlen(listRNA); l++) + if(listRNA[l] == sequences[i][j]) + hitRNA++; + + for(l = 0; l < (int) strlen(degeneratedCodes); l++) + if(degeneratedCodes[l] == sequences[i][j]) + degenerate++; + } + + /* If input sequences have less than 95% of nucleotides, even when residues + * are treated with degenerated codes, consider the input file as containing + * amino-acidic sequences. */ + ratioDNA = float(degenerate + hitDNA)/k; + ratioRNA = float(degenerate + hitRNA)/k; + + if(ratioDNA < 0.95 && ratioRNA < 0.95) + return AAType; + + /* Identify precisely if nucleotides sequences are DNA/RNA strict or + * any degenerate code has been used in the sequence */ + else if(hitRNA > hitDNA && degenerate == 0) + gRNA++; + else if(hitRNA > hitDNA && degenerate != 0) + extRNA++; + else if(hitRNA < hitDNA && degenerate == 0) + gDNA++; + else if(hitRNA < hitDNA && degenerate != 0) + extDNA++; + } + /* Return the datatype with greater values, considering always degenerate + * codes */ + if (extDNA != 0 && extDNA > extRNA) + return DNADeg; + else if (extRNA != 0 && extDNA < extRNA) + return RNADeg; + else if(gRNA > gDNA) + return RNAType; + else + return DNAType; +} + +int* utils::readNumbers_StartEnd(string line) { + + int comma, nElems = 0; + static int *numbers; + + comma = -1; + while((comma = line.find(",", comma + 1)) != (int) string::npos) + nElems += 2; + + //~ If there is more than two numbers separated by a comma, return NULL + if(nElems != 2) + return NULL; + + numbers = new int[2]; + comma = line.find(",", 0); + numbers[0] = atoi(line.substr(0, comma).c_str()); + numbers[1] = atoi(line.substr(comma+1).c_str()); + + return numbers; +} + + +int* utils::readNumbers(string line) { + + int i, comma, separ, init, nElems = 0; + static int *numbers; + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + comma = -1; + while((comma = line.find(",", comma + 1)) != (int) string::npos) + nElems += 2; + nElems += 2; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + numbers = new int[nElems + 1]; + numbers[0] = nElems; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + init = 0; + i = 1; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + do { + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + comma = line.find(",", init); + separ = line.find("-", init); + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + if(((separ < comma) || (comma == (int) string::npos)) && (separ != (int) string::npos)) { + numbers[i++] = atoi(line.substr(init, separ - init).c_str()); + numbers[i++] = atoi(line.substr(separ+1, comma - separ - 1).c_str()); + init = comma + 1; + } + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + else if((separ > comma) || (separ == (int) string::npos)) { + numbers[i++] = atoi(line.substr(init, comma - init).c_str()); + numbers[i++] = atoi(line.substr(init, comma - init).c_str()); + init = comma + 1; + } + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + if(numbers[i-2] < 0) + return NULL; + if(numbers[i-1] < numbers[i-2]) + return NULL; + if(comma == (int) string::npos) + break; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + } while(true); + + /* ***** ***** ***** ***** ***** ***** ***** ***** */ + return numbers; + /* ***** ***** ***** ***** ***** ***** ***** ***** */ +} + + +char utils::determineColor(char res, string column) { + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + if(toupper(res) == 'G') + return 'o'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + else if(toupper(res) == 'P') + return 'y'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + else if(res != '-') { + switch(toupper(res)) { + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (W, L, V, I, M, F): {50%, p}{60%, wlvimafcyhp} */ + case 87: case 76: case 86: case 73: case 77: case 70: + if(lookForPattern(column, "p", 0.5)) return 'b'; + else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (A): {50%, p}{60%, wlvimafcyhp}{85% t,s,g} */ + case 65: + if(lookForPattern(column, "p", 0.5)) return 'b'; + else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; + else if(lookForPattern(column, "t", 0.85)) return 'b'; + else if(lookForPattern(column, "s", 0.85)) return 'b'; + else if(lookForPattern(column, "g", 0.85)) return 'b'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* BLUE: (C): {50%, p}{60%, wlvimafcyhp}{85% s} + * PINK: (C): {85%, c} + */ + case 67: + if(lookForPattern(column, "p", 0.5)) return 'b'; + else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b'; + else if(lookForPattern(column, "s", 0.85)) return 'b'; + else if(lookForPattern(column, "c", 0.85)) return 'p'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (K, R): {60%, kr}{85%, q} */ + case 75: case 82: + if(lookForPattern(column, "kr", 0.6)) return 'r'; + else if(lookForPattern(column, "q", 0.85)) return 'r'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (T): {50%, ts}{60%, wlvimafcyhp } */ + case 84: + if(lookForPattern(column, "ts", 0.5)) return 'g'; + else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'g'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (S): {50%, ts}{80%, wlvimafcyhp } */ + case 83: + if(lookForPattern(column, "ts", 0.5)) return 'g'; + else if(lookForPattern(column, "wlvimafcyhp", 0.8)) return 'g'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (N): {50%, n}{85%, d } */ + case 78: + if(lookForPattern(column, "n", 0.5)) return 'g'; + else if(lookForPattern(column, "d", 0.85)) return 'g'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (Q): {50%, qe}{60%, kr} */ + case 81: + if(lookForPattern(column, "qe", 0.5)) return 'g'; + else if(lookForPattern(column, "kr", 0.6)) return 'g'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (D): {50%, de, n} */ + case 68: + if(lookForPattern(column, "de", 0.5)) return 'm'; + else if(lookForPattern(column, "n", 0.5)) return 'm'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (E): {50%, de,qe} */ + case 69: + if(lookForPattern(column, "de", 0.5)) return 'm'; + else if(lookForPattern(column, "qe", 0.5)) return 'm'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + /* (H,Y): {50%, p}{60%, wlvimafcyhp} */ + case 72: case 89: + if(lookForPattern(column, "p", 0.5)) return 'c'; + else if(lookForPattern(column, "wlvimafcyhp", 0.5)) return 'c'; + else return 'w'; + /* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */ + } + } + return 'w'; +} + + +bool utils::lookForPattern(string column, string dataset, float level) { + + float count = 0; + int i, j; + + for(i = 0; i < (int) column.size(); i++) { + for(j = 0; j < (int) dataset.size(); j++) { + if(toupper(column[i]) == toupper(dataset[j])) { + count++; break; + } + } + } + + if((count/column.size()) >= level) + return true; + else return false; +} +