trimal: trimal_repo/source/utils.cpp comparison

comparison trimal_repo/source/utils.cpp @ 0:b15a3147e604 draft

"planemo upload for repository https://github.com/inab/trimal commit cbe1e8577ecb1a46709034a40dff36052e876e7a-dirty"

author	padge
date	Fri, 25 Mar 2022 17:10:43 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:b15a3147e604
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
+***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
+trimAl v1.4: a tool for automated alignment trimming in large-scale
+phylogenetics analyses.
+readAl v1.4: a tool for automated alignment conversion among different
+formats.
+2009-2015 Capella-Gutierrez S. and Gabaldon, T.
+[scapella, tgabaldon]@crg.es
+This file is part of trimAl/readAl.
+trimAl/readAl are free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, the last available version.
+trimAl/readAl are distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>.
+***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
+***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+#include "utils.h"
+#include "values.h"
+#include "defines.h"
+/*++++++++++++++++++++++++++++++++++++++++++++++++
+| void utils::initVect(int *, int, int)          |
+|      This method is used to initializate all   |
+|      positions of a vector with a given value. |
+++++++++++++++++++++++++++++++++++++++++++++++++*/
+void utils::initlVect(int *vector, int tam, int valor) {
+for(int i = 0; i < tam; i++) vector[i] = valor;
+}
+void utils::initlVect(float *vector, int tam, float valor) {
+for(int i = 0; i < tam; i++) vector[i] = valor;
+}
+/*+++++++++++++++++++++++++++++++++++++++++++++
+| void utils::copyVect(int *, int *, int)     |
+|      This method copies integer vector 1 to |
+|      integer vector 2.                      |
++++++++++++++++++++++++++++++++++++++++++++++*/
+void utils::copyVect(int *vect1, int *vect2, int tam) {
+for(int i = 0; i < tam; i++) vect2[i] = vect1[i];
+}
+/*+++++++++++++++++++++++++++++++++++++++++++++++
+| void utils::copyVect(float *, float *, float) |
+|      This method copies float vector 1 to     |
+|      float vector 2.                          |
++++++++++++++++++++++++++++++++++++++++++++++++*/
+void utils::copyVect(float *vect1, float *vect2, int tam) {
+for(int i = 0; i < tam; i++) vect2[i] = vect1[i];
+}
+/*+++++++++++++++++++++++++++++++++++++++++
+| int utils::roundToInf(double)           |
+|      This method rounds a double number |
+|      to the inferior integer.           |
++++++++++++++++++++++++++++++++++++++++++*/
+int utils::roundToInf(double number) {
+return ((int) number);
+}
+/*+++++++++++++++++++++++++++++++++++++++++
+| int utils::roundInt(double)             |
+|      This method rounds a double number |
+|      to a integer.                      |
++++++++++++++++++++++++++++++++++++++++++*/
+int utils::roundInt(double number) {
+return ((int) ((double) number + 0.5));
+}
+/*+++++++++++++++++++++++++++++++++++++++++
+| int utils::roundToSup(double)           |
+|      This method rounds a double number |
+|      to the greater integer.            |
++++++++++++++++++++++++++++++++++++++++++*/
+int utils::roundToSup(double number) {
+return ((int) ((double) number + 1.0));
+}
+/*+++++++++++++++++++++++++++++++++++++++++
+| int utils::max(int, int)                |
+|      This method returns the maximum    |
+|      value of the two given arguments.  |
++++++++++++++++++++++++++++++++++++++++++*/
+int utils::max(int x, int y) {
+if(x > y) return x;
+else      return y;
+}
+float utils::max(float x, float y) {
+if(x > y) return x;
+else      return y;
+}
+double utils::max(double x, double y) {
+if(x > y) return x;
+else      return y;
+}
+int utils::min(int x, int y) {
+if(x < y) return x;
+else      return y;
+}
+float utils::min(float x, float y) {
+if(x < y) return x;
+else      return y;
+}
+double utils::min(double x, double y) {
+if(x < y) return x;
+else      return y;
+}
+/*+++++++++++++++++++++++++++++++++++++++++++
+| bool utils::isNumber(char *)              |
+|      This method checks if the given      |
+|      string is a float number.            |
++++++++++++++++++++++++++++++++++++++++++++*/
+bool utils::isNumber(char *num){
+int tam = strlen(num);
+int i, flt = 1, expn = 1, sgn = 1;
+for(i = 0; i < tam; i++) {
+if(num[i] == '.' && flt)
+flt = 0;
+else if(((num[i] == 'e') ||(num[i] == 'E')) && expn)
+expn = 0;
+else if(((num[i] == '+') ||(num[i] == '-')) && sgn) {
+if(!expn) sgn = 0;
+}
+else if(num[i] > '9' || num[i] < '0')
+return false;
+}
+return true;
+}
+/*+++++++++++++++++++++++++++++++++++++++++++
+| bool utils::compare(char *, char *)       |
+|      This method compares the two strings |
+|      given, and returns true if the two   |
+|      strings are equal.                   |
++++++++++++++++++++++++++++++++++++++++++++*/
+bool utils::compare(char *a, char *b){
+return(!strcmp(a,b));
+}
+/*++++++++++++++++++++++++++++++++++++++++++
+| void utils::removeSpaces(char *, char *) |
+|      This method removes spaces in the   |
+|      input string and put the result in  |
+|      the output string.                  |
+++++++++++++++++++++++++++++++++++++++++++*/
+void utils::removeSpaces(char *in, char *out){
+unsigned int i, j = 0;
+for(i = 0; i < strlen(in); i++){
+if(in[i] != ' ' && in[i] != '\t'){
+out[j] = in[i];
+j++;
+}
+}
+out[j] = '\0';
+}
+/*++++++++++++++++++++++++++++++++++++++++++
+| void utils::quicksort(float *, int, int) |
+|      This method sorts the vector using  |
+|      the quicksort method.               |
+++++++++++++++++++++++++++++++++++++++++++*/
+void utils::quicksort(float *vect, int ini, int fin) {
+float elem_div;
+int i, j;
+if ((ini >= fin) || (fin < 0))
+return;
+elem_div = vect[fin];
+i = ini - 1;
+j = fin;
+while (1) {
+while (vect[++i] < elem_div)
+if(i == fin)
+break;
+while (vect[--j] > elem_div)
+if(j == 0)
+break;
+if(i < j)
+swap(&vect[i], &vect[j]);
+else
+break;
+}
+swap(&vect[i], &vect[fin]);
+quicksort(vect, ini, i - 1);
+quicksort(vect, i + 1, fin);
+}
+/*++++++++++++++++++++++++++++++++++++++++
+| void utils::swap(float *, float *)     |
+|      This method swaps the values in a |
+|      and b.                            |
+++++++++++++++++++++++++++++++++++++++++*/
+void utils::swap(float *a, float *b){
+float temp;
+temp = *a;
+*a = *b;
+*b = temp;
+}
+/*++++++++++++++++++++++++++++++++++++++++++
+| void utils::quicksort(float *, int, int) |
+|      This method sorts the vector using  |
+|      the quicksort method.               |
+++++++++++++++++++++++++++++++++++++++++++*/
+void utils::quicksort(int *vect, int ini, int fin) {
+int i, j, elem_div;
+if ((ini >= fin) || (fin < 0))
+return;
+elem_div = vect[fin];
+i = ini - 1;
+j = fin;
+while (1) {
+while (vect[++i] < elem_div)
+if(i == fin)
+break;
+while (vect[--j] > elem_div)
+if(j == 0)
+break;
+if(i < j)
+swap(&vect[i], &vect[j]);
+else
+break;
+}
+swap(&vect[i], &vect[fin]);
+quicksort(vect, ini, i - 1);
+quicksort(vect, i + 1, fin);
+}
+/*++++++++++++++++++++++++++++++++++++++++
+| void utils::swap(float *, float *)     |
+|      This method swaps the values in a |
+|      and b.                            |
+++++++++++++++++++++++++++++++++++++++++*/
+void utils::swap(int *a, int *b) {
+int temp;
+temp = *a;
+*a = *b;
+*b = temp;
+}
+void utils::quicksort(int **vect, int ini, int fin) {
+float elem_div;
+int i, j;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+if ((ini >= fin) || (fin < 0))
+return;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+elem_div = vect[fin][0];
+i = ini - 1;
+j = fin;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+while (true) {
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+while (vect[++i][0] < elem_div) if(i == fin) break;
+while (vect[--j][0] > elem_div) if(j == 0)   break;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+if(i < j) swap(&vect[i], &vect[j]);
+else break;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+}
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+swap(&vect[i], &vect[fin]);
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+quicksort(vect, ini, i - 1);
+quicksort(vect, i + 1, fin);
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+}
+void utils::swap(int **a, int **b){
+int *temp;
+temp = *a;
+*a = *b;
+*b = temp;
+}
+/* *****************************************************************************
+*
+* START - Refactored code
+*
+* ************************************************************************** */
+bool utils::checkFile(ifstream &file) {
+/* Check if a given file exists and its size is greater than 0 */
+long begin, end;
+/* Check whether input file exists or not */
+if(!file)
+return false;
+/* Check input file sizes. A valid file should have a size grater than 0 */
+begin = file.tellg();
+file.seekg(0, ios::end);
+end = file.tellg();
+file.seekg(0, ios::beg);
+/* Compare difference between file start and end.
+* Depending on result, return True or False */
+if(!(end - begin))
+return false;
+return true;
+}
+char* utils::readLine(ifstream &file) {
+/* Read a new line from current input stream. This function is better than
+* standard one since cares of operative system compability. It is useful
+* as well because remove tabs and blank spaces at lines beginning/ending */
+int state;
+char c = ' ';
+string nline;
+static char *line = NULL;
+/* Check it the end of the file has been reached or not */
+if(file.eof())
+return NULL;
+/* Store first line found. For -Windows & MacOS compatibility- carriage return
+* is considered as well as a new line character */
+for( ; (c != '\n') && ((!file.eof())); file.read(&c, 1))
+if ((c != '\r'))
+nline.resize(nline.size() + 1, c);
+/* Remove blank spaces & tabs from the beginning of the line */
+state = nline.find(" ", 0);
+while(state != (int) string::npos && state == 0) {
+nline.erase(state, 1);
+state = nline.find(" ", state);
+}
+state = nline.find("\t", 0);
+while(state != (int) string::npos && state == 0) {
+nline.erase(state, 1);
+state = nline.find("\t", state);
+}
+/* If there is nothing to return, give back a NULL pointer ... */
+if(nline.size() == 0)
+return NULL;
+/* Otherwise, initialize the appropiate data structure,
+* dump the data and return it */
+line = new char[nline.size() + 1];
+strcpy(line, nline.c_str());
+return line;
+}
+char* utils::trimLine(string nline) {
+/* This function is used to remove comments inbetween a biological sequence.
+* Remove all content surrounded by ("") or ([]). It wans as well when a
+* mismatch for these flags is found */
+int pos, next;
+static char *line;
+/* Set-up lower and upper limit to look for comments inside of input string */
+pos = -1;
+/* Identify comments inside of input sequence and remove it */
+while(true) {
+pos  = nline.find("\"", (pos + 1));
+/* When there is not any more a comment inside of sequence,
+* go out from this loop */
+if(pos == (int) string::npos)
+break;
+/* Look for closing flag */
+next = nline.rfind("\"", nline.size());
+/* If a pair of comments flags '"' is found, remove everything inbetween */
+if((int) nline.find("\"", (pos + 1)) == next) {
+nline.erase(pos, (next - pos + 1));
+pos = -1;
+}
+/* If there is only one flag '"' for comments inside of sequence,
+* user should be warned about that */
+if (pos == next) {
+cerr << endl << "ERROR: Possible (\") mismatch for comments" << endl;
+return NULL;
+}
+}
+/* Look for other kind of comments, in this case those with [] */
+while(true) {
+pos = -1;
+next = -1;
+/* Search for last opened bracket. It is supposed to be the first one for
+* being close */
+while((pos = nline.find("[", (pos + 1))) != (int) string::npos)
+next = pos;
+/* If no opening bracket has been found.
+* Check if there is any closing one */
+if (next == -1) {
+/* There are not any bracket in input string */
+if ((int) nline.find("]", 0) == (int) string::npos)
+break;
+/* Otherwise, warn about the error */
+cerr << endl << "ERROR: Brackets (]) mismatch found" << endl;
+return NULL;
+}
+/* Look for closest closing bracket to the opening one found */
+pos = nline.find("]", (next + 1));
+/* If no closing bracket has been found. Warn about the mismatch */
+if (pos == (int) string::npos) {
+cerr << endl << "ERROR: Brackets ([) mismatch found" << endl;
+return NULL;
+}
+/* When both brackets have been found, remove comments inbetween them */
+nline.erase(next, (pos - next + 1));
+}
+/* Check if after removing all comments from input string there is still part
+* of sequences or not */
+if(nline.size() == 0)
+return NULL;
+/* Initialize and store resulting sequence into an appropiate structure */
+line = new char[nline.size() + 1];
+strcpy(line, nline.c_str());
+return line;
+}
+/* *****************************************************************************
+*
+* END - Refactored code
+*
+* ************************************************************************** */
+string utils::getReverse(string toReverse) {
+string line;
+int i;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+for(i = toReverse.size() - 1; i >= 0; i--)
+line += toReverse[i];
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+return line;
+}
+string utils::removeCharacter(char c, string line) {
+int pos;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+pos = line.find(c, 0);
+while(pos != (int) string::npos) {
+line.erase(pos, 1);
+pos = line.find(c, pos);
+}
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+return line;
+}
+int utils::checkTypeAlignment(int seqNumber, int residNumber, string *sequences) {
+int i, j, k, l, hitDNA, hitRNA, degenerate, gDNA, gRNA, extDNA, extRNA;
+float ratioDNA, ratioRNA;
+/* Standard tables */
+char listRNA[11] = "AGCUNagcun";
+char listDNA[11] = "AGCTNagctn";
+/* Degenerate Nucleotides codes */
+char degeneratedCodes[21] = "MmRrWwSsYyKkVvHhDdBb";
+/* For each sequences, this method locks at the 100 letters (excluding gaps).
+* The method is able to distinguish between pure DNA/RNA nucleotides or those
+* containing degenerate Nucleotide letters */
+for(i = 0, gDNA = 0, gRNA = 0, extDNA = 0, extRNA = 0; i < seqNumber; i++) {
+/* Looks at the 100 letters (excluding gaps) while doesn's get the sequence's end */
+/* When there are less than a 100 characters, break the loop before reaching that limit */
+residNumber = (int) sequences[i].size();
+//~ for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber && k  < 100; j++)
+for(j = 0, k = 0, hitDNA = 0, hitRNA = 0, degenerate = 0; j < residNumber; j++)
+if(sequences[i][j] != '-' && sequences[i][j] != '.' && sequences[i][j] != '?') {
+k++;
+/* Recognizes between DNA and RNA. */
+for(l = 0; l < (int) strlen(listDNA); l++)
+if(listDNA[l] == sequences[i][j])
+hitDNA++;
+for(l = 0; l < (int) strlen(listRNA); l++)
+if(listRNA[l] == sequences[i][j])
+hitRNA++;
+for(l = 0; l < (int) strlen(degeneratedCodes); l++)
+if(degeneratedCodes[l] == sequences[i][j])
+degenerate++;
+}
+/* If input sequences have less than 95% of nucleotides, even when residues
+* are treated with degenerated codes, consider the input file as containing
+* amino-acidic sequences. */
+ratioDNA = float(degenerate + hitDNA)/k;
+ratioRNA = float(degenerate + hitRNA)/k;
+if(ratioDNA < 0.95 && ratioRNA < 0.95)
+return AAType;
+/* Identify precisely if nucleotides sequences are DNA/RNA strict or
+* any degenerate code has been used in the sequence */
+else if(hitRNA > hitDNA && degenerate == 0)
+gRNA++;
+else if(hitRNA > hitDNA && degenerate != 0)
+extRNA++;
+else if(hitRNA < hitDNA && degenerate == 0)
+gDNA++;
+else if(hitRNA < hitDNA && degenerate != 0)
+extDNA++;
+}
+/* Return the datatype with greater values, considering always degenerate
+* codes */
+if (extDNA != 0 && extDNA > extRNA)
+return DNADeg;
+else if (extRNA != 0 && extDNA < extRNA)
+return RNADeg;
+else if(gRNA > gDNA)
+return RNAType;
+else
+return DNAType;
+}
+int* utils::readNumbers_StartEnd(string line) {
+int comma, nElems = 0;
+static int *numbers;
+comma = -1;
+while((comma = line.find(",", comma + 1)) != (int) string::npos)
+nElems += 2;
+//~ If there is more than two numbers separated by a comma, return NULL
+if(nElems != 2)
+return NULL;
+numbers = new int[2];
+comma = line.find(",", 0);
+numbers[0] = atoi(line.substr(0, comma).c_str());
+numbers[1] = atoi(line.substr(comma+1).c_str());
+return numbers;
+}
+int* utils::readNumbers(string line) {
+int i, comma, separ, init, nElems = 0;
+static int *numbers;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+comma = -1;
+while((comma = line.find(",", comma + 1)) != (int) string::npos)
+nElems += 2;
+nElems += 2;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+numbers = new int[nElems + 1];
+numbers[0] = nElems;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+init = 0;
+i = 1;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+do {
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+comma = line.find(",", init);
+separ = line.find("-", init);
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+if(((separ < comma) || (comma == (int) string::npos)) && (separ != (int) string::npos)) {
+numbers[i++] = atoi(line.substr(init, separ - init).c_str());
+numbers[i++] = atoi(line.substr(separ+1, comma - separ - 1).c_str());
+init = comma + 1;
+}
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+else if((separ > comma) || (separ == (int) string::npos)) {
+numbers[i++] = atoi(line.substr(init, comma - init).c_str());
+numbers[i++] = atoi(line.substr(init, comma - init).c_str());
+init = comma + 1;
+}
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+if(numbers[i-2] < 0)
+return NULL;
+if(numbers[i-1] < numbers[i-2])
+return NULL;
+if(comma == (int) string::npos)
+break;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+} while(true);
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+return numbers;
+/* ***** ***** ***** ***** ***** ***** ***** ***** */
+}
+char utils::determineColor(char res, string column) {
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+if(toupper(res) == 'G')
+return 'o';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+else if(toupper(res) == 'P')
+return 'y';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+else if(res != '-') {
+switch(toupper(res)) {
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (W, L, V, I, M, F): {50%, p}{60%, wlvimafcyhp} */
+case 87: case 76:  case 86: case 73: case 77: case 70:
+if(lookForPattern(column, "p", 0.5))                return 'b';
+else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (A): {50%, p}{60%, wlvimafcyhp}{85% t,s,g} */
+case 65:
+if(lookForPattern(column, "p", 0.5))                return 'b';
+else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b';
+else if(lookForPattern(column, "t", 0.85))          return 'b';
+else if(lookForPattern(column, "s", 0.85))          return 'b';
+else if(lookForPattern(column, "g", 0.85))          return 'b';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* BLUE: (C): {50%, p}{60%, wlvimafcyhp}{85% s}
+* PINK: (C): {85%, c}
+*/
+case 67:
+if(lookForPattern(column, "p", 0.5))                return 'b';
+else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'b';
+else if(lookForPattern(column, "s", 0.85))          return 'b';
+else if(lookForPattern(column, "c", 0.85))          return 'p';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (K, R): {60%, kr}{85%, q} */
+case 75: case 82:
+if(lookForPattern(column, "kr", 0.6))               return 'r';
+else if(lookForPattern(column, "q", 0.85))          return 'r';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (T): {50%, ts}{60%, wlvimafcyhp } */
+case 84:
+if(lookForPattern(column, "ts", 0.5))               return 'g';
+else if(lookForPattern(column, "wlvimafcyhp", 0.6)) return 'g';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (S): {50%, ts}{80%, wlvimafcyhp } */
+case 83:
+if(lookForPattern(column, "ts", 0.5))               return 'g';
+else if(lookForPattern(column, "wlvimafcyhp", 0.8)) return 'g';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (N): {50%, n}{85%, d } */
+case 78:
+if(lookForPattern(column, "n", 0.5))                return 'g';
+else if(lookForPattern(column, "d", 0.85))          return 'g';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (Q): {50%, qe}{60%, kr} */
+case 81:
+if(lookForPattern(column, "qe", 0.5))               return 'g';
+else if(lookForPattern(column, "kr", 0.6))          return 'g';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (D): {50%, de, n} */
+case 68:
+if(lookForPattern(column, "de", 0.5))               return 'm';
+else if(lookForPattern(column, "n", 0.5))           return 'm';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (E): {50%, de,qe} */
+case 69:
+if(lookForPattern(column, "de", 0.5))               return 'm';
+else if(lookForPattern(column, "qe", 0.5))          return 'm';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+/* (H,Y): {50%, p}{60%, wlvimafcyhp} */
+case 72: case 89:
+if(lookForPattern(column, "p", 0.5))                return 'c';
+else if(lookForPattern(column, "wlvimafcyhp", 0.5)) return 'c';
+else                                                return 'w';
+/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
+}
+}
+return 'w';
+}
+bool utils::lookForPattern(string column, string dataset, float level) {
+float count = 0;
+int i, j;
+for(i = 0; i < (int) column.size(); i++) {
+for(j = 0; j < (int) dataset.size(); j++) {
+if(toupper(column[i]) == toupper(dataset[j])) {
+count++; break;
+}
+}
+}
+if((count/column.size()) >= level)
+return true;
+else return false;
+}

Mercurial > repos > padge > trimal

comparison trimal_repo/source/utils.cpp @ 0:b15a3147e604 draft