Mercurial > repos > padge > trimal

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
   ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****

    trimAl v1.4: a tool for automated alignment trimming in large-scale
                 phylogenetics analyses.

    readAl v1.4: a tool for automated alignment conversion among different
                 formats.

    statAl v1.4: a tool for getting stats about multiple sequence alignments.


    2009-2015 Capella-Gutierrez S. and Gabaldon, T.
              [scapella, tgabaldon]@crg.es

    This file is part of trimAl/readAl.

    trimAl/readAl are free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, the last available version.

    trimAl/readAl are distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with trimAl/readAl. If not, see <http://www.gnu.org/licenses/>.

***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** *****
***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
using namespace std;

#include <float.h>
#include "alignment.h"
#include "rwAlignment.cpp"
#include "autAlignment.cpp"

#include <deque>

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Class constructor */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */

alignment::alignment(void) {

  /* Alignment parameter */
  sequenNumber = 0;
  residNumber =  0;

  /* Are the input sequences aligned? */
  isAligned = false;

  /* Should the output file be reversed? */
  reverse   = false;

  /* Should be trimmed only terminal gaps? - set automated and manual boundaries
   * values */
  terminalGapOnly = false;
  left_boundary = -1;
  right_boundary = -1;

  /* Input and output formats */
  iformat = 0;
  oformat = 0;
  shortNames = false;

  forceCaps = false;
  upperCase = false;
  lowerCase = false;

  /* Indicate whether sequences composed only by gaps should be kept or not */
  keepSequences = false;

  /* Indicate whether original header, they may include non-alphanumerical
   * characters, should be dumped into output stream without any preprocessing
   * step */
  keepHeader = false;

  gapSymbol = "-";

  /* Sequence datatype: DNA, RNA or Protein */
  dataType = 0;

  /* Window sizes to trim the input alignment */
  ghWindow = 0;
  shWindow = 0;

  /* Minimum block size in the new alignment */
  blockSize = 0;

  /* Is this alignmnet new? */
  oldAlignment  = false;

  /* Sequence residues number */
  residuesNumber = NULL;

  /* Columns and sequences that have been previously selected */
  saveResidues  = NULL;
  saveSequences = NULL;

  /* Input sequences as well other information such as sequences name, etc */
  sequences = NULL;
  seqsName  = NULL;
  seqsInfo  = NULL;

  /* Information about input alignment */
  filename = "";
  aligInfo = "";

  /* Information computed from alignment */
  sgaps =     NULL;
  scons =     NULL;
  seqMatrix = NULL;

  identities = NULL;
  overlaps = NULL;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Class constructor */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */

alignment::alignment(string o_filename, string o_aligInfo, string *o_sequences, string *o_seqsName,
                     string *o_seqsInfo, int o_sequenNumber, int o_residNumber, int o_iformat, int o_oformat,
                     bool o_shortNames, int o_dataType, int o_isAligned, bool o_reverse, bool o_terminalGapOnly,
                     int o_left_boundary, int o_right_boundary,
                     bool o_keepSeqs, bool o_keepHeader, int OldSequences, int OldResidues, int *o_residuesNumber,
                     int *o_saveResidues, int *o_saveSequences, int o_ghWindow, int o_shWindow, int o_blockSize,
                     float **o_identities, float **o_overlaps) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  int i, j, k, ll;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  oldAlignment = true;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Assign the parameter values to the variables */
  sequenNumber = o_sequenNumber;
  residNumber  = o_residNumber;

  iformat = o_iformat;
  oformat = o_oformat;
  shortNames = o_shortNames;

  dataType = o_dataType;

  ghWindow = o_ghWindow;
  shWindow = o_shWindow;

  blockSize = o_blockSize;

  isAligned = o_isAligned;
  reverse   = o_reverse;

  terminalGapOnly = o_terminalGapOnly;
  right_boundary = o_right_boundary;
  left_boundary = o_left_boundary;

  filename = o_filename;
  aligInfo = o_aligInfo;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  keepSequences = o_keepSeqs;
  keepHeader = o_keepHeader;

  forceCaps = false;
  upperCase = false;
  lowerCase = false;

  gapSymbol = "-";


  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Basic information for the new alignment */
  sequences = new string[sequenNumber];
  for(i = 0; i < sequenNumber; i++)
    sequences[i] = o_sequences[i];

  seqsName = new string[sequenNumber];
  for(i = 0; i < sequenNumber; i++)
    seqsName[i] = o_seqsName[i];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  residuesNumber = new int[sequenNumber];
  if((isAligned) || (o_residuesNumber != NULL)) {
    for(i = 0; i < sequenNumber; i++)
      residuesNumber[i] = residNumber;
  } else {
    for(i = 0; i < sequenNumber; i++)
      residuesNumber[i] = o_residuesNumber[i];
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(o_seqsInfo != NULL) {
    seqsInfo = new string[sequenNumber];
    for(i = 0; i < sequenNumber; i++)
      seqsInfo[i] = o_seqsInfo[i];
  } else seqsInfo = NULL;

  saveResidues  = NULL;
  if(o_saveResidues != NULL) {
    saveResidues = new int[residNumber];
    for(i = 0, j = 0; i < OldResidues; i++)
      if(o_saveResidues[i] != -1) {
        saveResidues[j] = o_saveResidues[i];
        j++;
      }
  }

  saveSequences = NULL;
  if(o_saveSequences != NULL) {
    saveSequences = new int[sequenNumber];
    for(i = 0, j = 0; i < OldSequences; i++)
      if(o_saveSequences[i] != -1) {
        saveSequences[j] = o_saveSequences[i];
        j++;
      }
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  identities = NULL;
  if(o_identities != NULL) {
    identities = new float*[sequenNumber];
    for(i = 0, j = 0; i < OldSequences; i++) {
      if(o_saveSequences[i] != -1) {
        identities[j] = new float[sequenNumber];
        for(k = 0, ll = 0; k < OldSequences; k++) {
          if(o_saveSequences[k] != -1) {
            identities[j][ll] = o_identities[i][k];
            ll++;
          }
        }
        j++;
      }
    }
  }

  overlaps = NULL;
  if(o_overlaps != NULL) {
    overlaps = new float*[sequenNumber];
    for(i = 0, j = 0; i < OldSequences; i++) {
      if(o_saveSequences[i] != -1) {
        overlaps[j] = new float[sequenNumber];
        for(k = 0, ll = 0; k < OldSequences; k++) {
          if(o_saveSequences[k] != -1) {
            overlaps[j][ll] = o_overlaps[i][k];
            ll++;
          }
        }
        j++;
      }
    }
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Any structure associated to the new alignment is
   * initialize to NULL. In this way, these structure,
   * if it will be necessary, has to be computed */
  sgaps  =     NULL;
  scons  =     NULL;
  seqMatrix =  NULL;
  identities = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Overlapping operator = to use it as a kind of class constructor */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */

alignment &alignment::operator=(const alignment &old) {
  int i, j;

  if(this != &old) {

    oldAlignment = true;

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Assign the parameter values to the variables */
    sequenNumber = old.sequenNumber;
    residNumber =  old.residNumber;

    isAligned =  old.isAligned;
    reverse   =  old.reverse;

    terminalGapOnly = old.terminalGapOnly;
    right_boundary = old.right_boundary;
    left_boundary = old.left_boundary;

    iformat = old.iformat;
    oformat = old.oformat;
    shortNames = old.shortNames;

    dataType = old.dataType;

    ghWindow = old.ghWindow;
    shWindow = old.shWindow;

    blockSize = old.blockSize;

    filename = old.filename;
    aligInfo = old.aligInfo;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    keepSequences = old.keepSequences;
    keepHeader = old.keepHeader;

    forceCaps = old.forceCaps;
    upperCase = old.upperCase;
    lowerCase = old.lowerCase;

    gapSymbol = old.gapSymbol;

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    sequences = new string[sequenNumber];
    for(i = 0; i < sequenNumber; i++)
      sequences[i] = old.sequences[i];

    seqsName = new string[sequenNumber];
    for(i = 0; i < sequenNumber; i++)
      seqsName[i] = old.seqsName[i];
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    delete [] residuesNumber;
    if(old.residuesNumber) {
      residuesNumber = new int[sequenNumber];
      for(i = 0; i < sequenNumber; i++)
        residuesNumber[i] = old.residuesNumber[i];
    } residuesNumber = NULL;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    delete [] seqsInfo;
    if(old.seqsInfo) {
      seqsInfo = new string[sequenNumber];
      for(i = 0; i < sequenNumber; i++)
        seqsInfo[i] = old.seqsInfo[i];
    } else seqsInfo = NULL;

    delete [] saveResidues;
    if(old.saveResidues) {
      saveResidues = new int[residNumber];
      for(i = 0; i < residNumber; i++)
        saveResidues[i] = old.saveResidues[i];
    } else saveResidues = NULL;

    delete [] saveSequences;
    if(old.saveSequences) {
      saveSequences = new int[sequenNumber];
      for(i = 0; i < sequenNumber; i++)
        saveSequences[i] = old.saveSequences[i];
    } else saveSequences = NULL;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    delete [] identities;
    if(old.identities) {
      identities = new float*[sequenNumber];
      for(i = 0; i < sequenNumber; i++) {
        identities[i] = new float[sequenNumber];
        for(j = 0; j < sequenNumber; j++)
          identities[i][j] = old.identities[i][j];
      }
    } else identities = NULL;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    delete [] overlaps;
    if(old.overlaps) {
      overlaps = new float*[sequenNumber];
      for(i = 0; i < sequenNumber; i++) {
        overlaps[i] = new float[sequenNumber];
        for(j = 0; j < sequenNumber; j++)
          overlaps[i][j] = old.overlaps[i][j];
      }
    } else overlaps = NULL;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    delete sgaps;
    sgaps = NULL;

    delete scons;
    scons = NULL;

    delete seqMatrix;
    seqMatrix = old.seqMatrix;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  return *this;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Class destructor */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */

alignment::~alignment(void) {
  int i;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(sequences != NULL)
    delete [] sequences;
  sequences = NULL;

  if(seqsName != NULL)
    delete [] seqsName;
  seqsName = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(residuesNumber != NULL)
    delete [] residuesNumber;
  residuesNumber = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(seqsInfo != NULL)
    delete [] seqsInfo;
  seqsInfo = NULL;

  if(saveResidues != NULL)
    delete[] saveResidues;
  saveResidues = NULL;

  if(saveSequences != NULL)
    delete[] saveSequences;
  saveSequences = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(identities != NULL) {
    for(i = 0; i < sequenNumber; i++)
      delete [] identities[i];
    delete [] identities;
  }
  identities = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(overlaps != NULL) {
    for(i = 0; i < sequenNumber; i++)
      delete [] overlaps[i];
    delete [] overlaps;
  }
  overlaps = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(sgaps != NULL)
    delete sgaps;
  sgaps = NULL;

  if(scons != NULL)
    delete scons;
  scons = NULL;

  if(seqMatrix != NULL)
    delete seqMatrix;
  seqMatrix = NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  oldAlignment = false;

  sequenNumber = 0;
  residNumber  = 0;

  isAligned = false;
  reverse   = false;

  terminalGapOnly = false;
  right_boundary = -1;
  left_boundary = -1;

  iformat =  0;
  oformat =  0;
  shortNames = false;

  dataType = 0;

  ghWindow = 0;
  shWindow = 0;

  blockSize = 0;

  filename.clear();
  aligInfo.clear();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

// Try to load current alignment and inform otherwise
bool alignment::loadAlignment(char *alignmentFile) {

  // Detect input alignment format - it is an strict detection procedure
  iformat = formatInputAlignment(alignmentFile);
  // Unless it is indicated somewhere else, output alignment format will be
  // the same as the input one
  oformat = iformat;

  // Use the appropiate function to read input alignment
  switch(iformat) {
    case 1:
      return loadClustalAlignment(alignmentFile);
    case 3:
      return loadNBRF_PirAlignment(alignmentFile);
    case 8:
      return loadFastaAlignment(alignmentFile);
    case 11:
      return loadPhylip3_2Alignment(alignmentFile);
    case 12:
      return loadPhylipAlignment(alignmentFile);
    case 17:
      return loadNexusAlignment(alignmentFile);
    case 21:
      return loadMegaInterleavedAlignment(alignmentFile);
    case 22:
      return loadMegaNonInterleavedAlignment(alignmentFile);
  // Return a FALSE value - meaning the input alignment was not loaded
    default:
      return false;
  }
  return false;
}

// Return input alignment format - it is useful to set-up output alignment one
int alignment::getInputFormat(void) {
  return iformat;
}

// Return output alignment format
int alignment::getOutputFormat(void) {
  return oformat;
}

// Return whether there is one or more alignments at the input file.
// Currently trimAl family only support single alignments per file.
int alignment::typeInputFile(void) {
  return SINGLE;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function prints the alignmnet to the standard output using an
 * appropiate function depending on the output format */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::printAlignment(void){

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(sequences == NULL)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  if((residNumber == 0)  || (sequenNumber == 0)) {
    cerr << endl << "WARNING: Output alignment has not been generated. "
      << "It is empty" << endl << endl;
    return true;
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  switch(oformat) {
    case 1:
      alignmentClustalToFile(cout);
      break;
    case 3:
      alignmentNBRF_PirToFile(cout);
      break;
    case 8:
      alignmentFastaToFile(cout);
      break;
    case 11:
      alignmentPhylip3_2ToFile(cout);
      break;
    case 12:
      alignmentPhylipToFile(cout);
      break;
    case 13:
      alignmentPhylip_PamlToFile(cout);
      break;
    case 17:
      alignmentNexusToFile(cout);
      break;
    case 21: case 22:
      alignmentMegaToFile(cout);
      break;
    case 99:
      getSequences(cout);
      break;
    case 100:
      alignmentColourHTML(cout);
      break;
    default:
      return false;
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  return true;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function puts the alignment in a given file */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::saveAlignment(char *destFile) {

  ofstream file;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  if(sequences == NULL)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  if((residNumber == 0)  || (sequenNumber == 0)) {
    cerr << endl << "WARNING: Output alignment has not been generated. "
      << "It is empty." << endl << endl;
    return true;
  }

  /* Check whether the input sequences file is aligned or not before creating
   * a given output format. */
  switch(oformat) {
    case 1: case 11: case 12: case 13: case 17: case 21: case 22:
      /* Check whether sequences in the alignment are aligned or not.
       * Warn about it if there are not aligned. */
      if (!isAligned) {
        cerr << endl << "ERROR: Sequences are not aligned. Output format is "
             << "not compatible with unaligned sequences.";
      return false;
    }
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* File open and correct open check */
  file.open(destFile);
  if(!file) return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Depending on the output format, we call to the
   * appropiate function */
  switch(oformat) {
    case 1:
      alignmentClustalToFile(file);
      break;
    case 3:
      alignmentNBRF_PirToFile(file);
      break;
    case 8:
      alignmentFastaToFile(file);
      break;
    case 11:
      alignmentPhylip3_2ToFile(file);
      break;
    case 12:
      alignmentPhylipToFile(file);
      break;
    case 13:
      alignmentPhylip_PamlToFile(file);
      break;
    case 17:
      alignmentNexusToFile(file);
      break;
    case 21: case 22:
      alignmentMegaToFile(file);
      break;
    case 99:
      getSequences(file);
      break;
    case 100:
      alignmentColourHTML(file);
      break;
    default:
      return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Close the output file */
  file.close();

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* All is OK, return true */
  return true;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function trimms a given alignment based on the gap distribution
 * values. To trim the alignment, this function uses two parameters:
 *
 *      baseLine:   Minimum percentage of columns that should
 *                  be conserved in the new alignment.
 *      gapsPct:    Maximum percentage of gaps per column that
 *                  should be in the new alignment.
 *
 * The function selects that combination of parameters that maximize the
 * final number of columns in the new alignment. There is an extra parameter
 * to ask for the complementary alignment. A complementary alignment consits
 * of those columns that would delete applying the standard approach. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanGaps(float baseLine, float gapsPct, bool complementary) {

  alignment *ret;
  double cut;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If gaps statistics are not calculated, we
   * calculate them */
  if(calculateGapStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Obtain the cut point using the given parameters */
  cut = sgaps -> calcCutPoint(baseLine, gapsPct);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Once we have the cut value proposed, we call the
   * appropiate method to clean the alignment and, then,
   * generate the new alignment. */
  ret = cleanByCutValue(cut, baseLine, sgaps -> getGapsWindow(), complementary);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a reference of the new alignment */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function trimms a given alignment based on the similarity distribution
 * values. To trim the alignment, this function uses two parameters:
 *
 *      baseLine:   Minimum percentage of columns that should
 *                  be conserved in the new alignment.
 *      conservat:  Minimum value of similarity per column that
 *                  should be in the new alignment.
 *
 * The function selects that combination of parameters that maximize the
 * final number of columns in the new alignment. There is an extra parameter
 * to ask for the complementary alignment. A complementary alignment consits
 * of those columns that would delete applying the standard approach. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanConservation(float baseLine, float conservationPct, bool complementary) {

  alignment *ret;
  float cut;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If conservation's statistics are not calculated,
   * we calculate them */
  if(calculateConservationStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Calculate the cut point using the given parameters */
  cut = (float) scons -> calcCutPoint(baseLine, conservationPct);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Once we have the cut value, we call the appropiate
   * method to clean the alignment and, then, generate
     the new alignment */
  ret = cleanByCutValue(cut, baseLine, scons -> getMdkwVector(), complementary);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a reference of the new alignment */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function trimms a given alignment based on the similarity and gaps
 * distribution values. To trim the alignment, this function uses three
 * parameters:
 *
 *      baseLine:   Minimum percentage of columns that should
 *                  be conserved in the new alignment.
 *      gapsPct:    Maximum percentage of gaps per column that
 *                  should be in the new alignment.
 *      conservat:  Minimum value of similarity per column that
 *                  should be in the new alignment.
 *
 * The function selects that combination of parameters that maximize the
 * final number of columns in the new alignment. If the baseLine parameter
 * is the most strict, the other two ones would be relaxed to keep columns
 * that would not be selected. There is an extra parameter to ask for the
 * complementary alignment. A complementary alignment consits of those
 * columns that would delete applying the standard approach. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::clean(float baseLine, float GapsPct, float conservationPct, bool complementary) {

  alignment *ret;
  float cutCons;
  double cutGaps;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If gaps statistics are not calculated, we calculate
   *  them */
  if(calculateGapStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If conservation's statistics are not calculated,
   * we calculate them */
  if(calculateConservationStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Calculate the two cut points using the parameters */
  cutGaps = sgaps->calcCutPoint(baseLine, GapsPct);
  cutCons = scons->calcCutPoint(baseLine, conservationPct);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Clean the alingment using the two cut values, the
   * gapsWindow and MDK_Windows vectors and the baseline
   * value */
  ret = cleanByCutValue(cutGaps, sgaps -> getGapsWindow(), baseLine, cutCons, scons -> getMdkwVector(), complementary);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a reference of the clean alignment object */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function cleans a given alignment based on the consistency values
 * asses from a dataset of alignments. The method takes three parameters:
 *
 *      cutpoint:   Lower limit (0-1) of comparefile value admits in
 *                  the new alignment.
 *      baseline:   Minimum percentage of columns to have in the new alignment
 *      vectValues: A vector with alignment's consistency values.
 *
 * The function computes the optimal parameter combination value to trim
 * an alignment based on the consistency value from the comparison among
 * a dataset of alignments with the same sequences. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanCompareFile(float cutpoint, float baseLine, float *vectValues, bool complementary) {

  alignment *ret;
  float cut, *vectAux;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Allocate memory */
  vectAux = new float[residNumber];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Sort a copy of the vectValues vector, and take the
   *  value at 100% - baseline position. */
  utils::copyVect((float *) vectValues, vectAux, residNumber);
  utils::quicksort(vectAux, 0, residNumber-1);
  cut = vectAux[(int) ((float)(residNumber - 1) * (100.0 - baseLine)/100.0)];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* We have to decide which is the smallest value
   * between the cutpoint value and the value from
   * the minimum percentage threshold */
  cut = cutpoint < cut ? cutpoint : cut;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocate memory */
  delete [] vectAux;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Clean the selected alignment using the input parameters. */
  ret = cleanByCutValue(cut, baseLine, vectValues, complementary);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a refernce of the new alignment */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function removes those sequences that are misaligned with the rest
 * of sequences in the alignment at given sequence and residue overlaps.
 *
 *      overlapColumn:  Set the minimum similarity fraction value that has
 *                      to have a position from a given sequence to be
 *                      considered as an hit.
 *      minimumOverlap: Set the minimum proportion of hits that has to be
 *                      a sequence to keep it in the new alignment.
 *
 * At the same time, it's possible to get an alignment with those columns
 * that this method should remove setting for that purpose the complementary
 * parameter to True */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanSpuriousSeq(float overlapColumn, float minimumOverlap, bool complementary) {

  float *overlapVector;
  alignment *newAlig;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Allocate local memory */
  overlapVector = new float[sequenNumber];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute the overlap's vector using the overlap
   * column's value */
  if(!calculateSpuriousVector(overlapColumn, overlapVector))
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Select and remove the sequences with a overlap less
   * than threshold's overlap and create a new alignemnt */
  newAlig = cleanOverlapSeq(minimumOverlap, overlapVector, complementary);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocate local memory */
  delete [] overlapVector;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a reference of the clean alignment object */
  return newAlig;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function implements the gappyout approach. To trim the alignment, this
 * method computes the slope from the gaps distribution from the input alig.
 * Using this information, the method asses the most abrupt change between
 * three consecutive points from that distribution and selects the first of
 * this point as aan cut-off point.
 *
 * The method also can return the complementary alignment that consists of
 * those columns that would be delete applying this algorithm. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::clean2ndSlope(bool complementarity) {

  alignment *ret;
  int cut;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If gaps statistics are not calculated, we calculate
   *  them */
  if(calculateGapStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* We get the cut point using a automatic method for
   * this purpose. */
  cut = sgaps -> calcCutPoint2ndSlope();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Using the cut point calculates in last steps, we
   * clean the alignment and generate a new alignment */
  ret = cleanByCutValue(cut, 0, sgaps->getGapsWindow(), complementarity);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Returns the new alignment. */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method implements the strict and strictplus approaches, to apply
 * these algorithms, the program computes the gaps and similarity distribution
 * values to decide which ones are the optimal cutoff points. To compute these
 * cutoff points, the method applies those steps described in the manual. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanCombMethods(bool complementarity, bool variable) {

  float simCut, first20Point, last80Point, *simil, *vectAux;
  int i, j, acm, gapCut, *positions, *gaps;
  double inic, fin, vlr;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If gaps statistics are not calculated, we calculate
   *  them */
  if(calculateGapStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Computes the gap cut point using a automatic method
   * and at the same time, we get the gaps values from
   * the alignment. */
  gapCut = sgaps -> calcCutPoint2ndSlope();
  gaps = sgaps -> getGapsWindow();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If conservation's statistics are not calculated,
   * we calculate them */
  if(calculateConservationStats() != true)
    return NULL;

  // Once the conservation stats have been calculated, generate the vector
  // containing the similarity value for each column considering parameters
  // such as windows size
  simil = scons -> getMdkwVector();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Allocate local memory and initializate it to -1 */
  positions = new int[residNumber];
  utils::initlVect(positions, residNumber, -1);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* The method only selects columns with gaps number
   * less or equal than the gap's cut point. Counts the
   * number of columns that have been selected */
  for(i = 0, acm = 0; i < residNumber; i++) {
    if(gaps[i] <= gapCut) {
      positions[i] = i;
      acm++;
    }
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Allocate local memory and save the similaritys
   * values for the columns that have been selected */
  vectAux = new float[acm];
  for(i = 0, j = 0; i < residNumber; i++)
    if(positions[i] != -1)
      vectAux[j++] = simil[i];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Sort the conservation's value vector. */
  utils::quicksort(vectAux, 0, acm-1);

  /* ...and search for the vector points at the 20 and
   * 80% of length. */
  first20Point = 0;
  last80Point  = 0;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  for(i = acm - 1, j = 1; i >= 0; i--, j++) {
    if((((float) j/acm) * 100.0) <= 20.0)
      first20Point = vectAux[i];
    if((((float) j/acm) * 100.0) <= 80.0)
      last80Point = vectAux[i];
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Computes the logaritmic's values for those points.
   * Finally the method computes the similarity cut
   * point using these values. */
  inic = log10(first20Point);
  fin  = log10(last80Point);
  vlr  = ((inic - fin) / 10) + fin;
  simCut = (float) pow(10, vlr);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Clean the alignment and generate a new alignment
   * object using the gaps cut and the similaritys cut
   *  values */
  alignment *ret = cleanStrict(gapCut, sgaps -> getGapsWindow(),
        simCut, scons -> getMdkwVector(), complementarity, variable);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocate local memory */
  delete [] vectAux;
  delete [] positions;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return a reference of the new alignment */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function allows to delete those columns consistents of only gaps.
 * This function is specially useful when we remove misaligned sequences from
 * a given alignmnet. As the rest of functions, this function can return the
 * complementary alignment but this alignment would have only columns with
 * all gaps. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::cleanNoAllGaps(bool complementarity) {

  alignment *ret;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If gaps statistics are not calculated, we calculate
   *  them */
  if(calculateGapStats() != true)
    return NULL;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* We want to conserve the columns with gaps' number
   * less or equal than sequences' number - 1  */
  ret = cleanByCutValue((sequenNumber - 1), 0, sgaps->getGapsWindow(), complementarity);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Returns the new alignment. */
  return ret;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method computes the overlap for each sequence given a overlap value.
 * This overlap set the minimum fraction that has to be a position for the
 * selected sequence to count as an hit. This proportion measures how much
 * is similar, (in terms of residues, indetermination and gaps), a given
 * position respect to the element on its column. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::calculateSpuriousVector(float overlap, float *spuriousVector) {

  int i, j, k, seqValue, ovrlap, hit;
  float floatOverlap;
  char indet;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute the overlap */
  floatOverlap = overlap * float(sequenNumber-1);
  ovrlap = int(overlap * (sequenNumber-1));

  if(floatOverlap > float(ovrlap))
    ovrlap++;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If the spurious vectos is NULL, returns false. */
  if(spuriousVector == NULL)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Depending on the kind of alignment, we have
   * different indetermination symbol */
  if(getTypeAlignment() == AAType)
    indet = 'X';
  else
    indet = 'N';
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* For each alignment's sequence, computes its overlap */
  for(i = 0, seqValue = 0; i < sequenNumber; i++, seqValue = 0) {

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* For each alignment's column, computes the overlap
     * between the selected sequence and the other ones */
    for(j = 0; j < residNumber; j++) {

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* For sequences are before the sequence selected */
      for(k = 0, hit = 0; k < i; k++) {

        /* ***** ***** ***** ***** ***** ***** ***** ***** */
        /* If the element of sequence selected is the same
         * that the element of sequence considered, computes
         * a hit */
        if(sequences[i][j] == sequences[k][j])
          hit++;

        /* If the element of sequence selected isn't a 'X' nor
         * 'N' (indetermination) or a '-' (gap) and the element
         * of sequence considered isn't a  a 'X' nor 'N'
         * (indetermination) or a '-' (gap), computes a hit */
        else if((sequences[i][j] != indet) && (sequences[i][j] != '-')
          && (sequences[k][j] != indet) && (sequences[k][j] != '-'))
          hit++;
      }
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* For sequences are after the sequence selected */
      for(k = (i + 1); k < sequenNumber; k++) {

        /* ***** ***** ***** ***** ***** ***** ***** ***** */
        /* If the element of sequence selected is the same
         * that the element of sequence considered, computes
         * a hit */
        if(sequences[i][j] == sequences[k][j])
          hit++;

        /* If the element of sequence selected isn't a 'X' nor
         * 'N' (indetermination) or a '-' (gap) and the element
         * of sequence considered isn't a  a 'X' nor 'N'
         * (indetermination) or a '-' (gap), computes a hit */
        else if((sequences[i][j] != indet) && (sequences[i][j] != '-')
          && (sequences[k][j] != indet) && (sequences[k][j] != '-'))
          hit++;
      }
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Finally, if the hit's number divided by number of
       * sequences minus one is greater or equal than
       * overlap's value, computes a column's hit. */
      if(hit >= ovrlap)
        seqValue++;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* For each alignment's sequence, computes its spurious's
     * or overlap's value as the column's hits -for that
       sequence- divided by column's number. */
    spuriousVector[i] = ((float) seqValue / residNumber);
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If there is not problem in the method, return true */
  return true;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method compute the cluster belongs to a given alignment at certain
 * identity threshold. To know the clusters from the alignment, those
 * clusters are calculated as follow: 1) Select the longest sequences. 2)
 * Using the previous computed identity values, compare if the second
 * longest sequence belongs, because the identity value with the cluster
 * representative is equal or greater than a given threshold, to this cluster
 * or not. 3) If the sequence belongs to this cluster, we add it, if not
 * belongs, we create a new cluster and fix this sequence as its representative
 * 4) Continue with the rest of sequences. In the case that a given sequence
 * can belong to more than one cluster, we choose the cluster which one
 * maximize the identity value respect to its representative sequence */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int *alignment::calculateRepresentativeSeq(float maximumIdent) {

  int i, j, pos, clusterNum, **seqs;
  int *cluster;
  static int *repres;
  float max;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Ask for the sequence identities assesment */
  if(identities == NULL)
    calculateSeqIdentity();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  seqs = new int*[sequenNumber];
  for(i = 0; i < sequenNumber; i++) {
    seqs[i] = new int[2];
    seqs[i][0] = utils::removeCharacter('-', sequences[i]).size();
    seqs[i][1] = i;
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  utils::quicksort(seqs, 0, sequenNumber-1);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  cluster = new int[sequenNumber];
  cluster[0] = seqs[sequenNumber - 1][1];
  clusterNum = 1;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  for(i = sequenNumber - 2; i >= 0; i--) {

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    for(j = 0, max = 0, pos = -1; j < clusterNum; j++) {
      if(identities[seqs[i][1]][cluster[j]] > maximumIdent) {
        if(identities[seqs[i][1]][cluster[j]] > max) {
          max = identities[seqs[i][1]][cluster[j]];
          pos = j;
        }
      }
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    if(pos == -1) {
    cluster[j] = seqs[i][1];
      clusterNum++;
    }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  repres = new int[clusterNum + 1];
  repres[0] = clusterNum;
  for(i = 0; i < clusterNum; i++)
    repres[i+1] = cluster[i];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocate dinamic memory */
  for(i = 0; i < sequenNumber; i++)
  delete [] seqs[i];

  delete [] cluster;
  delete [] seqs;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  return repres;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method looks for the optimal cut point for a given clusters number.
 * The idea is to find a identity cut-off point that can be used to get a
 * number of representative sequences similar to the input parameter */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
float alignment::getCutPointClusters(int clusterNumber) {

  float max, min, avg, gMax, gMin, startingPoint, prevValue = 0, iter = 0;
  int i, j, clusterNum, *cluster, **seqs;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If the user wants only one cluster means that all
   * of sequences have to be in the same clauster.
   * Otherwise, if the users wants the maximum number of
   * clusters means that each sequence have to be in their
   * own cluster */
  if(clusterNumber == sequenNumber) return 1;
  else if(clusterNumber == 1) return 0;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Ask for the sequence identities assesment */
  if(identities == NULL)
    calculateSeqIdentity();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute the maximum, the minimum and the average
   * identity values from the sequences */
  for(i = 0,gMax = 0, gMin = 1, startingPoint = 0; i < sequenNumber; i++) {
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    for(j = 0, max = 0, avg = 0, min = 1; j < i; j++) {
      if(max < identities[i][j])
        max  = identities[i][j];
      if(min > identities[i][j])
        min  = identities[i][j];
      avg += identities[i][j];
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    for(j = i + 1; j < sequenNumber; j++) {
      if(max < identities[i][j])
        max  = identities[i][j];
      if(min > identities[i][j])
        min  = identities[i][j];
      avg += identities[i][j];
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    startingPoint += avg / (sequenNumber - 1);
    if(max > gMax) gMax = max;
    if(min < gMin) gMin = min;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Take the starting point as the average value */
  startingPoint /= sequenNumber;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute and sort the sequence length */
  seqs = new int*[sequenNumber];
  for(i = 0; i < sequenNumber; i++) {
    seqs[i] = new int[2];
    seqs[i][0] = utils::removeCharacter('-', sequences[i]).size();
    seqs[i][1] = i;
  }
  utils::quicksort(seqs, 0, sequenNumber-1);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Create the data structure to store the different
   * clusters for a given thresholds */
  cluster    = new int[sequenNumber];
  cluster[0] = seqs[sequenNumber - 1][1];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /**** ***** ***** ***** ***** ***** ***** ***** */
  /* Look for the optimal identity value to get the
   * number of cluster set by the user. To do that, the
   * method starts in the average identity value and moves
   * to the right or lefe side of this value depending on
   * if this value is so strict or not. We set an flag to
   * avoid enter in an infinite loop, if we get the same
   * value for more than 10 consecutive point without get
   * the given cluster number means that it's impossible
   * to get this cut-off and we need to stop the search */
  do {
    clusterNum = 1;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Start the search */
    for(i = sequenNumber - 2; i >= 0; i--) {
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      for(j = 0; j < clusterNum; j++)
        if(identities[seqs[i][1]][cluster[j]] > startingPoint)
          break;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      if(j == clusterNum) {
        cluster[j] = seqs[i][1];
        clusterNum++;
      }
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Given the cutoff point, if we get the same cluster
     * number or we have been getting for more than 10
     * consecutive times the same cutoff point, we stop */
    if((clusterNum == clusterNumber) || (iter > 10))
      break;
    else {
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* We have to move to the left side from the average
       * to decrease the cutoff value and get a smaller number
       * of clusters */
      if(clusterNum > clusterNumber) {
        gMax = startingPoint;
        startingPoint = (gMax + gMin)/2;
      } else {
      /* In the opposite side, we have to move to the right
       * side from the cutting point to be more strict and get
       * a bigger number of clusters */
        gMin = startingPoint;
        startingPoint = (gMax + gMin)/2;
      }
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* If the clusters number from the previous iteration
       * is different from the current number, we put the
       * iteration number to 0 and store this new value */
      if(prevValue != clusterNum) {
        iter = 0;
        prevValue = clusterNum;
      /* Otherwise, we increase the iteration number to
       * avoid enter in an infinitve loop */
      } else iter++;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  } while(true);

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocate dinamic memory */
  for(i = 0; i < sequenNumber; i++) delete [] seqs[i];
  delete [] seqs;
  delete [] cluster;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  return startingPoint;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method select one representative sequence (the longest one) per each
 * cluster from the input alignment and generate a new alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::getClustering(float identityThreshold) {

  string *matrixAux, *newSeqsName;
  int i, j, *clustering;
  alignment *newAlig;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Get the representative member for each cluster
   * given a maximum identity threshold */
  clustering = calculateRepresentativeSeq(identityThreshold);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Put all sequences to be deleted and get back those
   * sequences that are representative for each cluster
   * */
  for(i = 0; i < sequenNumber; i ++)
    saveSequences[i] = -1;
  for(i = 1; i <= clustering[0]; i ++)
    saveSequences[clustering[i]] = clustering[i];
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* We allocate memory to save the sequences selected */
  matrixAux = new string[clustering[0]];
  newSeqsName = new string[clustering[0]];

  /* Copy to new structures the information that have
   * been selected previously. */
  for(i = 0, j = 0; i < sequenNumber; i++)
    if(saveSequences[i] != -1) {
       newSeqsName[j] = seqsName[i];
       matrixAux[j] = sequences[i];
       j++;
    }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* When we have all parameters, we create the new
   * alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    clustering[0], residNumber, iformat, oformat, shortNames, dataType, isAligned,
    reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber, residuesNumber,
    saveResidues, saveSequences, ghWindow, shWindow, blockSize, identities,
    overlaps);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocated auxiliar memory */
  delete [] matrixAux;
  delete [] newSeqsName;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return the new alignment reference */
  return newAlig;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function returns the backtranslation for a given protein processed
 * alignment into its CDS alignment. To do this convertion, the function needs
 * the Coding sequences as well the original composition of each protein
 * sequence. Also, the function needs to know which columns/sequences will be
 * in the final alignment to carray out the conversion. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
alignment *alignment::getTranslationCDS(int newResidues, int newSequences, int *ColumnsToKeep, string *oldSeqsName, sequencesMatrix *seqMatrix, alignment *ProtAlig) {

  string *matrixAux;
  alignment *newAlig;
  int i, j, k, l, oldResidues;
  int *mappedSeqs, *tmpSequence, *selectedRes;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Map the selected protein sequences to the input
   * coding sequences */
  mappedSeqs = new int[newSequences];
  for(i = 0; i < sequenNumber; i++)
    for(j = 0; j < newSequences; j++)
    if(!seqsName[i].compare(oldSeqsName[j])) {
      mappedSeqs[j] = i;
      break;
    }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Get the original alignment size as well the original
   * selected/non-selected columns */
  oldResidues = seqMatrix -> getResidNumber();
  selectedRes = new int[oldResidues];

  for(i = 0; i < oldResidues; i++)
    selectedRes[i] = i;

  for(j = 0; j < ColumnsToKeep[0]; j++)
    selectedRes[j] = -1;

  for(i = 0; i < newResidues - 1; i++)
    for(j = ColumnsToKeep[i] + 1; j < ColumnsToKeep[i+1]; j++)
      selectedRes[j] = -1;

  for(j = ColumnsToKeep[newResidues - 1] + 1; j < oldResidues; j++)
    selectedRes[j] = -1;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* We allocate memory to save the columns selected */
  matrixAux = new string[newSequences];
  tmpSequence = new int[oldResidues];

  /* Using the information about which residues for each
   * sequence was selected by others function, we process
   * these residues to recover the corresponding codons */
  for(i = 0; i < newSequences; i++)
  if(seqMatrix -> getSequence(oldSeqsName[i], tmpSequence)) {
    for(j = 0; j < oldResidues; j++) {
    if((selectedRes[j] != -1) && (tmpSequence[j] != 0)) {

      for(k = 3 * (tmpSequence[j] - 1), l = 0; l < 3; k++, l++) {
        /* Check whether the nucleotide sequences end has been reached or not.
         * If it has been reached, complete backtranslation using indetermination
         * symbols 'N' */
        if((int) sequences[mappedSeqs[i]].length() > k)
          matrixAux[i].resize(matrixAux[i].size() + 1, sequences[mappedSeqs[i]][k]);
        else
          matrixAux[i].resize(matrixAux[i].size() + 1, 'N');
    }
    } else if(selectedRes[j] != -1) {
          matrixAux[i].resize(matrixAux[i].size() + 3, '-');
    }
    }
  /* If there is any problems with a sequence then
   * the function returns an error */
  } else {
    return NULL;
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */


  /* Remove columns/sequences composed only by gaps before making the retrotranslation */


  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* When we have all parameters, we create the new
   * alignment */
  newAlig = new alignment(filename, "", matrixAux, oldSeqsName, NULL, newSequences,
    newResidues * 3, ProtAlig -> getInputFormat(), ProtAlig -> getOutputFormat(),
    ProtAlig -> getShortNames(), DNAType, true, ProtAlig -> getReverse(),
    terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, oldResidues * 3, NULL, NULL,
    NULL, 0, 0, ProtAlig -> getBlockSize(), NULL, NULL);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Deallocated auxiliar memory */
  delete [] matrixAux;
  delete mappedSeqs;
  delete tmpSequence;
  delete selectedRes;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Return the new alignment reference */
  return newAlig;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function computes the gaps statistics for the input alignment. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::calculateGapStats(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If alignment matrix is not created, return false */
  if(sequences == NULL)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If sgaps object is not created, we create them
     and calculate the statistics */
  if(sgaps == NULL) {
    sgaps = new statisticsGaps(sequences, sequenNumber, residNumber, dataType);
    sgaps -> applyWindow(ghWindow);
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  return true;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Print the gaps value for each column in the alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printStatisticsGapsColumns(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Check if there is computed the gaps statistics */
  if(calculateGapStats())
    /* then prints the information */
    sgaps -> printGapsColumns();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Print the acumulative gaps distribution value from the input alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printStatisticsGapsTotal(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Check it there is computed the gaps statistics */
  if(calculateGapStats())
    /* then prints the information */
    sgaps -> printGapsAcl();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Set the similarity matrix. This matrix is necessary for some methods in
 * the program */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::setSimilarityMatrix(similarityMatrix *sm) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If scons object is not created, we create them */
  if(scons == NULL)
    scons = new statisticsConservation(sequences, sequenNumber, residNumber, dataType);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Associate the matrix to the similarity statistics
   * object */
  if(!scons -> setSimilarityMatrix(sm))
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If it's OK, we return true */
  return true;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method compute the similarity values from the input alignment if it
 * has not been computed before */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::calculateConservationStats(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* It the gaps statistics object has not been created
   * we create it */
  if(calculateGapStats() != true)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* It the similarity statistics object has not been
   * created we create it */
  if(scons == NULL)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Ask for the similarity matrix */
  if(scons -> isSimMatrixDef() != true)
    return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute the similarity statistics from the input
   * alignment */
  if(!scons -> calculateVectors(sequences, sgaps->getGapsWindow()))
    return false;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Ask to know if it is necessary to apply any window
   * method. If it's necessary, we apply it */
  if(scons->isDefinedWindow())
    return true;
  else
    return scons->applyWindow(shWindow);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Print the similarity value for each column from the alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printStatisticsConservationColumns(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Check if the similarity statistics object has been
   * created */
  if(calculateConservationStats())
    /* then prints the information */
    scons -> printConservationColumns();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Print the accumulative similarity distribution values from the alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printStatisticsConservationTotal(void) {

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Check if the similarity statistics object has been
   * created */
  if(calculateConservationStats())
    /* then prints the information */
    scons -> printConservationAcl();
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method prints the correspondece between the columns in the original
 * and in the trimmed alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printCorrespondence(void) {
  int i;

  cout << "#ColumnsMap\t";
  /* Print the saveResidues relathionship */
  for(i = 0; i < residNumber - 1; i++)
    cout << saveResidues[i] << ", ";
  cout << saveResidues[i] << endl;

}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Return the number of the sequences in the alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getNumSpecies(void) {
  return sequenNumber;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Return the number of residues in the alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getNumAminos(void) {
  return residNumber;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Sets the condition to remove only terminal gaps after applying any
 * trimming method or not.   */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::trimTerminalGaps(bool terminalOnly_, int *boundaries_) {
  terminalGapOnly = terminalOnly_;

  /* We are only interested in the first and last number of the vector - this
   * vector is generated by other function where it is important to store all
   * intervals */
  if (boundaries_ != NULL) {
    left_boundary = boundaries_[0];
    right_boundary = boundaries_[1];
  }
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method stores the diferent windows values in the alignment object */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::setWindowsSize(int ghWindow_, int shWindow_) {
  ghWindow = ghWindow_;
  shWindow = shWindow_;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method lets to change the output format */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::setOutputFormat(int format_, bool shortNames_) {
  oformat    = format_;
  shortNames = shortNames_;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method sets a new block size value */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::setBlockSize(int blockSize_) {
  blockSize = blockSize_;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Return true if the shortNames flag has been setted */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getShortNames(void) {
  return shortNames;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Return true if the reverse flag has been setted. */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getReverse(void) {
  return reverse;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method lets to change the output alignment orientation */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::setReverse(void) {
  reverse = true;
}


/* Set appropiate flag to decide whether sequences composed only by gaps should
 * be kept or not */
void alignment::setKeepSequencesFlag(bool flag) {
  keepSequences = flag;
}

/* Set appropiate flag to decide whether original sequences header should be
 * dumped into the output stream with or without any preprocessing step */
void alignment::setKeepSeqsHeaderFlag(bool flag) {
  keepHeader = flag;
}
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Return the block size value */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getBlockSize(void) {
  return blockSize;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method returns the sequences name */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::getSequences(string *Names) {
  for(int i = 0; i < sequenNumber; i++)
    Names[i] = seqsName[i];
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::getSequences(string *Names, int *lengths) {
  for(int i = 0; i < sequenNumber; i++) {
    lengths[i] = (int) utils::removeCharacter('-', sequences[i]).length();
    Names[i] = seqsName[i];
  }
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::getSequences(string *Names, string *Sequences, int *Lengths) {
  for(int i = 0; i < sequenNumber; i++) {
    Names[i] = seqsName[i];
    Sequences[i] = utils::removeCharacter('-', sequences[i]);
    Lengths[i] = (int) Sequences[i].length();
  }
}


/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* For a given set of sequences name, check for its correspondence with
 * the current sequences name. If there is not a complete correspondence
 * between both sets, the method return false, otherwise, return true */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::getSeqNameOrder(string *names, int *orderVector) {
  int i, j, numNames;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* For each name in the current alignment, we look
   * for its correspondence in the input set */
  for(i = 0, numNames = 0; i < sequenNumber; i++) {
    for(j = 0; j < sequenNumber; j++) {
      if(seqsName[i] == names[j]) {
        orderVector[i] = j;
        numNames++;
        break;
      }
    }
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Depending on if we get the complete correspondence
   * between both sets of names, we return true or not */
  if(numNames == sequenNumber) return true;
  else return false;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This method lets to build a sequence matrix. A sequence matrix contains
 * the residue position for each sequence without taking into account the
 * gaps in the sequence. This means that at position 10 we have the residue
 * 1 for that sequence */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::sequenMatrix(void) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Use this method to destroy a given sequence matrix */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::destroySequenMatrix(void) {
  if(seqMatrix != NULL)
    delete seqMatrix;
  seqMatrix = NULL;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Print the alignment's sequence matrix */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::printSequenMatrix(void) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
  seqMatrix -> printMatrix();
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Given an index, this method returns the sequence matrix column for that
 * index */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::getColumnSeqMatrix(int column, int *columnSeqMatrix) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
  seqMatrix -> getColumn(column, columnSeqMatrix);
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function looks if a given value belongs a given row. In the affirmative
 * case, returns the columns value for that row and value combination */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::getColumnSeqMatrix(int value, int row, int *columnSeqMatrix) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
  seqMatrix -> getColumn(value, row, columnSeqMatrix);
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function change the rows order in the sequence matrix */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
void alignment::setSeqMatrixOrder(int *order) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
  seqMatrix -> setOrder(order);
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* This function change the rows order in the sequence matrix */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
sequencesMatrix *alignment::getSeqMatrix(void) {
  if(seqMatrix == NULL)
    seqMatrix = new sequencesMatrix(sequences, seqsName, sequenNumber, residNumber);
  return seqMatrix;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Computes, if it's necessary, and return the alignment's type */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int alignment::getTypeAlignment(void) {
  if(dataType == 0)
    dataType = utils::checkTypeAlignment(sequenNumber, residNumber, sequences);
  return dataType;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Returns the correspondence between the columns in the original and in the
 * trimmed alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int *alignment::getCorrespResidues(void) {
  return saveResidues;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Returns the correspondence between the sequences in the original and in
 * the trimmed alignment */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
int *alignment::getCorrespSequences(void) {
  return saveSequences;
}

/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
/* Returns if the alignment is aligned or not */
/* ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** ***** */
bool alignment::isFileAligned(void) {
  return isAligned;
}

/* *****************************************************************************
 * *****************************************************************************
 * *****************************************************************************
 * ************************************************************************** */

/* This method removes those columns that exceed a given threshold. If the
 * number of columns in the trimmed alignment is lower than a given percentage
 * of the original alignment. The program relaxes the threshold until to add
 * enough columns to achieve this percentage, to add those columns the program
 * start in the middle of the original alignment and make, alternatively,
 * movements to the left and right side from that point. This method also can
 * return the complementary alignment. */
alignment *alignment::cleanByCutValue(double cut, float baseLine,
  const int *gInCol, bool complementary) {

  int i, j, k, jn, oth, pos, block, *vectAux;
  string *matrixAux, *newSeqsName;
  alignment *newAlig;
  newValues counter;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Select the columns with a gaps value less or equal
   * than the cut point. */
  for(i = 0, counter.residues = 0; i < residNumber; i++)
    if(gInCol[i] <= cut) counter.residues++;
    else saveResidues[i] = -1;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute, if it's necessary, the number of columns
   * necessary to achieve the minimum number of columns
   * fixed by coverage parameter. */
  oth = utils::roundInt((((baseLine/100.0) - (float) counter.residues/residNumber)) * residNumber);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* if it's necessary to recover some columns, we
   * applied this instructions to recover it */
  if(oth > 0) {
    counter.residues += oth;

    /* Allocate memory */
    vectAux = new int[residNumber];

    /* Sort a copy of the gInCol vector, and take the value of the column that marks the % baseline */
    utils::copyVect((int *) gInCol, vectAux, residNumber);
    utils::quicksort(vectAux, 0, residNumber-1);
    cut = vectAux[(int) ((float)(residNumber - 1) * (baseLine)/100.0)];

    /* Deallocate memory */
    delete [] vectAux;
  }
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Fixed the initial size of blocks as 0.5% of
   * alignment's length */
  for(k = utils::roundInt(0.005 * residNumber); (k >= 0) && (oth > 0); k--) {

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* We start in the alignment middle then we move on
     * right and left side at the same time. */
    for(i = (residNumber/2), j = (i + 1); (((i > 0) || (j < (residNumber - 1))) && (oth > 0)); i--, j++) {

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Left side. Here, we compute the block's size. */
      for(jn = i; ((saveResidues[jn] != -1) && (jn >= 0) && (oth > 0)); jn--) ;

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we save all columns that have not been
       * saved previously. */
      if((i - jn) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn >= 0) && (oth > 0)); jn--) {
          if(gInCol[jn] <= cut) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      i = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Right side. Here, we compute the block's size. */
      for(jn = j; ((saveResidues[jn] != -1) && (jn < residNumber) && (oth > 0)); jn++) ;

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we save all columns that have not been
       * saved previously. */
      if((jn - j) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn < residNumber) && (oth > 0)); jn++) {
          if(gInCol[jn] <= cut) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      j = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* Keep only columns blocks bigger than an input columns block size */
  if(blockSize != 0) {

    /* Traverse all alignment looking for columns blocks greater than LONGBLOCK,
     * everytime than a column is not selected by the trimming method, check
     * whether the current block size until that point is big enough to be kept
     * or it should be removed from the final alignment */
    for(i = 0, pos = 0, block = 0; i < residNumber; i++) {
      if(saveResidues[i] != -1)
        block++;
      else {
        /* Remove columns from blocks smaller than input blocks size */
        if(block < blockSize)
          for(j = pos; j <= i; j++)
            saveResidues[j] = -1;
        pos = i + 1;
        block = 0;
      }
    }
    /* Check final block separately since it could happen than last block is not
     * big enough but because the loop end could provoke to ignore it */
    if(block < blockSize)
      for(j = pos; j < i; j++)
        saveResidues[j] = -1;
  }

  /* If the flag -terminalony is set, apply a method to look for internal
   * boundaries and get back columns inbetween them, if they exist */
  if(terminalGapOnly == true)
    if(!removeOnlyTerminal())
      return NULL;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(true, false);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Deallocate local memory */
  delete[] matrixAux;
  delete[] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* This method removes those columns that not achieve a given threshold. If the
 * number of columns in the trimmed alignment is lower than a given percentage
 * of the original alignment. The program relaxes the threshold until to add
 * enough columns to achieve this percentage, to add those columns the program
 * start in the middle of the original alignment and make, alternatively,
 * movements to the left and right side from that point. This method also can
 * return the complementary alignment. */
alignment *alignment::cleanByCutValue(float cut, float baseLine,
  const float *ValueVect, bool complementary) {

  int i, j, k, jn, oth, pos, block;
  string *matrixAux, *newSeqsName;
  alignment *newAlig;
  newValues counter;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Select the columns with a conservation's value
   * greater than the cut point. */
  for(i = 0, counter.residues = 0; i < residNumber; i++)
    if(ValueVect[i] > cut) counter.residues++;
    else saveResidues[i] = -1;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute, if it's necessary, the number of columns
   * necessary to achieve the minimum number of columns
   * fixed by coverage value. */
  oth = utils::roundInt((((baseLine/100.0) - (float) counter.residues/residNumber)) * residNumber);
  if(oth > 0) counter.residues += oth;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Fixed the initial size of blocks as 0.5% of
   * alignment's length */
  for(k = utils::roundInt(0.005 * residNumber); (k >= 0) && (oth > 0); k--) {

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* We start in the alignment middle then we move on
     * right and left side at the same time. */
    for(i = (residNumber/2), j = (i + 1); (((i > 0) || (j < (residNumber - 1))) && (oth > 0)); i--, j++) {

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Left side. Here, we compute the block's size. */
      for(jn = i; ((saveResidues[jn] != -1) && (jn >= 0) && (oth > 0)); jn--) ;

       /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we save all columns that have not been
       * saved previously. */
      /* Here, we only accept column with a conservation's
       * value equal to conservation cut point. */
      if((i - jn) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn >= 0) && (oth > 0)); jn--) {
          if(ValueVect[jn] == cut) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      i = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Right side. Here, we compute the block's size. */
      for(jn = j; ((saveResidues[jn] != -1) && (jn < residNumber) && (oth > 0)); jn++) ;

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we select the column and save the block's
       * size for the next iteraction. it's obvius that we
       * decrease the column's number needed to finish. */
     /* Here, we only accept column with a conservation's
      * value equal to conservation cut point. */
      if((jn - j) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn < residNumber) && (oth > 0)); jn++) {
          if(ValueVect[jn] == cut) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      j = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* Keep only columns blocks bigger than an input columns block size */
  if(blockSize != 0) {

    /* Traverse all alignment looking for columns blocks greater than LONGBLOCK,
     * everytime than a column is not selected by the trimming method, check
     * whether the current block size until that point is big enough to be kept
     * or it should be removed from the final alignment */
    for(i = 0, pos = 0, block = 0; i < residNumber; i++) {
      if(saveResidues[i] != -1)
        block++;
      else {
        /* Remove columns from blocks smaller than input blocks size */
        if(block < blockSize)
          for(j = pos; j <= i; j++)
            saveResidues[j] = -1;
        pos = i + 1;
        block = 0;
      }
    }
    /* Check final block separately since it could happen than last block is not
     * big enough but because the loop end could provoke to ignore it */
    if(block < blockSize)
      for(j = pos; j < i; j++)
        saveResidues[j] = -1;
  }

  /* If the flag -terminalony is set, apply a method to look for internal
   * boundaries and get back columns inbetween them, if they exist */
  if(terminalGapOnly == true)
    if(!removeOnlyTerminal())
      return NULL;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(true, false);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Deallocate local memory */
  delete[] matrixAux;
  delete[] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* This method removes those columns that not achieve the similarity threshond,
 * by one hand, and exceed the gaps threshold. If the number of columns that
 * have been selected is lower that a given coverage, the program relaxes both
 * thresholds in order to get back some columns and achieve this minimum
 * number of columns in the new alignment. For that purpose, the program
 * starts at the middle of the original alignment and makes, alternatively,
 * movememts to the right and left sides looking for those columns necessary
 * to achieve the minimum number of columns set by the coverage parameter.
 * This method also can return the complementary alignmnet consists of those
 * columns that will be deleted from the original alignment applying the
 * standard method. */
alignment *alignment::cleanByCutValue(double cutGaps, const int *gInCol,
  float baseLine, float cutCons, const float *MDK_Win, bool complementary) {

  int i, j, k, oth, pos, block, jn, blGaps, *vectAuxGaps;
  string *matrixAux, *newSeqsName;
  float blCons, *vectAuxCons;
  alignment *newAlig;
  newValues counter;

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Select the columns with a conservation's value
   * greater than the conservation cut point AND
   * less or equal than the gap cut point. */
  for(i = 0, counter.residues = 0; i < residNumber; i++)
    if((MDK_Win[i] > cutCons) && (gInCol[i] <= cutGaps)) counter.residues++;
    else saveResidues[i] = -1;
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Compute, if it's necessary, the number of columns
   * necessary to achieve the minimum number of it fixed
   * by the coverage parameter. */
  oth = utils::roundInt((((baseLine/100.0) - (float) counter.residues/residNumber)) * residNumber);
  /* ***** ***** ***** ***** ***** ***** ***** ***** */

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* If it's needed to add new columns, we compute the
   * news thresholds */
  if(oth > 0) {
    counter.residues += oth;

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Allocate memory */
    vectAuxCons = new float[residNumber];
    vectAuxGaps = new int[residNumber];
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Sort a copy of the MDK_Win vector and of the gInCol
     * vector, and take the value of the column that marks
     * the % baseline */
    utils::copyVect((float *) MDK_Win, vectAuxCons, residNumber);
    utils::copyVect((int *) gInCol, vectAuxGaps, residNumber);

    utils::quicksort(vectAuxCons, 0, residNumber-1);
    utils::quicksort(vectAuxGaps, 0, residNumber-1);

    blCons = vectAuxCons[(int) ((float)(residNumber - 1) * (100.0 - baseLine)/100.0)];
    blGaps = vectAuxGaps[(int) ((float)(residNumber - 1) * (baseLine)/100.0)];
    /* ***** ***** ***** ***** ***** ***** ***** ***** */

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* Deallocate memory */
    delete [] vectAuxCons;
    delete [] vectAuxGaps;
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* ***** ***** ***** ***** ***** ***** ***** ***** */
  /* Fixed the initial size of blocks as 0.5% of
   * alignment's length */
  for(k = utils::roundInt(0.005 * residNumber); (k >= 0) && (oth > 0); k--) {

    /* ***** ***** ***** ***** ***** ***** ***** ***** */
    /* We start in the alignment middle then we move on
     * right and left side at the same time. */
    for(i = (residNumber/2), j = (i + 1); (((i > 0) || (j < (residNumber - 1))) && (oth > 0)); i--, j++) {

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Left side. Here, we compute the block's size. */
      for(jn = i; ((saveResidues[jn] != -1) && (jn >= 0) && (oth > 0)); jn--) ;

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we select the column and save the block's
       * size for the next iteraction. it's obvius that we
       * decrease the column's number needed to finish. */
      /* Here, we accept column with a conservation's value
       * greater or equal than the conservation cut point OR
       * less or equal than the gap cut point. */
      if((i - jn) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn >= 0) && (oth > 0)); jn--) {
          if((MDK_Win[jn] >= blCons) || (gInCol[jn] <= blGaps)) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      i = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* Right side. Here, we compute the block's size. */
      for(jn = j; ((saveResidues[jn] != -1) && (jn < residNumber) && (oth > 0)); jn++) ;

      /* ***** ***** ***** ***** ***** ***** ***** ***** */
      /* if block's size is greater or equal than the fixed
       * size then we select the column and save the block's
       * size for the next iteraction. it's obvius that we
       * decrease the column's number needed to finish. */
      /* Here, we accept column with a conservation's value
       * greater or equal than the conservation cut point OR
       * less or equal than the gap cut point. */
      if((jn - j) >= k) {
        for( ; ((saveResidues[jn] == -1) && (jn < residNumber) && (oth > 0)); jn++) {
          if((MDK_Win[jn] >= blCons) || (gInCol[jn] <= blGaps)) {
            saveResidues[jn] = jn;
            oth--;
          } else
            break;
        }
      }
      j = jn;
      /* ***** ***** ***** ***** ***** ***** ***** ***** */
    }
    /* ***** ***** ***** ***** ***** ***** ***** ***** */
  }

  /* Keep only columns blocks bigger than an input columns block size */
  if(blockSize != 0) {

    /* Traverse all alignment looking for columns blocks greater than LONGBLOCK,
     * everytime than a column is not selected by the trimming method, check
     * whether the current block size until that point is big enough to be kept
     * or it should be removed from the final alignment */
    for(i = 0, pos = 0, block = 0; i < residNumber; i++) {
      if(saveResidues[i] != -1)
        block++;
      else {
        /* Remove columns from blocks smaller than input blocks size */
        if(block < blockSize)
          for(j = pos; j <= i; j++)
            saveResidues[j] = -1;
        pos = i + 1;
        block = 0;
      }
    }
    /* Check final block separately since it could happen than last block is not
     * big enough but because the loop end could provoke to ignore it */
    if(block < blockSize)
      for(j = pos; j < i; j++)
        saveResidues[j] = -1;
  }

  /* If the flag -terminalony is set, apply a method to look for internal
   * boundaries and get back columns inbetween them, if they exist */
  if(terminalGapOnly == true)
    if(!removeOnlyTerminal())
      return NULL;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(true, false);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Deallocate local memory */
  delete[] matrixAux;
  delete[] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* This method carries out the strict and strict plus method. To trim the
 * alignment in an automated way, the method uses as an input a given gaps
 * thresholds over a gaps vector values, a given similarity threshold over a
 * similarity vector values. With a flag, we can decide which method the
 * program shall apply. With another one, the user can ask for the
 * complementary alignment. In this method, those columns that has been marked
 * to be deleted but has, at least, 3 of 4 surronding neighbour selected are
 * get back to the new alignmnet. At other part of the method, those column
 * blocks that do not have a minimum size fix by the method will be deleted
 * from the trimmed alignment. */
alignment *alignment::cleanStrict(int gapCut, const int *gInCol, float simCut,
  const float *MDK_W, bool complementary, bool variable) {

  int i, num, lenBlock;
  alignment *newAlig;

  deque<int> neighboursBlock;

  /* Reject columns with gaps number greater than the gap threshold. */
  for(i = 0; i < residNumber; i++)
    if(gInCol[i] > gapCut)
      saveResidues[i] = -1;

  /* Reject columns with similarity score under the threshold. */
  for(i = 0; i < residNumber; i++)
    if(MDK_W[i] < simCut)
      saveResidues[i] = -1;

  /* Search for those columns that have been removed but have,
   * at least, 3 out of 4 adjacent columns selected. */

  /* Load the first 5 columns into the list */
  for(i = 0; i < (residNumber) && i < 5; i++)
    neighboursBlock.push_back(saveResidues[i]);

  /* Special case #1 - we analyze it before processing the rest positions
   * Column: 1 - Save this column considering its neighbours */
  if((saveResidues[0] == -1) && (neighboursBlock.size() > 2) &&
       (neighboursBlock[1] != -1) &&
       (neighboursBlock[2] != -1))
    saveResidues[0] = 0;

  /* Special case #2A - we analyze it before processing the rest positions
   * Column: 2 - Save this column considering its neighbours */
  if((saveResidues[1] == -1) && (neighboursBlock.size() > 3) &&
      ((neighboursBlock[0] != -1) &&
       (neighboursBlock[2] != -1) &&
       (neighboursBlock[3] != -1)))
    saveResidues[1] = 1;

  /* Special case #2B - when alignment is 3 columns long - we analyze it before
   * processing the rest positions.
   * Column: 2 - Save this column considering its neighbours */
  if((saveResidues[1] == -1) && (neighboursBlock.size() > 2) &&
      ((neighboursBlock[0] != -1) &&
       (neighboursBlock[2] != -1)))
    saveResidues[1] = 1;

  /* Normal cases */
  for(i = 2, num = 0; i < (residNumber - 2); i++, num = 0) {
    if(saveResidues[i] == -1) {
      if(neighboursBlock[0] != -1)
        num++;
      if(neighboursBlock[1] != -1)
        num++;
      if(neighboursBlock[3] != -1)
        num++;
      if(neighboursBlock[4] != -1)
        num++;
      saveResidues[i] = (num >= 3) ? i : -1;

     }
    /* We slide the window one position to the right */
    if((i+3) < residNumber) {
      neighboursBlock.pop_front();
      neighboursBlock.push_back(saveResidues[i+3]);
    }
  }

  /* Special case #3 -
   * Column: 2nd last one - We consider the list rather than the saveResidues
   * vector in order to consider only unmodified values before the previous
   * loop */
  if((saveResidues[(residNumber - 2)] == -1) && (neighboursBlock.size() > 3) &&
      ((neighboursBlock[neighboursBlock.size() - 4] != -1) &&
       (neighboursBlock[neighboursBlock.size() - 3] != -1) &&
       (neighboursBlock[neighboursBlock.size() - 1] != -1)))
    saveResidues[(residNumber - 2)] = (residNumber - 2);

  /* Special case #4 -
   * Column: last one - We consider the list rather than the saveResidues
   * vector in order to consider only unmodified values before the previous
   * loop */
  if((saveResidues[(residNumber - 1)] == -1) && (neighboursBlock.size() > 2) &&
      ((neighboursBlock[neighboursBlock.size() - 3] != -1) &&
       (neighboursBlock[neighboursBlock.size() - 2] != -1)))
    saveResidues[(residNumber - 1)] = (residNumber - 1);

  /* Select blocks size based on user input. It can be set either to 5 or to a
   * variable number between 3 and 12 depending on alignment length (1% alig) */
  if(!variable)
    lenBlock = 5;
  else {
    lenBlock = utils::roundInt(residNumber * 0.01);
    lenBlock = lenBlock >  3 ? (lenBlock < 12 ? lenBlock : 12) : 3;
  }

  /* Allow to change minimal block size */
  blockSize = blockSize > 0 ? blockSize : lenBlock;

  /* Keep only columns blocks bigger than either a computed dinamically or
   * set by the user block size */
  removeSmallerBlocks(blockSize);

  /* If the flag -terminalony is set, apply a method to look for internal
   * boundaries and get back columns inbetween them, if they exist */
  if(terminalGapOnly == true)
    if(!removeOnlyTerminal())
      return NULL;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(true, false);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */

  newValues counter;
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  //~ matrixAux = new string[counter.sequences];
  //~ newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  //~ fillNewDataStructure(matrixAux, newSeqsName);

  fillNewDataStructure(&counter);

  //~ cerr << counter -> seqsName[counter -> sequences - 1] << endl;

  /* When we have all parameters, we create the new alignment */
  //~ newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo, counter.sequences, counter.residues,
  newAlig = new alignment(filename, aligInfo, counter.matrix, counter.seqsName,
    seqsInfo, counter.sequences, counter.residues, iformat, oformat, shortNames,
    dataType, isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber,
    residNumber, residuesNumber, saveResidues, saveSequences, ghWindow, shWindow,
    blockSize, identities, overlaps);

  /* Deallocate local memory */
  //~ delete[] matrixAux;
  //~ delete[] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* Remove those sequences with an overlap less than a given threshold. It can
 * return the complementary alignment if appropiate flag is set. */
alignment *alignment::cleanOverlapSeq(float minimumOverlap, float *overlapSeq,
  bool complementary) {

  string *matrixAux, *newSeqsName;
  alignment *newAlig;
  newValues counter;
  int i;

  /* Keep only those sequences with an overlap value equal or greater than
   * the minimum overlap value set by the user.  */
  for(i = 0; i < sequenNumber; i++)
    if(overlapSeq[i] < minimumOverlap)
      saveSequences[i] = -1;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(false, true);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Deallocate local memory */
  delete [] matrixAux;
  delete [] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* Remove those columns, expressed as range, set by the user. It can return
 * the complementary alignmnet if appropiate flags is set. */
alignment *alignment::removeColumns(int *columns, int init, int size,
  bool complementary) {

  string *matrixAux, *newSeqsName;
  alignment *newAlig;
  newValues counter;
  int i, j;

  /* Delete those range columns defines in the columns vector */
  for(i = init; i < size + init; i += 2)
    for(j = columns[i]; j <= columns[i+1]; j++)
      saveResidues[j] = -1;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(true, false);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Deallocate local memory */
  delete[] matrixAux;
  delete[] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* This method removes those sequences, expressed as range of sequences, set by
 * the user. The method also can return the complementary alignment, it means,
 * those sequences that originally should be removed from the input alignment */
alignment *alignment::removeSequences(int *seqs, int init, int size,
  bool complementary) {

  string *matrixAux, *newSeqsName;
  alignment *newAlig;
  newValues counter;
  int i, j;

  /* Delete those range of sequences defines by the seqs vector */
  for(i = init; i < size + init; i += 2)
    for(j = seqs[i]; j <= seqs[i+1]; j++)
      saveSequences[j] = -1;

  /* Once the columns/sequences selection is done, turn it around
   * if complementary flag is active */
  if(complementary == true)
    computeComplementaryAlig(false, true);

  /* Check for any additional column/sequence to be removed */
  /* Compute new sequences and columns numbers */
  counter = removeCols_SeqsAllGaps();

  /* Allocate memory  for selected sequences/columns */
  matrixAux = new string[counter.sequences];
  newSeqsName = new string[counter.sequences];

  /* Fill local allocated memory with previously selected data */
  fillNewDataStructure(matrixAux, newSeqsName);

  /* When we have all parameters, we create the new alignment */
  newAlig = new alignment(filename, aligInfo, matrixAux, newSeqsName, seqsInfo,
    counter.sequences, counter.residues, iformat, oformat, shortNames, dataType,
    isAligned, reverse, terminalGapOnly, left_boundary, right_boundary,
    keepSequences, keepHeader, sequenNumber, residNumber,
    residuesNumber, saveResidues, saveSequences, ghWindow, shWindow, blockSize,
    identities, overlaps);

  /* Free local memory */
  delete [] matrixAux;
  delete [] newSeqsName;

  /* Return the new alignment reference */
  return newAlig;
}

/* Function for computing the complementary alignment. It just turn around the
 * current columns/sequences selection */
void alignment::computeComplementaryAlig(bool residues, bool sequences) {
  int i;

  for(i = 0; i < residNumber && residues; i++)
    saveResidues[i] = (saveResidues[i] == -1) ? i : -1;

  for(i = 0; i < sequenNumber && sequences; i++)
    saveSequences[i] = (saveSequences[i] == -1) ? i : -1;
}

/* Function designed for identifying right and left borders between central
 * and terminal regions in the alignment. The borders are those columns, first
 * and last, composed by only residues. Everything inbetween left and right
 * borders are keept independently of the applied methods */
bool alignment::removeOnlyTerminal(void) {

  int i;
  const int *gInCol;

  if((left_boundary == -1) and (right_boundary == -1)) {
    /* Get alignments gaps stats and copy it */
    if(calculateGapStats() != true) {
      cerr << endl << "WARNING: Impossible to apply 'terminal-only' method"
        << endl << endl;
      return false;
    }
    gInCol = sgaps -> getGapsWindow();

    /* Identify left and right borders. First and last columns with no gaps */
    for(i = 0; i < residNumber && gInCol[i] != 0; i++) ;
    left_boundary = i;

    for(i = residNumber - 1; i > -1 && gInCol[i] != 0; i--) ;
    right_boundary = i;
  }

  else if(left_boundary >= right_boundary) {
    cerr << endl << "ERROR: Check your manually set left '"<< left_boundary
      << "' and right '" << right_boundary << "' boundaries'" << endl << endl;
    return false;
  }

  /* We increase the right boundary in one position because we use lower strict
   * condition to get back columns inbetween boundaries removed by any method */
   right_boundary += 1;

  /* Once the interal boundaries have been established, if these limits exist
   * then retrieved all columns inbetween both boundaries. Columns out of these
   * limits will remain selected or not depending on the algorithm applied */
  for(i = left_boundary; i < right_boundary; i++)
    saveResidues[i] = i;

  return true;
}

/* Function designed to identify and remove those columns blocks smaller than
 * a given size */
void alignment::removeSmallerBlocks(int blockSize) {

  int i, j, pos, block;

  if(blockSize == 0)
    return ;

  /* Traverse the alignment looking for blocks greater than BLOCKSIZE, everytime
   * than a column hasn't been selected, check whether the current block is big
   * enough to be kept or it should be removed from the final alignment */
  for(i = 0, pos = 0, block = 0; i < residNumber; i++) {
    if(saveResidues[i] != -1)
      block++;
    else {
      /* Remove columns from blocks smaller than input blocks size */
      if(block < blockSize)
        for(j = pos; j <= i; j++)
          saveResidues[j] = -1;
      pos = i + 1;
      block = 0;
    }
  }

  /* Check final block separately since it could happen than last block is not
   * big enough but because the loop end could provoke to ignore it */
  if(block < blockSize)
    for(j = pos; j < i; j++)
      saveResidues[j] = -1;
  return ;
}

/* Function designed to identify columns and sequences composed only by gaps.
 * Once these columns/sequences have been identified, they are removed from
 * final alignment. */
newValues alignment::removeCols_SeqsAllGaps(void) {
  int i, j, valid, gaps;
  bool warnings = false;
  newValues counter;

  /* Check all valid columns looking for those composed by only gaps */
  for(i = 0, counter.residues = 0; i < residNumber; i++) {
    if(saveResidues[i] == -1)
      continue;

    for(j = 0, valid = 0, gaps = 0; j < sequenNumber; j++) {
      if (saveSequences[j] == -1)
        continue;
      if (sequences[j][i] == '-')
        gaps ++;
      valid ++;
    }
    /* Once a column has been identified, warm about it and remove it */
    if(gaps == valid) {
      if(!warnings)
        cerr << endl;
      warnings = true;
      cerr << "WARNING: Removing column '" << i << "' composed only by gaps"
        << endl;
      saveResidues[i] = -1;
    } else {
      counter.residues ++;
    }
  }

  /* Check for those selected sequences to see whether there is anyone with
   * only gaps */
  for(i = 0, counter.sequences = 0; i < sequenNumber; i++) {
    if(saveSequences[i] == -1)
      continue;

    for(j = 0, valid = 0, gaps = 0; j < residNumber; j++) {
      if(saveResidues[j] == -1)
        continue;
      if (sequences[i][j] == '-')
        gaps ++;
      valid ++;
    }
    /* Warm about it and remove each sequence composed only by gaps */
    if(gaps == valid) {
      if(!warnings)
        cerr << endl;
      warnings = true;

      if(keepSequences) {
        cerr << "WARNING: Keeping sequence '" << seqsName[i]
          << "' composed only by gaps" << endl;
        counter.sequences ++;
      } else {
        cerr << "WARNING: Removing sequence '" << seqsName[i]
          << "' composed only by gaps" << endl;
        saveSequences[i] = -1;
      }
    } else {
      counter.sequences ++;
    }
  }
  if(warnings)
    cerr << endl;

  counter.matrix = new string[counter.sequences];
  counter.seqsName = new string[counter.sequences];

  return counter;
}

/* Function for copying to previously allocated memory those data selected
 * for being in the final alignment */
void alignment::fillNewDataStructure(string *newMatrix, string *newNames) {
  int i, j, k;

  /* Copy only those sequences/columns selected */
  for(i = 0, j = 0; i < sequenNumber; i++) {
    if(saveSequences[i] == -1)
      continue;

    newNames[j] = seqsName[i];
    for(k = 0; k < residNumber; k++) {
      if(saveResidues[k] == -1)
        continue;
      newMatrix[j].resize(newMatrix[j].size() + 1, sequences[i][k]);
    }
    j++;
  }
}

/* Function for copying to previously allocated memory those data selected
 * for being in the final alignment */
void alignment::fillNewDataStructure(newValues *data) {
  int i, j, k;

  /* Copy only those sequences/columns selected */
  for(i = 0, j = 0; i < sequenNumber; i++) {
    if(saveSequences[i] == -1)
      continue;

    data -> seqsName[j] = seqsName[i];
    for(k = 0; k < residNumber; k++) {
      if(saveResidues[k] == -1)
        continue;
      data -> matrix[j].resize(data -> matrix[j].size() + 1, sequences[i][k]);
    }
    j++;
  }
  // cerr << data -> seqsName[j-1] << endl;
}

/* Check if CDS file is correct based on: Residues are DNA/RNA (at most). There
 * is not gaps in the whole dataset. Each sequence is multiple of 3. At the same
 * time, the function will remove stop codons if appropiate flags are used */
bool alignment::prepareCodingSequence(bool splitByStopCodon, bool ignStopCodon,\
  alignment *proteinAlig) {

  bool warning = false;
  size_t found;
  int i;

  /* New code: We now care about the presence of wildcards/indeterminations
   * characters such as 'X' or 'B' into protein sequences as well as about the
   * presence of Selenocysteines ('U') or Pyrrolysines ('O'). It only works, by
   * now, with the universal genetic code */
  int *protSeqsLengths, numbProtSeqs, current_prot;
  string *protSeqsNames, *protSequences;
  char aminoAcid;

  numbProtSeqs =  proteinAlig -> getNumSpecies();
  protSeqsNames = new string[numbProtSeqs];
  protSequences = new string[numbProtSeqs];
  protSeqsLengths = new int[numbProtSeqs];

  proteinAlig -> getSequences(protSeqsNames, protSequences, protSeqsLengths);

  /* Check read sequences are real DNA/RNA */
  if (getTypeAlignment() == AAType) {
    cerr << endl << "ERROR: Check input CDS file. It seems to content protein "
      << "residues." << endl << endl;
      return false;
  }

  for(i = 0; i < sequenNumber; i++) {

    /* Get protein sequence to compare against any potential stop codon in the
     * coding sequence. If there is not protein sequence for current coding
     * sequence, skip its analysis */
    for(current_prot = 0; current_prot < numbProtSeqs; current_prot++)
      if(protSeqsNames[current_prot] == seqsName[i])
        break;
    if(current_prot == numbProtSeqs)
      continue;

    if(sequences[i].find("-") != string::npos) {
      if (!warning)
        cerr << endl;
      cerr << "ERROR: Sequence \"" << seqsName[i] << "\" has, at least, one gap"
        << endl << endl;
      return false;
    }

    if((sequences[i].length() % 3) != 0) {
      if (!warning)
        cerr << endl;
      warning = true;
      cerr << "WARNING: Sequence length \"" << seqsName[i] << "\" is not "
        << "multiple of 3 (length: " << sequences[i].length() << ")" << endl;
    }

    /* Ignore stop codons from the CDS if set by the user */
    if (ignStopCodon)
      continue;

    /* Detect universal stop codons in the CDS. Then, compare those stop codons
     * against the protein sequence to see whether they are real stop codons or
     * are representing rare amino-acids such as Selenocysteines or Pyrrolysines
     * It also allows stop-codons when there are wildcards/indet characters in
     * the protein sequence. CDS sequences could be splitted using stop codons
     * from the sequence itself */

    /* Initialize first appearence of a given stop codon to -1.
     * That means that it has not been found yet */
    found = -1;
    do {
      found = sequences[i].find("TGA", found + 1);

      /* If a stop codon has been found and its position is multiple of 3.
       * Analize it */
      if((found != string::npos) && (((int) found % 3) == 0)) {

        aminoAcid = (char) toupper(protSequences[current_prot][(int) found/3]);
        /* It may be a Selenocysteine ('TGA') which should be represented as 'U'
         * or wildcard/indet characters such as 'X' or 'B' */
        //~ if ((aminoAcid == 'U') || (aminoAcid == 'X') || (aminoAcid == 'B'))
        /* If a rare amino-acids such as 'U'/'O' or a wildcard/indet character
         * such as 'B'/'X' is present, skip current stop codon */
        if((aminoAcid == 'U') || (aminoAcid == 'O') || (aminoAcid == 'X') || \
          (aminoAcid == 'B'))
          continue;

        /* If split_by_stop_codon flag is activated then cut input CDS sequence
         * up to first appearance of a stop codon */
        else if(splitByStopCodon) {
          if (!warning)
            cerr << endl;
          warning = true;
          cerr << "WARNING: Cutting sequence \"" << seqsName[i] << "\" at first"
            << " appearance of stop codon \"TGA\" (residue \"" << aminoAcid
            << "\") at position " << (int) found + 1 << " (length: "
            << sequences[i].length() << ")" << endl;
          sequences[i].resize((int) found);
        }
        /* Otherwise, warn about it and return an error */
        else {
          if (!warning)
            cerr << endl;
          cerr << "ERROR: Sequence \"" << seqsName[i] << "\" has stop codon \""
            << "TGA\" (residue \"" << aminoAcid << "\") at position "
            << (int) found + 1 << " (length: " << sequences[i].length() << ")"
            << endl << endl;
          return false;
        }
      }
    /* Iterate over the CDS until not stop codon is found */
    } while(found != string::npos);

    /* Initialize first appearence of a given stop codon to -1.
     * That means that it has not been found yet */
    found = -1;
    do {
      found = sequences[i].find("TAA", found + 1);

      /* If a stop codon has been found and its position is multiple of 3.
       * Analize it */
      if((found != string::npos) && (((int) found % 3) == 0)) {

        aminoAcid = (char) toupper(protSequences[current_prot][(int) found/3]);
        /* Check if there is any wildcard/indet characters such as 'X' or 'B' */
        //~ if ((aminoAcid == 'X') || (aminoAcid == 'B'))
        /* If a rare amino-acids such as 'U'/'O' or a wildcard/indet character
         * such as 'B'/'X' is present, skip current stop codon */
        if((aminoAcid == 'U') || (aminoAcid == 'O') || (aminoAcid == 'X') || \
          (aminoAcid == 'B'))
          continue;

        /* If split_by_stop_codon flag is activated then cut input CDS sequence
         * up to first appearance of a stop codon */
        else if(splitByStopCodon) {
          if (!warning)
            cerr << endl;
          warning = true;
          cerr << "WARNING: Cutting sequence \"" << seqsName[i] << "\" at first"
            << " appearance of stop codon \"TAA\" (residue \"" << aminoAcid
            << "\") at position " << (int) found + 1 << " (length: "
            << sequences[i].length() << ")" << endl;
          sequences[i].resize((int) found);
        }
        /* Otherwise, warn about it and return an error */
        else {
          if (!warning)
            cerr << endl;
          cerr << "ERROR: Sequence \"" << seqsName[i] << "\" has stop codon \""
            << "TAA\" (residue \"" << aminoAcid << "\") at position "
            << (int) found + 1 << " (length: " << sequences[i].length() << ")"
            << endl << endl;
          return false;
        }
      }
    /* Iterate over the CDS until not stop codon is found */
    } while(found != string::npos);

    /* Initialize first appearence of a given stop codon to -1.
     * That means that it has not been found yet */
    found = -1;
    do {
      found = sequences[i].find("TAG", found + 1);
      /* If a stop codon has been found and its position is multiple of 3.
       * Analize it */
      if((found != string::npos) && (((int) found % 3) == 0)) {

        aminoAcid = (char) toupper(protSequences[current_prot][(int) found/3]);
        /* It may be a Pyrrolysine ('TAG') which should be represented as 'O'
         * or wildcard/indet characters such as 'X' or 'B' */
        //~ if ((aminoAcid == 'O') || (aminoAcid == 'X') || (aminoAcid == 'B'))
        /* If a rare amino-acids such as 'U'/'O' or a wildcard/indet character
         * such as 'B'/'X' is present, skip current stop codon */
        if((aminoAcid == 'U') || (aminoAcid == 'O') || (aminoAcid == 'X') || \
          (aminoAcid == 'B'))
          continue;

        /* If split_by_stop_codon flag is activated then cut input CDS sequence
         * up to first appearance of a stop codon */
        else if(splitByStopCodon) {
          if (!warning)
            cerr << endl;
          warning = true;
          cerr << "WARNING: Cutting sequence \"" << seqsName[i] << "\" at first"
            << " appearance of stop codon \"TAG\" (residue \"" << aminoAcid
            << "\") at position " << (int) found + 1 << " (length: "
            << sequences[i].length() << ")" << endl;
          sequences[i].resize((int) found);
        }
        /* Otherwise, warn about it and return an error */
        else {
          if (!warning)
            cerr << endl;
          cerr << "ERROR: Sequence \"" << seqsName[i] << "\" has stop codon \""
            << "TAG\" (residue \"" << aminoAcid << "\") at position "
            << (int) found + 1 << " (length: " << sequences[i].length() << ")"
            << endl << endl;
          return false;
        }
      }
    /* Iterate over the CDS until not stop codon is found */
    } while(found != string::npos);
  }

  /* If everything was return an OK to informat about it. */
  return true;
}

/* Function designed to check whether input CDS file is correct or not based on
 * some features: Sequences are in both files (it could be more on CDS file),
 * they have (more or less) same ength. Otherwise, some nucleotides could be
 * excluded or some 'N's added to fit protein length. */
bool alignment::checkCorrespondence(string *names, int *lengths, int \
  totalInputSeqs, int multiple = 1) {

  int i, j, seqLength, indet;
  bool warnings = false;
  string tmp;

  /* For each sequence in the current protein alignment, look for its coding
   * DNA sequence checking that they have the same size. */
  for(i = 0; i < sequenNumber; i++) {

    /* Get protein sequence length removing any possible gap. Get as well last
     * residue from current sequence */

    tmp = utils::removeCharacter('-', sequences[i]);
    seqLength = tmp.length() * multiple;
    indet = ((int) tmp.length() - utils::min((int) tmp.find_last_not_of("X"), \
      (int) tmp.find_last_not_of("x"))) - 1;

    /* Go through all available CDS looking for the one with the same ID */
    for(j = 0; j < totalInputSeqs; j++) {

      /* Once both ID matchs, compare its lengths */
      if(seqsName[i] == names[j]) {

        /* If both sequences have the same length, stop the search */
        if(seqLength == lengths[j])
          break;

        /* If nucleotide sequence is larger than protein sequence, warn about
         * it and continue the verification process. It will used the 'Nth'
         * first nucleotides for the conversion */
        else if(seqLength < lengths[j]) {
          if (!warnings)
            cerr << endl;
          warnings = true;
          cerr << "WARNING: Sequence \"" << seqsName[i] << "\" will be cut at "
            << "position " << seqLength << " (length: "<< lengths[j] << ")"
            << endl;
          break;
        }

        /* It has been detected some indeterminations at the end of the protein
         * sequence. That issue could be cause by some incomplete codons in the
         * nucleotide sequences. This issue is solved adding as much 'N' symbols
         * as it is needed to preserve the backtranslated alignment */
        else if((indet > 0) && (indet > (seqLength - lengths[j])/3)) {
          if (!warnings)
            cerr << endl;
          warnings = true;
          cerr << "WARNING: Sequence \"" << seqsName[i] << "\" has some inde"
            << "termination symbols 'X' at the end of sequence. They will be"
            << " included in the final alignment." << endl;
          break;
        }

        /* If nucleotide sequence is shorter than protein sequence, return an
         * error since it is not feasible to cut the input protein aligment to
         * fit it into CDNA sequences size */
        else {
          if (!warnings)
            cerr << endl;
          warnings = true;
          cerr << "WARNING: Sequence \"" << seqsName[i] << "\" has less nucleo"
            << "tides (" << lengths[j] << ") than expected (" << seqLength
            << "). It will be added N's to complete the sequence"  << endl;
          break;
        }
      }
    }

    /* Warn about a mismatch a sequences name level */
    if(j == totalInputSeqs) {
      cerr << endl << "ERROR: Sequence \"" << seqsName[i] << "\" is not in "
        << "CDS file." << endl << endl;
      return false;
    }
  }

  /* If everything is OK, return an appropiate flag */
  return true;
}
author	padge
date	Fri, 25 Mar 2022 17:10:43 +0000
parents
children