view clustalomega/clustal-omega-0.2.0/src/clustal/hhalign_wrapper.c @ 0:ff1768533a07

Migrated tool version 0.2 from old tool shed archive to new tool shed repository
author clustalomega
date Tue, 07 Jun 2011 17:04:25 -0400
parents
children
line wrap: on
line source

/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

/*********************************************************************
 * Clustal Omega - Multiple sequence alignment
 *
 * Copyright (C) 2010 University College Dublin
 *
 * Clustal-Omega is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This file is part of Clustal-Omega.
 *
 ********************************************************************/

/*
 *  RCS $Id: hhalign_wrapper.c 241 2011-05-04 14:37:17Z fabian $
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>

#include "seq.h"
#include "tree.h"
#include "progress.h"
#include "hhalign/general.h"
#include "hhalign/hhfunc.h"
#include "hhalign/hhalign.h"

/* up to this level (from leaf) will background HMM info be applied */
#define APPLY_BG_HMM_UP_TO_TREE_DEPTH 10

#define TIMING 0

#define TRACE 0

/**
 * @brief get rid of unknown residues
 *
 * @note HHalignWrapper can be entered in 2 different ways: (i) all
 * sequences are un-aligned (ii) there are 2 (aligned) profiles.  in
 * the un-aligned case (i) the sequences come straight from Squid,
 * that is, they have been sanitised, all non-alphabetic residues 
 * have been rendered as X's. In profile mode (ii) one profile may 
 * have been produced internally. In that case residues may have 
 * been translated back into their 'native' form, that is, they may
 * contain un-sanitised residues. These will cause trouble  during
 * alignment
 * FS, r213->214
 */
void
SanitiseUnknown(mseq_t *mseq)
{

    int iS; /* iterator for sequence */
    int iR; /* iterator for residue */
    int iLen; /* length of sequence */
    char *pcRes = NULL;


    for (iS = 0; iS < mseq->nseqs; iS++){

        for (pcRes = mseq->seq[iS]; '\0' != *pcRes; pcRes++){

            if (isgap(*pcRes)){
                continue;
            }

            if (mseq->seqtype==SEQTYPE_PROTEIN) {
                if (NULL == strchr(AMINO_ALPHABET, toupper(*pcRes))) {
                    *pcRes = AMINOACID_ANY;
                }
            } else if (mseq->seqtype==SEQTYPE_DNA) {
                if (NULL == strchr(DNA_ALPHABET, toupper(*pcRes))) {
                    *pcRes = NUCLEOTIDE_ANY;
                }
            } else if (mseq->seqtype==SEQTYPE_RNA) {
                if (NULL == strchr(RNA_ALPHABET, toupper(*pcRes))) {
                    *pcRes = NUCLEOTIDE_ANY;
                }
            }

        } /* !EO String */

    } /* 0 <= iS < mseq->nseqs */

    return;

} /*** end:  SanitiseUnknown()  ***/

/**
 * @brief translate unknown residues back to ambiguity codes;
 * hhalign translates ambiguity codes (B,Z) into unknown residue (X).
 * we still have the original (un-aligned) residue information,
 * by iterating along the original and aligned sequences we can
 * reconstruct where codes have been changed and restore them
 * to their original value
 *
 * @param[in,out] mseq
 * sequence/profile data, mseq->seq [in,out] is changed to conform
 * with mseq->orig_seq [in]
 *
 */
void
TranslateUnknown2Ambiguity(mseq_t *mseq)
{

    int iS; /* iterator for sequence */
    int iR, iRo; /* iterator for residue (original) */
    int iChange, iCase, iAmbi; /* counts how many replacements */
    static int siOffset = 'a' - 'A';

    for (iS = 0; iS < mseq->nseqs; iS++){

        iR = iRo = 0;
        iChange = iCase = iAmbi = 0;

        while(('\0' != mseq->seq[iS][iR]) &&
              ('\0' != mseq->orig_seq[iS][iRo])) {

            /* skip gaps in aligned sequences */
            while(isgap(mseq->seq[iS][iR])) {
                iR++;
            } /* was gap in unaligned seq
               * this should probably not happen */
            while(isgap(mseq->orig_seq[iS][iRo])) {
                iRo++;
            } /* was gap in aligned seq */

            /* check if we reached the end of the sequence after
             * skipping the gaps */
            if ( ('\0' == mseq->seq[iS][iR]) ||
                 ('\0' == mseq->orig_seq[iS][iRo]) ){
                break;
            }


            if (mseq->seq[iS][iR] != mseq->orig_seq[iS][iRo]){
                /* FIXME: count replacements, discard case changes */
                iChange++;
                if ( (mseq->seq[iS][iR] == mseq->orig_seq[iS][iRo]+siOffset) ||
                     (mseq->seq[iS][iR] == mseq->orig_seq[iS][iRo]-siOffset) ){
                    iCase++;
                }
                else {
                    iAmbi++;
                }
#if TRACE
                Log(&rLog, LOG_FORCED_DEBUG, "seq=%d, pos=(%d:%d), (%c:%c)",
                     iS, iR, iRo,
                     mseq->seq[iS][iR], mseq->orig_seq[iS][iRo]);
#endif
                mseq->seq[iS][iR] = mseq->orig_seq[iS][iRo];
            }

            iR++;
            iRo++;

        } /* !EO seq */

        Log(&rLog, LOG_DEBUG,
             "in seq %d re-translated %d residue codes (%d true, %d case)",
             iS, iChange, iAmbi, iCase);

        /* assert that both sequences (un/aligned) have terminated */
        /* skip gaps in aligned sequences */
        while(isgap(mseq->seq[iS][iR])) {
            iR++;
        } /* was gap in unaligned seq
           * this should probably not happen */
        while(isgap(mseq->orig_seq[iS][iRo])) {
            iRo++;
        } /* was gap in aligned seq */


        if ( ('\0' != mseq->seq[iS][iR]) ||
             ('\0' != mseq->orig_seq[iS][iRo]) ){

            Log(&rLog, LOG_FATAL, "inconsistency in un/aligned sequence %d\n>%s\n>%s\n",
                  iS, mseq->seq[iS], mseq->orig_seq[iS]);
        }

    } /* 0 <= iS < mseq->nseqs */

} /*** end: TranslateUnknown2Ambiguity() ***/


/**
 * @brief re-attach leading and trailing gaps to alignment
 *
 * @param[in,out] prMSeq,
 * alignment structure (at this stage there should be no un-aligned sequences)
 * @param[in] iProfProfSeparator
 * gives sizes of input profiles, -1 if no input-profiles but un-aligned sequences
 *
 * @note leading and tailing profile columns 
 * that only contain gaps have no effect on the alignment 
 * and are removed during the alignment. if they are 
 * encountered a warning message is printed to screen.
 * some users like to preserve these gap columns
 * FS, r213->214
 */
void
ReAttachLeadingGaps(mseq_t *prMSeq, int  iProfProfSeparator)
{

    int i, j;
    int iSize1 = 0; /* #seqs in 1st profile */
    int iSize2 = 0; /* #seqs in 2nd profile */
    int iPPS   = iProfProfSeparator;
    int iLeadO1  = 0; /* leading  gaps in 1st seq of 1st profile */
    int iLeadO2  = 0; /* leading  gaps in 1st seq of 2nd profile */
    int iLeadA1  = 0; /* leading  gaps in 1st seq of final alignment */
    int iLeadA2  = 0; /* leading  gaps in PPS seq of final alignment */
    int iTrailO1 = 0; /* trailing gaps in 1st seq of 1st profile */
    int iTrailO2 = 0; /* trailing gaps in 1st seq of 2nd profile */
    int iTrailA1 = 0; /* trailing gaps in 1st seq of final alignment */
    int iTrailA2 = 0; /* trailing gaps in PPS seq of final alignment */
    int iLen  = 0; /* length of final alignment */
    int iLen1 = 0; /* length of 1st profile */
    int iLen2 = 0; /* length of 2nd profile */
    int iCutHead = 0; /* make up truncation at head */
    int iCutTail = 0; /* make up truncation at tail */
    char *pcIter = NULL;

    if (-1 == iProfProfSeparator){
        return;
    }
    else {
        assert(prMSeq->aligned);
        assert(prMSeq->nseqs > iProfProfSeparator);
    }
    iSize1 = iProfProfSeparator;
    iSize2 = prMSeq->nseqs - iProfProfSeparator;
    iLen  = strlen(prMSeq->seq[0]);
    iLen1 = strlen(prMSeq->orig_seq[0]);
    iLen2 = strlen(prMSeq->orig_seq[iPPS]);

    /* count leading/trailing gaps in 1st sequence of 1st/2nd profile and final alignmant */
    for (iLeadO1  = 0, pcIter =  prMSeq->orig_seq[0];             isgap(*pcIter); pcIter++, iLeadO1++);
    for (iLeadO2  = 0, pcIter =  prMSeq->orig_seq[iPPS];          isgap(*pcIter); pcIter++, iLeadO2++);
    for (iLeadA1  = 0, pcIter =  prMSeq->seq[0];                  isgap(*pcIter); pcIter++, iLeadA1++);
    for (iLeadA2  = 0, pcIter =  prMSeq->seq[iPPS];               isgap(*pcIter); pcIter++, iLeadA2++);
    for (iTrailO1 = 0, pcIter = &prMSeq->orig_seq[0][iLen1-1];    isgap(*pcIter); pcIter--, iTrailO1++);
    for (iTrailO2 = 0, pcIter = &prMSeq->orig_seq[iPPS][iLen2-1]; isgap(*pcIter); pcIter--, iTrailO2++);
    for (iTrailA1 = 0, pcIter = &prMSeq->seq[0][iLen-1];          isgap(*pcIter); pcIter--, iTrailA1++);
    for (iTrailA2 = 0, pcIter = &prMSeq->seq[iPPS][iLen-1];       isgap(*pcIter); pcIter--, iTrailA2++);


    /* turn leading/trailing gaps into truncation */
    iLeadO1  = iLeadO1  > iLeadA1  ? iLeadO1-iLeadA1   : 0;
    iLeadO2  = iLeadO2  > iLeadA2  ? iLeadO2-iLeadA2   : 0;
    iTrailO1 = iTrailO1 > iTrailA1 ? iTrailO1-iTrailA1 : 0;
    iTrailO2 = iTrailO2 > iTrailA2 ? iTrailO2-iTrailA2 : 0;
    iCutHead = iLeadO1  > iLeadO2  ? iLeadO1  : iLeadO2;
    iCutTail = iTrailO1 > iTrailO2 ? iTrailO1 : iTrailO2;

    /* re-allocate and shift memory */
    for (i = 0; i < prMSeq->nseqs; i++){

        CKREALLOC(prMSeq->seq[i], iLen+iCutHead+iCutTail+1);
        memmove(prMSeq->seq[i]+iCutHead, prMSeq->seq[i], iLen);
        for (j = 0; j < iCutHead; j++){
            prMSeq->seq[i][j] = '-';
        }
        for (j = iLen+iCutHead; j < iLen+iCutHead+iCutTail; j++){
            prMSeq->seq[i][j] = '-';
        }
        prMSeq->seq[i][j] = '\0';
    }

} /***  end: ReAttachLeadingGaps()  ***/

/**
 * @brief reallocate enough memory for alignment and
 * attach sequence pointers to profiles
 *
 * @param[in,out] mseq
 * sequence/profile data, increase memory for sequences in profiles
 * @param[out] ppcProfile1
 * pointers to sequencese in 1st profile
 * @param[out] ppcProfile2
 * pointers to sequencese in 2nd profile
 * @param[out] pdWeightsL
 * weights (normalised to 1.0) for sequences in left  profile
 * @param[out] pdWeightsR
 * weights (normalised to 1.0) for sequences in right profile
 * @param[in] pdSeqWeights
 * weights for _all_ sequences in alignment
 * @param[in] iLeafCountL
 * number of sequences in 1st profile
 * @param[in] piLeafListL
 * array of integer IDs of sequences in 1st profile
 * @param[in] iLeafCountR
 * number of sequences in 2nd profile
 * @param[in] piLeafListR
 * array of integer IDs of sequences in 2nd profile
 *
 */
void
PrepareAlignment(mseq_t *mseq, char **ppcProfile1, char **ppcProfile2,
                 double *pdWeightsL, double *pdWeightsR, double *pdSeqWeights,
                 int iLeafCountL, int *piLeafListL,
                 int iLeafCountR, int *piLeafListR)
{

    int iLenL = 0; /* length of 1st profile */
    int iLenR = 0; /* length of 2nd profile */
    int iMaxLen = 0; /* maximum possible length of alignment */
    int i; /* aux */
    double dWeight = 0.00;
    double dWeightInv = 0.00;

    assert(NULL!=mseq);
    assert(NULL!=ppcProfile1);
    assert(NULL!=ppcProfile2);
    assert(NULL!=piLeafListL);
    assert(NULL!=piLeafListR);

    /* get length of profiles,
     * in a profile all sequences should have same length so only look at 1st
     */
    iLenL = strlen(mseq->seq[piLeafListL[0]]);
    iLenR = strlen(mseq->seq[piLeafListR[0]]);
    iMaxLen = iLenL + iLenR + 1;

    /* reallocate enough memory for sequences in alignment (for adding
     * gaps)
     */
    for (i = 0; i < iLeafCountL; i++){
        mseq->seq[piLeafListL[i]] =
            CKREALLOC(mseq->seq[piLeafListL[i]], iMaxLen);
    }
    for (i = 0; i < iLeafCountR; i++){
        mseq->seq[piLeafListR[i]] =
            CKREALLOC(mseq->seq[piLeafListR[i]], iMaxLen);
    }

    /* attach sequences to profiles
     */
    for (i = 0; i < iLeafCountL; i++){
        ppcProfile1[i] = mseq->seq[piLeafListL[i]];
    }
    ppcProfile1[i] = NULL;

    for (i = 0; i < iLeafCountR; i++){
        ppcProfile2[i] = mseq->seq[piLeafListR[i]];
    }
    ppcProfile2[i] = NULL;


    /* remove terminal 'X' for single sequences:
     * it is a quirk of hhalign() to delete 2 individual sequences
     * if 2 terminal X's meet during alignment,
     * just replace (one of) them.
     * this can be undone at the end.
     * profiles -consisting of more than 1 sequence-
     * appear to be all-right.
     * there seems to be no problem with B's and Z's
     */
    if ( (1 == iLeafCountL) && (1 == iLeafCountR) ){

        if ( ('X' == ppcProfile1[0][0]) && ('X' == ppcProfile2[0][0]) ){
#define NOTX 'N'
            ppcProfile1[0][0] = NOTX; /* FIXME: arbitrary assignment */
            ppcProfile2[0][0] = NOTX; /* FIXME: arbitrary assignment */
        }
        if ( ('X' == ppcProfile1[0][iLenL-1]) && ('X' == ppcProfile2[0][iLenR-1]) ){
            ppcProfile1[0][iLenL-1] = NOTX; /* FIXME: arbitrary assignment */
            ppcProfile2[0][iLenR-1] = NOTX; /* FIXME: arbitrary assignment */
        }
    }

    /* obtain sequence weights
     */
    if (NULL != pdSeqWeights){

        dWeight = 0.00;
        for (i = 0; i < iLeafCountL; i++){
            register double dAux = pdSeqWeights[piLeafListL[i]];
#ifndef NDEBUG
            if (dAux <= 0.00){
                Log(&rLog, LOG_DEBUG, "seq-weight %d = %f", piLeafListL[i], dAux);
            }
#endif
            pdWeightsL[i] = dAux;
            dWeight      += dAux;
        } /* 0 <= i < iLeafCountL */
        dWeightInv = 1.00 / dWeight;
        for (i = 0; i < iLeafCountL; i++){
            pdWeightsL[i] *= dWeightInv;
        }

        dWeight = 0.00;
        for (i = 0; i < iLeafCountR; i++){
            register double dAux = pdSeqWeights[piLeafListR[i]];
#ifndef NDEBUG
            if (dAux <= 0.00){
                Log(&rLog, LOG_DEBUG, "seq-weight %d = %f", piLeafListR[i], dAux);
            }
#endif
            pdWeightsR[i] = dAux;
            dWeight      += dAux;
        } /* 0 <= i < iLeafCountL */
        dWeightInv = 1.00 / dWeight;
        for (i = 0; i < iLeafCountR; i++){
            pdWeightsR[i] *= dWeightInv;
        }
    } /* (NULL != pdSeqWeights) */
    else {
        pdWeightsL[0] = pdWeightsR[0] = -1.00;
    }

#if TRACE
    for (i = 0; i < iLeafCountL; i++){
        Log(&rLog, LOG_FORCED_DEBUG, "ppcProfile1[%d/%d] pointing to mseq %d = %s",
                  i, iLeafCountR, piLeafListL[i], ppcProfile1[i]);
    }
    for (i = 0; i < iLeafCountR; i++){
        Log(&rLog, LOG_FORCED_DEBUG, "ppcProfile2[%d/%d] pointing to mseq %d = %s",
                  i, iLeafCountR, piLeafListR[i], ppcProfile2[i]);
    }
#endif
    
    return;

} /*** end: PrepareAlignment() ***/


/**
 * @brief wrapper for hhalign. This is a frontend function to
 * the ported hhalign code.
 *
 * @param[in,out] prMSeq
 * holds the unaligned sequences [in] and the final alignment [out]
 * @param[in] piOrderLR
 * holds order in which sequences/profiles are to be aligned,
 * even elements specify left nodes, odd elements right nodes,
 * if even and odd are same then it is a leaf
 * @param[in] pdSeqWeights
 * Weight per sequence. No weights used if NULL
 * @param[in] iNodeCount
 * number of nodes in tree, piOrderLR has 2*iNodeCount elements
 * @param[in] prHMMList
 * List of background HMMs (transition/emission probabilities)
 * @param[in] iHMMCount
 * Number of input background HMMs
 * @param[in] iProfProfSeparator
 * Gives the number of sequences in the first profile, if in
 * profile/profile alignment mode (iNodeCount==3). That assumes mseqs
 * holds the sequences of profile 1 and profile 2.
 * @param[in] rHhalignPara
 * various parameters read from commandline
 *
 * @return score of the alignment FIXME what is this?
 *
 * @note complex function. could use some simplification, more and
 * documentation and a struct'uring of piOrderLR
 * 
 * @note HHalignWrapper can be entered in 2 different ways:
 * (i) all sequences are un-aligned (ii) there are 2 (aligned) profiles.
 * in the un-aligned case (i) the sequences come straight from Squid, 
 * that is, they have been sanitised, all non-alphabetic residues 
 * have been rendered as X's. In profile mode (ii) one profile may 
 * have been produced internally. In that case residues may have 
 * been translated back into their 'native' form, that is, they 
 * may contain un-sanitised residues. These will cause trouble 
 * during alignment
 *
 * @note: introduced argument hhalign_para rHhalignPara; FS, r240 -> r241
 */

double
HHalignWrapper(mseq_t *prMSeq, int *piOrderLR,
               double *pdSeqWeights, int iNodeCount,
               hmm_light *prHMMList, int iHMMCount,
               int iProfProfSeparator, hhalign_para rHhalignPara)
{
    int iN; /* node iterator */
    int *piLeafCount = NULL; /* number of leaves beneath a certain node */
    int **ppiLeafList = NULL; /* list of leaves beneath a certain node */
    char **ppcProfile1 = NULL; /* pointer to sequences in profile */
    char **ppcProfile2 = NULL; /* pointer to sequences in profile */
    char *pcReprsnt1 = NULL; /* representative of HMM aligned to left */
    char *pcReprsnt2 = NULL; /* representative of HMM aligned to right */
    char **ppcReprsnt1 = &pcReprsnt1; /* representative of HMM aligned to L */
    char **ppcReprsnt2 = &pcReprsnt2; /* representative of HMM aligned to R */
    char *pcConsens1 = NULL; /* copy of  left sequence */
    char *pcConsens2 = NULL; /* copy of right sequence */
    char **ppcCopy1 = /*&pcCopy1*/NULL; /* copy of  left sequences */
    char **ppcCopy2 = /*&pcCopy2*/NULL; /* copy of right sequences */
    double *pdScores = NULL; /* alignment scores (seq/HMM) */
    double dScore = 0.0; /* alignment score (seq/HMM) */
    int iAux_FS = 0;
    char zcAux[10000] = {0};
    char zcError[10000] = {0};
    int i; /* aux */
    progress_t *prProgress;
    int iAlnLen; /* alignment length */
    double *pdWeightsL = NULL; /* sequence weights of left  profile */
    double *pdWeightsR = NULL; /* sequence weights of right profile */
    int iMergeNodeCounter = 0;
    hmm_light *prHMM = NULL;
    bool bPrintCR = (rLog.iLogLevelEnabled<=LOG_VERBOSE) ? FALSE : TRUE;
#if TIMING
    char zcStopwatchMsg[1024];
    Stopwatch_t *stopwatch = StopwatchCreate();
    StopwatchZero(stopwatch);
    StopwatchStart(stopwatch);
#endif
    
    
    if (NULL != prHMMList) {
        if (iHMMCount>1) {
            Log(&rLog, LOG_WARN, "FIXME: Using only first of %u HMMs (needs implementation)", iHMMCount);
        }
        prHMM = &(prHMMList[0]);
    } else {
        /* FIXME: prHMM not allowed to be NULL and therefore pseudo allocated here */
        prHMM = (hmm_light *) CKCALLOC(1, sizeof(hmm_light));
    }
    
    assert(NULL!=prMSeq);
    
    if (NULL==piOrderLR) {
        assert(3==iNodeCount);
    }
    SanitiseUnknown(prMSeq);

    /* hhalign was not made for DNA/RNA. So warn if sequences are not
     * protein
     */
    if (SEQTYPE_PROTEIN != prMSeq->seqtype) {
        Log(&rLog, LOG_WARN, "Sequence type is %s. HHalign has only been tested on protein.",
             SeqTypeToStr(prMSeq->seqtype));
    }

    /* hhalign produces funny results if sequences contain gaps, so
     * dealign. Only way to use alignment info is to use it as a
     * background HMM
     */
    if (TRUE == prMSeq->aligned) {
        Log(&rLog, LOG_DEBUG, "Dealigning aligned sequences (inside %s)", __FUNCTION__);
        DealignMSeq(prMSeq);
    }

#if TRACE
    Log(&rLog, LOG_FORCED_DEBUG, "iNodeCount = %d", iNodeCount);
#endif

    
    /* allocate top-level memory for leaf tracking arrays and profiles,
     * and sequence weights*/
    piLeafCount = CKCALLOC(iNodeCount, sizeof(int));
    ppiLeafList = CKCALLOC(iNodeCount, sizeof(int *));
    ppcProfile1 = CKCALLOC(prMSeq->nseqs*2-1, sizeof(char *));
    ppcProfile2 = CKCALLOC(prMSeq->nseqs*2-1, sizeof(char *));
    pdScores    = CKCALLOC(iNodeCount, sizeof(double));
    pdWeightsL  = CKCALLOC(iNodeCount, sizeof(double));
    pdWeightsR  = CKCALLOC(iNodeCount, sizeof(double));

    NewProgress(&prProgress, LogGetFP(&rLog, LOG_INFO),
                "Progressive alignment progress", bPrintCR);

    
    /* Profile-profile alignment? Then setup piLeafCount and
     * piLeafList here. FIXME this is just an awful haaaack
     */
    if (iNodeCount==3 && NULL==piOrderLR) {
        int iSizeProf1 = iProfProfSeparator;
        int iSizeProf2 = prMSeq->nseqs - iProfProfSeparator;

        piLeafCount[0] = iSizeProf1;
        ppiLeafList[0] = (int *)CKMALLOC(iSizeProf1 * sizeof(int));
        for (i=0;i<iSizeProf1;i++) {
            ppiLeafList[0][i] = i;
        }
        piLeafCount[1] = iSizeProf2;
        ppiLeafList[1] = (int *)CKMALLOC(iSizeProf2 * sizeof(int));
        for (i=0;i<iSizeProf2;i++) {
            ppiLeafList[1][i] = i+iSizeProf1;
        }
        
        /* awful hack inside an awful hack: we had to setup piLeafCount
         * and piLeafList outside the node iteration. this which is
         * normally done at leaf level inside the node iteration. to
         * avoid overwriting the already setup vars set...
         */
        iNodeCount=1;

        piOrderLR =  (int *)CKCALLOC(DIFF_NODE, sizeof(int));
        piOrderLR[LEFT_NODE] = 0;
        piOrderLR[RGHT_NODE] = 1;
        piOrderLR[PRNT_NODE] = 2;
    }
    


    iMergeNodeCounter = 0;
    for (iN = 0; iN < iNodeCount; iN++){

        register int riAux = DIFF_NODE * iN;

        /*LOG_DEBUG("node %d ", iN);*/

        if (piOrderLR[riAux+LEFT_NODE] == piOrderLR[riAux+RGHT_NODE]){

            register int riLeaf = piOrderLR[riAux+LEFT_NODE];
#if TRACE
            if (NULL == pdSeqWeights) {
                Log(&rLog, LOG_FORCED_DEBUG, "node %d is a leaf with entry %d (seq %s)",
                          iN, riLeaf, prMSeq->sqinfo[riLeaf].name);
            } else {
                Log(&rLog, LOG_FORCED_DEBUG, "node %d is a leaf with entry %d  (seq %s) and weight %f",
                          iN, riLeaf, prMSeq->sqinfo[riLeaf].name, pdSeqWeights[riLeaf]);
            }
#endif
            /* left/right entry same, this is a leaf */
            piLeafCount[piOrderLR[riAux+PRNT_NODE]] = 1; /* number of leaves is '1' */
            ppiLeafList[piOrderLR[riAux+PRNT_NODE]] = (int *)CKMALLOC(1 * sizeof(int));
            ppiLeafList[piOrderLR[riAux+PRNT_NODE]][0] = riLeaf;

        } /* was a leaf */
        else {
            int iL, iR, iP; /* ID of left/right nodes, parent */
            int i, j; /* aux */

            Log(&rLog, LOG_DEBUG,
                "merge profiles at node %d", iN, piOrderLR[riAux]);

            /* iNodeCount - prMSeq->nseqs = total # of merge-nodes 
             * unless in profile/profile alignment mode
             */
            if (1 == iNodeCount) {
                ProgressLog(prProgress, ++iMergeNodeCounter,
                            1, FALSE);
            } else {
                ProgressLog(prProgress, ++iMergeNodeCounter,
                            iNodeCount - prMSeq->nseqs, FALSE);                
            }

            /* left/right entry are not same, this is a merge node */
            iL = piOrderLR[riAux+LEFT_NODE];
            iR = piOrderLR[riAux+RGHT_NODE];
            iP = piOrderLR[riAux+PRNT_NODE];
            piLeafCount[iP] = piLeafCount[iL] + piLeafCount[iR];
            ppiLeafList[iP] = (int *)CKMALLOC(piLeafCount[iP] * sizeof(int));

            for (i = j = 0; i < piLeafCount[iL]; i++, j++){
                ppiLeafList[iP][j] = ppiLeafList[iL][i];
            }
            for (i = 0; i < piLeafCount[iR]; i++, j++){
                ppiLeafList[iP][j] = ppiLeafList[iR][i];
            }

            /* prepare simulation arena:
             * - make sure enough memory in sequences
             * - attach sequence pointers to profiles
             */
            /* idea: switch template and query according to nseq? */

            PrepareAlignment(prMSeq, ppcProfile1, ppcProfile2,
                             pdWeightsL, pdWeightsR, pdSeqWeights,
                             piLeafCount[iL], ppiLeafList[iL],
                             piLeafCount[iR], ppiLeafList[iR]);
            if (rLog.iLogLevelEnabled <= LOG_DEBUG){
                int i;
                FILE *fp = LogGetFP(&rLog, LOG_DEBUG);
                Log(&rLog, LOG_DEBUG, "merging profiles %d & %d", iL, iR);
                for (i = 0; i < piLeafCount[iL]; i++){
                    fprintf(fp, "L/#=%3d (ID=%3d, w=%f): %s\n",
                           i, ppiLeafList[iL][i], pdWeightsL[i], ppcProfile1[i]);
                }
                for (i = 0; i < piLeafCount[iR]; i++){
                    fprintf(fp, "R/#=%3d (ID=%3d, w=%f): %s\n",
                           i, ppiLeafList[iR][i], pdWeightsR[i], ppcProfile2[i]);
                }
            }


            
            /* align individual sequences to HMM;
             * - use representative sequence to get gapping
             * - create copies of both, individual/representative sequences
             *   as we don't want to introduce gaps into original
             *
             * FIXME: representative sequence is crutch, should use
             * full HMM but that does not seem to work at all
             * -- try harder! Fail better!
             */
            if ( (piLeafCount[iL] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){
                int i, j;
                pcReprsnt1 = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char));
                for (i = 0; i < prHMM->L; i++){
                    pcReprsnt1[i] = prHMM->seq[prHMM->ncons][i+1];
                }
                ppcCopy1 = CKCALLOC(piLeafCount[iL], sizeof(char *));
                for (j = 0; j < piLeafCount[iL]; j++){
                    ppcCopy1[j] = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char));
                    for (i = 0; i < (int) strlen(ppcProfile1[0]); i++){
                        ppcCopy1[j][i] = ppcProfile1[j][i];
                    }
                }

                {
                    /* the size of the elements in the forward/backward matrices 
                       depends very much on the lengths of the profiles _and_ 
                       in which position (1st/2nd) the longer/shorter profile/HMM is.
                       the matrix elements can easily exceed the size of a (long?) double
                       if the longer profile/HMM is associated with the query (q) and the 
                       shorter with the target (t). 
                       FIXME: however, pseudo-count adding may also depend on position, 
                       this is only really tested for the HMM being in the 1st position (q)
                       MUST TEST THIS MORE THOGOUGLY

                       this switch appears to be most easily (although unelegantly) 
                       effected here. Don't want to do it (upstairs) in PrepareAlignment() 
                       because it might jumble up the nodes. Don't want to do it in hhalign() 
                       either because ppcProfile1/2 and q/t may be used independently.
                       FS, r236 -> r237
                    */
                    int iLenA = strlen(ppcCopy1[0]);
                    int iLenH = prHMM->L;
                    
                    if (iLenH < iLenA){
                        hhalign(ppcReprsnt1, 0/* only one representative seq*/, NULL,
                                ppcCopy1, piLeafCount[iL], pdWeightsL,
                                &dScore, prHMM,
                                NULL, NULL, NULL, NULL,
                                rHhalignPara, 
                                iAux_FS++, /* DEBUG ARGUMENT */
                                zcAux, zcError);
                    }
                    else {
                        hhalign(ppcCopy1, piLeafCount[iL], pdWeightsL,
                                ppcReprsnt1, 0/* only one representative seq*/, NULL,
                                &dScore, prHMM,
                                NULL, NULL, NULL, NULL,
                                rHhalignPara, 
                                iAux_FS++, /* DEBUG ARGUMENT */
                                zcAux, zcError);
                    }
                }
                pdScores[ppiLeafList[iL][0]] = dScore;
#if 0
                printf("score: %f\nL: %s\nH: %s\n",
                       dScore, ppcCopy1[0], ppcReprsnt1[0]);
#endif
                /* assemble 'consensus';
                 * this is not a real consensus, it is more a gap indicator,
                 * for each position it consists of residues/gaps in the 1st sequences,
                 * or a residue (if any) of the other sequences.
                 * it only contains a gap if all sequences of the profile
                 * have a gap at this position
                 */
                pcConsens1 = CKCALLOC(prHMM->L+strlen(ppcProfile1[0])+1, sizeof(char));
                for (i = 0; i < prHMM->L; i++){
                    for (j = 0, pcConsens1[i] = '-'; (j < piLeafCount[iL]) && ('-' == pcConsens1[i]); j++){
                        pcConsens1[i] = ppcCopy1[j][i];
                    }
                }
#if 0
                for (j = 0; (j < piLeafCount[iL]); j++){
                    printf("L%d:%s\n", j, ppcCopy1[j]);
                }
                printf("LC:%s\n", pcConsens1);
#endif
            } /* ( (1 == piLeafCount[iL]) && (0 != prHMM->L) ) */

            if ( (piLeafCount[iR] <= APPLY_BG_HMM_UP_TO_TREE_DEPTH) && (0 != prHMM->L) ){
                int i, j;

                pcReprsnt2 = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char));
                for (i = 0; i < prHMM->L; i++){
                    pcReprsnt2[i] = prHMM->seq[prHMM->ncons][i+1];
                }
                ppcCopy2 = CKCALLOC(piLeafCount[iR], sizeof(char *));
                for (j = 0; j < piLeafCount[iR]; j++){
                    ppcCopy2[j] = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char));
                    for (i = 0; i < (int) strlen(ppcProfile2[0]); i++){
                        ppcCopy2[j][i] = ppcProfile2[j][i];
                    }
                }

                {
                    /* the size of the elements in the forward/backward matrices 
                       depends very much on the lengths of the profiles _and_ 
                       in which position (1st/2nd) the longer/shorter profile/HMM is.
                       the matrix elements can easily exceed the size of a (long?) double
                       if the longer profile/HMM is associated with the query (q) and the 
                       shorter with the target (t). 
                       FIXME: however, pseudo-count adding may also depend on position, 
                       this is only really tested for the HMM being in the 1st position (q)
                       MUST TEST THIS MORE THOGOUGLY
                       
                       this switch appears to be most easily (although unelegantly) 
                       effected here. Don't want to do it (upstairs) in PrepareAlignment() 
                       because it might jumble up the nodes. Don't want to do it in hhalign() 
                       either because ppcProfile1/2 and q/t may be used independently.
                       FS, r236 -> r237
                    */
                    int iLenA = strlen(ppcCopy2[0]);
                    int iLenH = prHMM->L;
                    
                    if (iLenH < iLenA){
                        hhalign(ppcReprsnt2, 0/* only one representative seq */, NULL,
                                ppcCopy2,    piLeafCount[iR], pdWeightsR,
                                &dScore, prHMM,
                                NULL, NULL, NULL, NULL,
                                rHhalignPara, 
                                iAux_FS++, /* DEBUG ARGUMENT */
                                zcAux, zcError);
                    }
                    else {
                        hhalign(ppcCopy2,    piLeafCount[iR], pdWeightsR,
                                ppcReprsnt2, 0/* only one representative seq */, NULL,
                                &dScore, prHMM,
                                NULL, NULL, NULL, NULL,
                                rHhalignPara, 
                                iAux_FS++, /* DEBUG ARGUMENT */
                                zcAux, zcError);
                    }
                }
                pdScores[ppiLeafList[iR][0]] = dScore;
#if 0
                printf("H: %s\nR: %s\nscore: %f\n",
                       ppcReprsnt2[0], ppcCopy2[0], dScore);
#endif
                /* assemble 'consensus';
                 * this is not a real consensus, it is more a gap indicator,
                 * for each position it consists of residues/gaps in the 1st sequences,
                 * or a residue (if any) of the other sequences.
                 * it only contains a gap if all sequences of the profile
                 * have a gap at this position
                 */
                pcConsens2 = CKCALLOC(prHMM->L+strlen(ppcProfile2[0])+1, sizeof(char));
                for (i = 0; i < prHMM->L; i++){
                    for (j = 0, pcConsens2[i] = '-'; (j < piLeafCount[iR]) && ('-' == pcConsens2[i]); j++){
                        pcConsens2[i] = ppcCopy2[j][i];
                    }
                }
#if 0
                for (j = 0; (j < piLeafCount[iR]); j++){
                    printf("R%d:%s\n", j, ppcCopy2[j]);
                }
                printf("RC:%s\n", pcConsens2);
#endif
            } /*  ( (1 == piLeafCount[iR]) && (0 != prHMM->L) ) */

            

            /* do alignment here (before free)
             */
            {
                /* the size of the elements in the forward/backward matrices 
                   depends very much on the lengths of the profiles _and_ 
                   in which position (1st/2nd) the longer/shorter profile is.
                   the matrix elements can easily exceed the size of a (long?) double
                   if the longer profile is associated with the query (q) and the 
                   shorter with the target (t). 
                   this switch appears to be most easily (although unelegantly) 
                   effected here. Don't want to do it (upstairs) in PrepareAlignment() 
                   because it might jumble up the nodes. Don't want to do it in hhalign() 
                   either because ppcProfile1/2 and q/t may be used independently.
                   FS, r228 -> 229
                 */
                int iLen1 = strlen(ppcProfile1[0]);
                int iLen2 = strlen(ppcProfile2[0]);

                if (iLen1 < iLen2){
                    hhalign(ppcProfile1, piLeafCount[iL], pdWeightsL,
                            ppcProfile2, piLeafCount[iR], pdWeightsR,
                            &dScore, prHMM,
                            pcConsens1, pcReprsnt1,
                            pcConsens2, pcReprsnt2,
                            rHhalignPara, 
                            iAux_FS++, /* DEBUG ARGUMENT */
                            zcAux, zcError);
                }
                else {
                    hhalign(ppcProfile2, piLeafCount[iR], pdWeightsR,
                            ppcProfile1, piLeafCount[iL], pdWeightsL,
                            &dScore, prHMM,
                            pcConsens2, pcReprsnt2,
                            pcConsens1, pcReprsnt1,
                            rHhalignPara, 
                            iAux_FS++, /* DEBUG ARGUMENT */
                            zcAux, zcError);
                }

            }
            /* free left/right node lists,
             * after alignment left/right profiles no longer needed
             */
            if (NULL != ppcCopy1){
                int i;
                for (i = 0; i <  piLeafCount[iL]; i++){
                    CKFREE(ppcCopy1[i]);
                }
                CKFREE(ppcCopy1);
                CKFREE(pcReprsnt1);
                CKFREE(pcConsens1);
            }
            if (NULL != ppcCopy2){
                int i;
                for (i = 0; i <  piLeafCount[iR]; i++){
                    CKFREE(ppcCopy2[i]);
                }
                CKFREE(ppcCopy2);
                CKFREE(pcReprsnt2);
                CKFREE(pcConsens2);
            }
            ppiLeafList[iL] = CKFREE(ppiLeafList[iL]);
            ppiLeafList[iR] = CKFREE(ppiLeafList[iR]);
            piLeafCount[iL] = piLeafCount[iR] = 0;

        } /* was a merge node */

        if (rLog.iLogLevelEnabled <= LOG_DEBUG){
            int i, j;
            FILE *fp = LogGetFP(&rLog, LOG_DEBUG);
            for (i = 0; i < iNodeCount; i++){
                if (0 == piLeafCount[i]){
                    continue;
                }
                fprintf(fp, "node %3d, #leaves=%d:\t", i, piLeafCount[i]);
                for (j = 0; ppiLeafList && (j < piLeafCount[i]); j++){
                    fprintf(fp, "%d,", ppiLeafList[i][j]);
                }
                fprintf(fp, "\n");
            }
        }


    } /* 0 <= iN < iNodeCount */
    ProgressDone(prProgress);


    /* check length and set length info
     */
    iAlnLen = strlen(prMSeq->seq[0]);
    for (i=0; i<prMSeq->nseqs; i++) {
#if 0
        Log(&rLog, LOG_FORCED_DEBUG, "seq no %d: name %s; len %d; %s",
                  i, prMSeq->sqinfo[i].name, strlen(prMSeq->seq[i]), prMSeq->seq[i]);
#endif
        
#ifndef NDEBUG
        assert(iAlnLen == strlen(prMSeq->seq[i]));
#endif
        prMSeq->sqinfo[i].len = iAlnLen;
    }
    prMSeq->aligned = TRUE;


    if (rLog.iLogLevelEnabled <= LOG_DEBUG){
        if (0 != prHMM->L){
            int i;
            Log(&rLog, LOG_DEBUG, "Alignment scores with HMM:");
            for (i = 0; /*pdScores[i] > 0.0*/i < prMSeq->nseqs; i++){
                Log(&rLog, LOG_DEBUG, "%2d:\t%f\n", i, pdScores[i]);
            }
        }
    }


    /** translate back ambiguity residues
     * hhalign translates ambiguity codes (B,Z) into unknown residues (X).
     * as we still have the original input we can substitute them back
     */
    TranslateUnknown2Ambiguity(prMSeq);
    ReAttachLeadingGaps(prMSeq, iProfProfSeparator);

    if (NULL == prHMMList){
        CKFREE(prHMM);
    }
    CKFREE(ppcProfile2);
    CKFREE(ppcProfile1);
    CKFREE(ppiLeafList[piOrderLR[DIFF_NODE*(iNodeCount-1)+PRNT_NODE]]);
    CKFREE(ppiLeafList);
    CKFREE(piLeafCount);
    CKFREE(pdScores);
    FreeProgress(&prProgress);
    CKFREE(pdWeightsL);
    CKFREE(pdWeightsR);

#if TIMING
    StopwatchStop(stopwatch);
    StopwatchDisplay(stdout, "Total time for HHalignWrapper():" , stopwatch);
    StopwatchFree(stopwatch);
#endif

    return dScore; /* FIXME alternative: return averaged pdScores */

}
/***   end: HHalignWrapper() ***/