Mercurial > repos > clustalomega > clustalomega

/* -*- mode: c; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */

/*********************************************************************
 * Clustal Omega - Multiple sequence alignment
 *
 * Copyright (C) 2010 University College Dublin
 *
 * Clustal-Omega is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This file is part of Clustal-Omega.
 *
 ********************************************************************/

/*
 * RCS $Id: hhhmm-C.h 224 2011-03-23 12:13:33Z fabian $
 */


// hhhmm.C

#ifndef MAIN
#define MAIN
#include <iostream>   // cin, cout, cerr
#include <fstream>    // ofstream, ifstream
#include <stdio.h>    // printf
using std::cout;
using std::cerr;
using std::endl;
using std::ios;
using std::ifstream;
using std::ofstream;
#include <stdlib.h>   // exit
#include <string>     // strcmp, strstr
#include <math.h>     // sqrt, pow
#include <limits.h>   // INT_MIN
#include <float.h>    // FLT_MIN
#include <time.h>     // clock
#include <ctype.h>    // islower, isdigit etc
#include "util-C.h"     // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
#include "list.h"     // list data structure
#include "hash.h"     // hash data structure
#include "hhdecl-C.h"
#include "hhutil-C.h"   // imax, fmax, iround, iceil, ifloor, strint, strscn, strcut, substr, uprstr, uprchr, Basename etc.
#endif

// #ifndef WNLIB
// #define WNLIB
// #include "wnconj.h"   // Will Naylor's wnlib for optimization in C
// #endif

//////////////////////////////////////////////////////////////////////////////
//// Class HMM
//////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////////////////////
// Object constructor
//////////////////////////////////////////////////////////////////////////////
HMM::HMM(int maxseqdis, int maxres)
{
  sname = new char*[maxseqdis];   // names of stored sequences
  for (int i = 0; i < maxseqdis; i++){ sname[i] = NULL;}
  seq = new char*[maxseqdis];     // residues of stored sequences (first at pos 1!)
  for (int i = 0; i < maxseqdis; i++){ seq[i] = NULL;}
  Neff_M = new float[maxres];     // Neff_M[i] = diversity of subalignment of seqs that have residue in col i
  Neff_I = new float[maxres];     // Neff_I[i] = diversity of subalignment of seqs that have insert in col i
  Neff_D = new float[maxres];     // Neff_D[i] = diversity of subalignment of seqs that have delete in col i
  longname = new char[DESCLEN];   // Full name of first sequence of original alignment (NAME field)
  ss_dssp = new char[maxres];     // secondary structure determined by dssp 0:-  1:H  2:E  3:C  4:S  5:T  6:G  7:B
  sa_dssp = new char[maxres];     // solvent accessibility state determined by dssp 0:-  1:A (absolutely buried) 2:B  3:C  4:D  5:E (exposed)
  ss_pred = new char[maxres];     // predicted secondary structure          0:-  1:H  2:E  3:C
  ss_conf = new char[maxres];     // confidence value of prediction         0:-  1:0 ... 10:9
  Xcons   = NULL;                 // create only when needed: consensus sequence in internal representation (A=0 R=1 N=2 D=3 ...)
  l = new int[maxres];            // l[i] = pos. of j'th match state in aligment
  /* FS introduced sentinel, NULL terminates loop in destructor, FS, r221->222 */
  f = new float*[maxres+1];   f[maxres] = NULL;  // f[i][a] = prob of finding amino acid a in column i WITHOUT pseudocounts
  g = new float*[maxres+1];   g[maxres] = NULL;  // f[i][a] = prob of finding amino acid a in column i WITH pseudocounts
  p = new float*[maxres+1];   p[maxres] = NULL;  // p[i][a] = prob of finding amino acid a in column i WITH OPTIMUM pseudocounts
  tr = new float*[maxres+1]; tr[maxres] = NULL;  // log2 of transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
//   tr_lin = new float*[maxres];    // linear transition probabilities M2M M2I M2D I2M I2I D2M D2D M2M_GAPOPEN GAPOPEN GAPEXTD
  for (int i=0; i<maxres; i++) {f[i]=new(float[NAA+3]);}
  for (int i=0; i<maxres; i++) {g[i]=new(float[NAA]);}
  for (int i=0; i<maxres; i++) {p[i]=new(float[NAA]);}
  for (int i=0; i<maxres; i++) {tr[i]=new(float[NTRANS]);}
//   for (int i=0; i<maxres; i++) tr_lin[i]=new(float[NTRANS]);
  L=0;
  Neff_HMM=0;
  n_display=N_in=N_filtered=0;
  nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
//   lamda_hash.New(37,0.0); // Set size and NULL element for hash
//   mu_hash.New(37,0.0);    // Set size and NULL element for hash
  lamda=0.0; mu=0.0;
  name[0]=longname[0]=fam[0]='\0';
  trans_lin=0; // transition probs in log space
}


//////////////////////////////////////////////////////////////////////////////
// Object destructor
//////////////////////////////////////////////////////////////////////////////
HMM::~HMM()
{
  //Delete name and seq matrices
    if (NULL != sname){
        for (int k=0; (k < n_display) && (NULL != sname[k]); k++){
            delete[] sname[k]; sname[k] = NULL;
        }
        delete[] sname;    sname = NULL;
    }
    if (NULL != seq){
        for (int k=0; (k < n_display) && (NULL != seq[k]); k++){
            delete[] seq[k];  seq[k] = NULL;
        }
        delete[] seq;    seq = NULL;
    }
  delete[] Neff_M;    Neff_M = NULL;
  delete[] Neff_D;    Neff_D = NULL;
  delete[] Neff_I;    Neff_I = NULL;
  delete[] longname;    longname = NULL;
  delete[] ss_dssp;  ss_dssp = NULL;
  delete[] sa_dssp;  sa_dssp = NULL;
  delete[] ss_pred;  ss_pred = NULL;
  delete[] ss_conf;  ss_conf = NULL;
  delete[] Xcons;  Xcons = NULL;
  delete[] l;  l = NULL;
  for (int i=0; i</*MAXRES*/par.maxResLen; i++){
    if (f[i]){
      delete[] f[i];   f[i] = NULL;
    }
    else break;
  }
  for (int i=0; i</*MAXRES*/par.maxResLen; i++){
    if (g[i]){
      delete[] g[i];   g[i] = NULL;
    }
    else break;
  }
  for (int i=0; i</*MAXRES*/par.maxResLen; i++){
    if (p[i]){
      delete[] p[i];  p[i] = NULL;
    }
    else break;
  }
  for (int i=0; i</*MAXRES*/par.maxResLen; i++){
    if (tr[i]){
      delete[] tr[i];  tr[i] = NULL;
    }
    else break;
  }
  //   for (int i=0; i</*MAXRES*/par.maxResLen; i++) if (tr_lin[i]) delete[] tr_lin[i]; else break;
  delete[] f;  f = NULL;
  delete[] g;  g = NULL;
  delete[] p;  p = NULL;
  delete[] tr;  tr = NULL;
//   delete[] tr_lin;
}

//////////////////////////////////////////////////////////////////////////////
// Deep-copy constructor
//////////////////////////////////////////////////////////////////////////////
HMM& HMM::operator=(HMM& q)
{
  L=q.L;
  for (int i=0; i<=L+1; ++i)
    {
      for (int a=0; a<NAA; ++a)
	{
	  f[i][a]=q.f[i][a];
	  g[i][a]=q.g[i][a];
	  p[i][a]=q.p[i][a];
	}
      for (int a=0; a<NTRANS; ++a)
	tr[i][a]=q.tr[i][a];
      ss_dssp[i]=q.ss_dssp[i];
      sa_dssp[i]=q.sa_dssp[i];
      ss_pred[i]=q.ss_pred[i];
      ss_conf[i]=q.ss_conf[i];
      l[i]=q.l[i];
    }
  if (q.Xcons)
    for (int i=0; i<=L+1; ++i)
      Xcons[i]  =q.Xcons[i];

  n_display=q.n_display;
  for (int k=0; k<n_display; k++) {
    sname[k]=new(char[strlen(q.sname[k])+1]);
    if (!sname[k]) MemoryError("array of names for sequences to display");
    strcpy(sname[k],q.sname[k]);
  }
  for (int k=0; k<n_display; k++) {
    seq[k]=new(char[strlen(q.seq[k])+1]);
    if (!seq[k]) MemoryError("array of names for sequences to display");
    strcpy(seq[k],q.seq[k]);
  }
  ncons=q.ncons;
  nfirst=q.nfirst;
  nss_dssp=q.nss_dssp;
  nsa_dssp=q.nsa_dssp;
  nss_pred=q.nss_pred;
  nss_conf=q.nss_conf;

  for (int i=0; i<=L+1; ++i) Neff_M[i]=q.Neff_M[i];
  for (int i=0; i<=L+1; ++i) Neff_I[i]=q.Neff_I[i];
  for (int i=0; i<=L+1; ++i) Neff_D[i]=q.Neff_D[i];
  Neff_HMM=q.Neff_HMM;

  strcpy(longname,q.longname);
  strcpy(name,q.name);
  strcpy(fam,q.fam);
  strcpy(sfam,q.sfam);
  strcpy(fold,q.fold);
  strcpy(cl,q.cl);
  strcpy(file,q.file);

  lamda=q.lamda;
  mu=q.mu;

  for (int a=0; a<NAA; ++a) pav[a]=q.pav[a];
  N_in=q.N_in;
  N_filtered=q.N_filtered;
  trans_lin=q.trans_lin;
  return (HMM&) (*this);
}


///////////////////////////////////////////////////////////////////////////////
/**
 * @brief Read an HMM from an HHsearch .hhm file; return 0 at end of file
 */
int
HMM::Read(FILE* dbf, char* path)
{
  char line[LINELEN]="";    // input line
  char str3[8]="",str4[8]=""; // first 3 and 4 letters of input line
  char* ptr;                // pointer for string manipulation
  int i=0;                  // index for match state (first=1)
  int a;                    // amino acid index
  static int warn=0;

  trans_lin=0;
  L=0;
  Neff_HMM=0;
  n_display=N_in=N_filtered=0;
  nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
  lamda=mu=0.0;
  trans_lin=0; // transition probs in log space
  name[0]=longname[0]=fam[0]='\0';
  //If at the end of while-loop L is still 0 then we have reached end of db file

  //Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!

  while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
    {

      if (strscn(line)==NULL) continue;    // skip lines that contain only white space
      substr(str3,line,0,2);               // copy the first three characters into str3
      substr(str4,line,0,3);               // copy the first four characters into str4

      if (!strncmp("HH",line,2)) continue;

      if (!strcmp("NAME",str4))
	{
	  ptr=strscn(line+4);              //advance to first non-white-space character
	  if (ptr)
	    {
	      strncpy(longname,ptr,DESCLEN-1); //copy full name to longname
	      longname[DESCLEN-1]='\0';
	      strncpy(name,ptr,NAMELEN-1);     //copy longname to name...
	      strcut(name);                    //...cut after first word...
	    }
	  else
	    {
	      strcpy(longname,"undefined");
	      strcpy(name,"undefined");
	    }
	  if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
	}

      else if (!strcmp("FAM",str3))
	{
	  ptr=strscn(line+3);              //advance to first non-white-space character
	  if (ptr) strncpy(fam,ptr,IDLEN-1); else strcpy(fam,""); //copy family name to basename
	  ScopID(cl,fold,sfam,fam);        //get scop classification from basename (e.g. a.1.2.3.4)
	}

      else if (!strcmp("FILE",str4))
	{
	  if (path) strncpy(file,path,NAMELEN-1); else *file='\0'; // copy path to file variable
	  ptr=strscn(line+4);              //advance to first non-white-space character
	  if (ptr)
	    strncat(file,ptr,NAMELEN-1-strlen(file));   // append file name read from file to path
	  else strcat(file,"*");
	}

      else if (!strcmp("LENG",str4))
	{
	  ptr=line+4;
	  L=strint(ptr);                   //read next integer (number of match states)
	}
      else if (!strcmp("FILT",str4) || !strcmp("NSEQ",str4))
	{
	  ptr=line+4;
	  N_filtered=strint(ptr);          //read next integer: number of sequences after filtering
	  N_in=strint(ptr);                //read next integer: number of sequences in alignment
	}

      else if (!strcmp("NEFF",str4) || !strcmp("NAA",str3)) sscanf(line+6,"%f",&Neff_HMM);

      else if (!strcmp("EVD",str3))
	{
// 	  char key[IDLEN];
	  sscanf(line+6,"%f %f",&lamda,&mu);
// 	  sscanf(line+22,"%s",key);
//  	  lamda_hash.Add(key,lamda);
//  	  mu_hash.Add(key,mu);
	}

      else if (!strcmp("DESC",str4)) continue;
      else if (!strcmp("COM",str3))  continue;
      else if (!strcmp("DATE",str4)) continue;

      /////////////////////////////////////////////////////////////////////////////////////
      // Read template sequences that should get displayed in output alignments
      else if (!strcmp("SEQ",str3))
	{
        //char cur_seq[MAXCOL]=""; //Sequence currently read in
        char *cur_seq = new(char[par.maxColCnt]); //Sequence currently read in
	  int k;                // sequence index; start with -1; after reading name of n'th sequence-> k=n
	  int h;                // index for character in input line
	  int l=1;              // index of character in sequence seq[k]
	  int i=1;              // index of match states in ss_dssp[i] and ss_pred[i] sequence
	  int n_seq=0;          // number of sequences to be displayed EXCLUDING ss sequences
	  cur_seq[0]='-';       // overwrite '\0' character at beginning to be able to do strcpy(*,cur_seq)
	  k=-1;
	  while (fgetline(line,LINELEN-1,dbf) && line[0]!='#')
	    {
	      if (v>=4) cout<<"Read from file:"<<line<<"\n"; //DEBUG
	      if (line[0]=='>') //line contains sequence name
		{
		  if (k>=MAXSEQDIS-1) //maximum number of allowable sequences exceeded
		    {while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); break;}
		  k++;
		  if      (!strncmp(line,">ss_dssp",8)) nss_dssp=k;
		  else if (!strncmp(line,">sa_dssp",8)) nsa_dssp=k;
		  else if (!strncmp(line,">ss_pred",8)) nss_pred=k;
		  else if (!strncmp(line,">ss_conf",8)) nss_conf=k;
		  else if (!strncmp(line,">Cons-",6) || !strncmp(line,">Consensus",10)) ncons=k;
		  else
		    {
		      if (nfirst==-1) nfirst=k;
		      if (n_seq>=par.nseqdis)
			{while (fgetline(line,LINELEN-1,dbf) && line[0]!='#'); k--; break;}
		      n_seq++;
		    }

		  //If this is not the first sequence then store residues of previous sequence
		  if (k>0) {
		    seq[k-1]=new(char[strlen(cur_seq)+1]);
		    if (!seq[k-1]) MemoryError("array of sequences to display");
		    strcpy(seq[k-1],cur_seq);
		  }

		  // store sequence name
		  strcut(line+1); //find next white-space character and overwrite it with end-of-string character
		  sname[k] = new (char[strlen(line+1)+1]); //+1 for terminating '\0'
		  if (!sname[k]) MemoryError("array of names for sequences to display");
		  strcpy(sname[k],line+1);           //store sequence name in **name
		  l=1; i=1;
		}
	      else //line contains sequence residues
		{
		  if (k==-1)
		    {
		      cerr<<endl<<"WARNING: Ignoring following line while reading HMM"<<name<<":\n\'"<<line<<"\'\n";
		      continue;
		    }

		  h=0; //counts characters in current line

		  // Check whether all characters are correct; store into cur_seq
		  if (k==nss_dssp) // lines with dssp secondary structure states (. - H E C S T G B)
		    {
		      while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
			{
			  if (ss2i(line[h])>=0 && line[h]!='.')
			    {
			      char c=ss2ss(line[h]);
			      cur_seq[l]=c;
			      if (c!='.' && !(c>='a' && c<='z')) ss_dssp[i++]=ss2i(c);
			      l++;
			    }
			  else if (v && ss2i(line[h])==-2)
			    cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
			  h++;
			}
		    }
		  if (k==nsa_dssp) // lines with dssp secondary solvent accessibility (- A B C D E)
		    {
		      while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
			{
			  if (sa2i(line[h])>=0)
			    {
			      char c=line[h];
			      cur_seq[l]=c;
			      if (c!='.' && !(c>='a' && c<='z')) sa_dssp[i++]=sa2i(c);
			      l++;
			    }
			  else if (v && sa2i(line[h])==-2)
			    cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
			  h++;
			}
		    }
		  else if (k==nss_pred) // lines with predicted secondary structure (. - H E C)
		    {
		      while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
			{
			  if (ss2i(line[h])>=0 && ss2i(line[h])<=3 && line[h]!='.')
			    {
			      char c=ss2ss(line[h]);
			      cur_seq[l]=c;
			      if (c!='.' && !(c>='a' && c<='z')) ss_pred[i++]=ss2i(c);
			      l++;
			    }
			  else if (v && ss2i(line[h])==-2)
			    cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
			  h++;
			}
		    }
		  else if (k==nss_conf) // lines with confidence values should contain only 0-9, '-', or '.'
		    {
		      while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
			{
			  if (line[h]=='-' || (line[h]>='0' && line[h]<='9'))
			    {
			      cur_seq[l]=line[h];
			      ss_conf[l]=cf2i(line[h]);
			      l++;
			    }
			  else if (v && cf2i(line[h])==-2)
			    cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
			  h++;
			}
		    }
		  else // normal line containing residues
		    {
		      while (h<LINELEN && line[h]>'\0' && l</*MAXCOL*/par.maxColCnt-1)
			{
			  if (aa2i(line[h])>=0 && line[h]!='.') // ignore '.' and white-space characters ' ', \t and \n (aa2i()==-1)
			    {cur_seq[l]=line[h]; l++;}
			  else if (aa2i(line[h])==-2 && v)
			    cerr<<endl<<"WARNING: invalid symbol \'"<<line[h]<<"\' at pos. "<<h<<" in line '"<<line<<"' of HMM "<<name<<"\n";
			  h++;
			}
		    }
		  cur_seq[l]='\0';  //Ensure that cur_seq ends with a '\0' character

		} //end else
	    } //while(getline)
	  //If this is not the first sequence some residues have already been read in
	  if (k>=0) {
	    seq[k]=new(char[strlen(cur_seq)+1]);
	    if (!seq[k]) MemoryError("array of sequences to display");
	    strcpy(seq[k],cur_seq);
	  }
	  n_display=k+1;

	  // DEBUG
	  if (v>=4)
	    {
	      printf("nss_dssp=%i  nsa_dssp=%i  nss_pred=%i  nss_conf=%i  nfirst=%i\n",nss_dssp,nsa_dssp,nss_pred,nss_conf,nfirst);
	      for (k=0; k<n_display; k++)
		{
		  int j;
		  cout<<">"<<sname[k]<<"(k="<<k<<")\n";
		  if      (k==nss_dssp) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_dssp[j]));}
		  else if (k==nsa_dssp) {for (j=1; j<=L; j++) cout<<char(i2sa(sa_dssp[j]));}
		  else if (k==nss_pred) {for (j=1; j<=L; j++) cout<<char(i2ss(ss_pred[j]));}
		  else if (k==nss_conf) {for (j=1; j<=L; j++) cout<<int(ss_conf[j]-1);}
		  else                  {for (j=1; j<=L; j++) cout<<seq[k][j];}
		  cout<<"\n";
		}
	    }

	} //end if("SEQ")

      /////////////////////////////////////////////////////////////////////////////////////
      // Read average amino acid frequencies for HMM
      else if (!strcmp("FREQ",str4))
	{
	  fprintf(stderr,"Error: hhm file has obsolete format.\n");
	  fprintf(stderr,"Please use hhmake version > 1.1 to generate hhm files.\n");
	  exit(1);
	}

      else if (!strcmp("AVER",str4)) {} // AVER line scrapped
      else if (!strcmp("NULL",str4))
	{
	  ptr=line+4;
	  for (a=0; a<20 && ptr; ++a)
	    //s2[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
	    pb[s2a[a]] = (float) fpow2(float(-strinta(ptr))/HMMSCALE);
	  if (!ptr) return Warning(dbf,line,name);
	  if (v>=4)
	    {
	      printf("\nNULL  ");
	      for (a=0; a<20; ++a) printf("%5.1f ",100.*pb[s2a[a]]);
	      printf("\n");
	    }
	}

      /////////////////////////////////////////////////////////////////////////////////////
      // Read transition probabilities from start state
      else if (!strcmp("HMM",str3))
	{
	  fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
	  fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
	  ptr=line;
	  for (a=0; a<=D2D && ptr; ++a)
	    tr[0][a] = float(-strinta(ptr))/HMMSCALE; //store transition probabilites as log2 values
	    // strinta returns next integer in string and puts ptr to first char
	    // after the integer. Returns -99999 if '*' is found.
	    // ptr is set to 0 if no integer is found after ptr.
	  Neff_M[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with M->? transition
	  Neff_I[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with I->? transition
	  Neff_D[0] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with D->? transition
	  if (!ptr) return Warning(dbf,line,name);

	  /////////////////////////////////////////////////////////////////////////////////////
	  // Read columns of HMM
	  int next_i=0;  // index of next column
	  while (fgetline(line,LINELEN-2,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
	    {
	      if (strscn(line)==NULL) continue; // skip lines that contain only white space

 	      // Read in AA probabilities
	      ptr=line+1;
	      int prev_i = next_i;
	      next_i = strint(ptr); ++i;
	      if (v && next_i!=prev_i+1)
		if (++warn<=5)
		  {
		    cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
		    if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
		  }
	      if (i>L)
		{
		  cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<". Skipping HMM\n";
		  return 2;
		}
	      if (i>=/*MAXRES*/par.maxResLen-2)
		{
		  fgetline(line,LINELEN-1,dbf); // Skip line
		  continue;
		}

	      for (a=0; a<20 && ptr; ++a)
//		f[i][s2a[a]] = (float)pow(2.,float(-strinta(ptr))/HMMSCALE);
		f[i][s2a[a]] = fpow2(float(-strinta(ptr))/HMMSCALE);       // speed-up ~5 s for 10000 SCOP domains

	      //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
	      l[i]=strint(ptr);
	      if (!ptr) return Warning(dbf,line,name);
	      if (v>=4)
		{
		  printf("%s",line);
		  printf("%6i ",i);
		  for (a=0; a<20; ++a) printf("%5.1f ",100*f[i][s2a[a]]);
		  printf("%5i",l[i]);
		  printf("\n");
		}

	      // Read transition probabilities
	      fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
	      if (line[0]!=' ' && line[0]!='\t') return Warning(dbf,line,name);
	      ptr=line;
	      for (a=0; a<=D2D && ptr; ++a)
		tr[i][a] = float(-strinta(ptr))/HMMSCALE; //store transition prob's as log2-values
	      Neff_M[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with M->? transition
	      Neff_I[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with I->? transition
	      Neff_D[i] = float(strinta(ptr))/HMMSCALE;  // Read eff. number of sequences with D->? transition
	      if (!ptr) return Warning(dbf,line,name);
	      if (v>=4)
		{
		  printf("       ");
		  for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
		  printf("%5.1f %5.1f %5.1f \n",Neff_M[i],Neff_I[i],Neff_D[i]);
		}
	    }
	  if (line[0]=='/' && line[1]=='/') break;
	}
      else if (v) cerr<<endl<<"WARNING: Ignoring line\n\'"<<line<<"\'\nin HMM "<<name<<"\n";

    } //while(getline)

  if (L==0) return 0; //End of db file -> stop reading in

  // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
//   lamda = lamda_hash.Show(par.Key());
//   mu    = mu_hash.Show(par.Key());
  if (lamda && v>=3) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);

  if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
  if (v && i>/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
  if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
  if (v>=2) cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";
  L = i;

  // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
  for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
  Neff_M[L+1]=1.0f;
  Neff_I[L+1]=Neff_D[L+1]=0.0f;

  return 1; //return status: ok

} /* this is the end of HMM::Read() */


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Read an HMM from a HMMer .hmm file; return 0 at end of file
 */
int
HMM::ReadHMMer(FILE* dbf, char* filestr)
{
  char line[LINELEN]="";    // input line
  char desc[DESCLEN]="";    // description of family
  char str4[5]="";          // first 4 letters of input line
  char* ptr;                // pointer for string manipulation
  int i=0;                  // index for match state (first=1)
  int a;                    // amino acid index
  char dssp=0;              // 1 if a consensus SS has been found in the transition prob lines
  char annot=0;             // 1 if at least one annotation character in insert lines is ne '-' or ' '
  int k=0;                  // index for seq[k]
  static char ignore_hmmer_cal = 0;
  char* annotchr;           // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
  //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
  annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
  static int warn=0;
  int iAlpha = 20; /* size of alphabet, default is protein = 20 */
  double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */

  trans_lin=0;
  L=0;
  Neff_HMM=0;
  n_display=N_in=N_filtered=0;
  nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
  lamda=mu=0.0;
  trans_lin=0; // transition probs in log space
  name[0]=longname[0]=desc[0]=fam[0]='\0';
  //If at the end of while-loop L is still 0 then we have reached end of db file

  // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!

  while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
    {

      if (strscn(line)==NULL) continue;   // skip lines that contain only white space
      if (!strncmp("HMMER",line,5)) continue;

      substr(str4,line,0,3);              // copy the first four characters into str4

      if (!strcmp("NAME",str4) && name[0]=='\0')
	{
	  ptr=strscn(line+4);             // advance to first non-white-space character
	  strncpy(name,ptr,NAMELEN-1);    // copy full name to name
	  strcut(name);                   // ...cut after first word...
	  if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
	}

      else if (!strcmp("ACC ",str4))
	{
	  ptr=strscn(line+4);              // advance to first non-white-space character
	  strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
	}

      else if (!strcmp("DESC",str4))
	{
	  ptr=strscn(line+4);             // advance to first non-white-space character
	  if (ptr)
	    {
	      strncpy(desc,ptr,DESCLEN-1);   // copy description to name...
	      desc[DESCLEN-1]='\0';
	      strcut(ptr);                   // ...cut after first word...
	    }
	  if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
	}

      else if (!strcmp("LENG",str4))
	{
	  ptr=line+4;
	  L=strint(ptr);                  //read next integer (number of match states)
	}

      else if (!strcmp("ALPH",str4)) {

          ptr=strscn(line+4);

          if (0 == strcmp(ptr, "Amino")){
              iAlpha = 20;
          }
          else if (0 == strcmp(ptr, "Nucleic")){
              iAlpha = 4;
              printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n",
                     __FUNCTION__, __FILE__, __LINE__);
          }
          else {
              return Warning(dbf,line,name);
          }
          dAlphaInv = 1.00 / (double)(iAlpha);
          //continue;
      }
      else if (!strcmp("RF  ",str4)) continue;
      else if (!strcmp("CS  ",str4)) continue;
      else if (!strcmp("MAP ",str4)) continue;
      else if (!strcmp("COM ",str4)) continue;
      else if (!strcmp("NSEQ",str4))
	{
	  ptr=line+4;
	  N_in=N_filtered=strint(ptr);    //read next integer: number of sequences after filtering
	}

      else if (!strcmp("DATE",str4)) continue;
      else if (!strncmp("CKSUM ",line,5)) continue;
      else if (!strcmp("GA  ",str4)) continue;
      else if (!strcmp("TC  ",str4)) continue;
      else if (!strcmp("NC  ",str4)) continue;

      else if (!strncmp("SADSS",line,5))
	{
	  if (nsa_dssp<0)
	    {
	      nsa_dssp=k++;
	      seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
	      sname[nsa_dssp] = new(char[15]);
	      strcpy(seq[nsa_dssp]," ");
	      strcpy(sname[nsa_dssp],"sa_dssp");

	    }
	  ptr=strscn(line+5);
	  if (ptr)
	    {
	      strcut(ptr);
	      if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
		printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
	      else strcat(seq[nsa_dssp],ptr);
	    }
	}

      else if (!strncmp("SSPRD",line,5))
	{
	  if (nss_pred<0)
	    {
	      nss_pred=k++;
	      seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
	      sname[nss_pred] = new(char[15]);
	      strcpy(seq[nss_pred]," ");
	      strcpy(sname[nss_pred],"ss_pred");

	    }
	  ptr=strscn(line+5);
	  if (ptr)
	    {
	      strcut(ptr);
	      if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
		printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
	      else strcat(seq[nss_pred],ptr);
	    }
	}

      else if (!strncmp("SSCON",line,5))
	{
	  if (nss_conf<0)
	    {
	      nss_conf=k++;
	      seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
	      sname[nss_conf] = new(char[15]);
	      strcpy(seq[nss_conf]," ");
	      strcpy(sname[nss_conf],"ss_conf");
	    }
	  ptr=strscn(line+5);
	  if (ptr)
	    {
	      strcut(ptr);
	      if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
		printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
	      else strcat(seq[nss_conf],ptr);
	    }
	}

      else if (!strncmp("SSCIT",line,5)) continue;
      else if (!strcmp("XT  ",str4)) continue;
      else if (!strcmp("NULT",str4)) continue;

      else if (!strcmp("NULE",str4))
	{
	  ptr=line+4;
	  for (a=0; (a < iAlpha) && ptr; ++a){
	    /* FIXME: FS introduced alphabet size (was '20')
	       and dAlphaInv (was '0.05' = 1/20) */
	    //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
	    pb[s2a[a]] = (float) dAlphaInv * fpow2(float(strinta(ptr,-99999))/HMMSCALE);  /* dAlphaInv */
	  }
	  for (a = iAlpha; a < 20; a++){
	    pb[s2a[a]] = 0.0;
	  }
	  if (!ptr) return Warning(dbf,line,name);
	  if (v>=4)
	    {
	      printf("\nNULL  ");
	      for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
		printf("%5.1f ",100.*pb[s2a[a]]);
	      }
	      printf("\n");
	    }
	}

      else if (!strcmp("EVD ",str4))
	{
	  char* ptr=line+4;
	  ptr = strscn(ptr);
	  sscanf(ptr,"%f",&lamda);
	  ptr = strscn(ptr);
	  sscanf(ptr,"%f",&mu);
	  if (lamda<0)
	    {
	      if (v>=2 && ignore_hmmer_cal==0)
		cerr<<endl<<"Warning: some HMMs have been calibrated with HMMER's 'hmmcalibrate'. These calibrations will be ignored\n";
	      ignore_hmmer_cal=1;
	      mu = lamda = 0.0;
	    }
	}

      /////////////////////////////////////////////////////////////////////////////////////
      // Read transition probabilities from start state
      else if (!strncmp("HMM",line,3))
	{
	  fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
	  fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
	  ptr=line;
	  for (a=0; a<=M2D && ptr; ++a)
	    tr[0][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition probabilites as log2 values
	    // strinta returns next integer in string and puts ptr to first char
	    // after the integer. Returns -99999 if '*' is found.
	    // ptr is set to 0 if no integer is found after ptr.
	  tr[0][I2M] = tr[0][D2M] = 0.0;
	  tr[0][I2I] = tr[0][D2D] = -99999.0;
	  if (!ptr) return Warning(dbf,line,name);
	  if (v>=4)
	    {
	      printf("       ");
	      for (a=0; a<=D2D && ptr; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
	      printf("\n");
	    }

	  // Prepare to store DSSP states (if there are none, delete afterwards)
	  nss_dssp=k++;
	  seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
	  sname[nss_dssp] = new(char[15]);
	  strcpy(sname[nss_dssp],"ss_dssp");

	  /////////////////////////////////////////////////////////////////////////////////////
	  // Read columns of HMM
	  int next_i=0;  // index of next column
	  while (fgetline(line,LINELEN-1,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
	    {
	      if (strscn(line)==NULL) continue; // skip lines that contain only white space

 	      // Read in AA probabilities
	      ptr=line;
	      int prev_i = next_i;
	      next_i = strint(ptr); ++i;
	      if (v && next_i!=prev_i+1)
		if (++warn<5)
		  {
		    cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
		    if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
		  }
	      if (i>L)
		{
		  cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
		  return 2;
		}
	      if (i>L && v)
		cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
	      if (i>=/*MAXRES*/par.maxResLen-2)
		{
		  fgetline(line,LINELEN-1,dbf); // Skip two lines
		  fgetline(line,LINELEN-1,dbf);
		  continue;
		}

	      for (a=0; (a<iAlpha) && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
              f[i][s2a[a]] = (float) pb[s2a[a]]*fpow2(float(strinta(ptr,-99999))/HMMSCALE);
              //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
	      }
	      for (a = iAlpha; a < 20; a++){
              f[i][s2a[a]] = 0.0;
	      }
	      if (!ptr) return Warning(dbf,line,name);
	      if (v>=4)
              {
                  printf("%6i ",i);
                  for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
                      printf("%5.1f ",100*f[i][s2a[a]]);
                  }
                  printf("\n");
              }

	      // Read insert emission line
	      fgetline(line,LINELEN-1,dbf);
	      ptr = strscn(line);
	      if (!ptr) return Warning(dbf,line,name);
	      annotchr[i]=uprchr(*ptr);
	      if (*ptr!='-' && *ptr!=' ') annot=1;

	      // Read annotation character and seven transition probabilities
	      fgetline(line,LINELEN-1,dbf);
	      ptr = strscn(line);
	      switch (*ptr)
		{
		case 'H':
		  ss_dssp[i]=1;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'E':
		  ss_dssp[i]=2;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'C':
		  ss_dssp[i]=3;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'S':
		  ss_dssp[i]=4;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'T':
		  ss_dssp[i]=5;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'G':
		  ss_dssp[i]=6;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'B':
		  ss_dssp[i]=7;
		  seq[nss_dssp][i]=*ptr;
		  dssp=1;
		  break;
		case 'I':
		  dssp=1;
		case '~':
		  ss_dssp[i]=3;
		  seq[nss_dssp][i]=*ptr;
		  break;
		case '-':
		default:
		  ss_dssp[i]=0;
		  seq[nss_dssp][i]=*ptr;
		  break;

		}

	      ptr+=2;
	      for (a=0; a<=D2D && ptr; ++a)
		tr[i][a] = float(strinta(ptr,-99999))/HMMSCALE; //store transition prob's as log2-values
	      if (!ptr) return Warning(dbf,line,name);
	      if (v>=4)
		{
		  printf("       ");
		  for (a=0; a<=D2D; ++a) printf("%5.1f ",100*fpow2(tr[i][a]));
		  printf("\n");
		}
	    }

	  if (line[0]=='/' && line[1]=='/') break;

	} /* strncmp("HMM") */

    } //while(getline)

  if (L==0) return 0; //End of db file -> stop reading in

  // Set coefficients of EVD (= 0.0 if not calibrated for these parameters)
  //   lamda = lamda_hash.Show(par.Key());
  //   mu    = mu_hash.Show(par.Key());
  if (lamda && v>=2) printf("HMM %s is already calibrated: lamda=%-5.3f, mu=%-5.2f\n",name,lamda,mu);

  if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
  if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
  if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
  L = i;

  if (strlen(longname)>0) strcat(longname," ");
  strncat(longname,name,DESCLEN-strlen(longname)-1);  // longname = ACC NAME DESC
  if (strlen(name)>0) strcat(longname," ");
  strncat(longname,desc,DESCLEN-strlen(longname)-1);
  longname[DESCLEN-1]='\0';
  ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
  RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'

  // Secondary structure
  if (!dssp)
    {
      // remove dssp sequence
      // memory that had been allocated in case ss_dssp was given needs to be freed
      delete[] seq[nss_dssp];    seq[nss_dssp] = NULL;
      // memory that had been allocated in case ss_dssp was given needs to be freed
      delete[] sname[nss_dssp];    sname[nss_dssp] = NULL;
      nss_dssp=-1;
      k--;
    }
  if (nss_pred>=0)
    {
      for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
      if (nss_conf>=0)
	for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
      else
	for (i=1; i<=L; ++i) ss_conf[i] = 5;
    }

  // Copy query (first sequence) and consensus  residues?
  if (par.showcons)
    {
      sname[k]=new(char[10]);
      strcpy(sname[k],"Consensus");
      sname[k+1]=new(char[strlen(longname)+1]);
      strcpy(sname[k+1],longname);
      seq[k]=new(char[L+2]);
      seq[k][0]=' ';
      seq[k][L+1]='\0';
      seq[k+1]=new(char[L+2]);
      seq[k+1][0]=' ';
      seq[k+1][L+1]='\0';
      for (i=1; i<=L; ++i)
	{
	  float pmax=0.0;
	  int amax=0;
	  for (a=0; a<NAA; ++a)
	    if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
	  if (pmax>0.6) seq[k][i]=i2aa(amax);
	  else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
	  else seq[k][i]='x';
	  seq[k+1][i]=i2aa(amax);
	}
      ncons=k++; // nfirst is set later!
    }
  else
    {
      sname[k]=new(char[strlen(longname)+1]);
      /* FIXME valgrind says bytes get lost here during hmm iteration --
         fixed in HMM::ClobberGlobal(), I (FS) think */
      strcpy(sname[k],longname);
      seq[k]=new(char[L+2]);
      seq[k][0]=' ';
      seq[k][L+1]='\0';
    }

  if (annot) // read in some annotation characters?
    {
      annotchr[0]=' ';
      annotchr[L+1]='\0';
      strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters
    }
  else if (!par.showcons)  // we have not yet calculated the consensus, but we need it now as query (first sequence)
    {
      /* FIXME: FS set ncons=k
	 don't understand why it is not set but seem to need it */
      ncons = k;
      for (i=1; i<=L; ++i)
	{
	  float pmax=0.0;
	  int amax=0;
	  for (a=0; a<NAA; ++a)
	    if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
	  seq[k][i]=i2aa(amax);
	}
    }
//   printf("%i query name=%s  seq=%s\n",n,sname[n],seq[n]);
  nfirst=k++;

  n_display=k;

  // Calculate overall Neff_HMM
  Neff_HMM=0;
  for (i=1; i<=L; ++i)
    {
      float S=0.0;
      for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
	if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]);
      }
      Neff_HMM+=(float) fpow2(S);
    }
  Neff_HMM/=L;
  for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
  if (v>=2)
    cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";

  // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
  for (a=0; a<iAlpha; ++a) { /* FIXME: FS introduced iAlpha, was '20' */
    f[0][a]=f[L+1][a]=pb[a];
  }
  delete[] annotchr;    annotchr = NULL;

  return 1; //return status: ok

} /* this is the end of HMM::ReadHMMer() */


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Read an HMM from a HMMER3 .hmm file; return 0 at end of file
 */
int
HMM::ReadHMMer3(FILE* dbf, char* filestr)
{
    char line[LINELEN]="";    // input line
    char desc[DESCLEN]="";    // description of family
    char str4[5]="";          // first 4 letters of input line
    char* ptr;                // pointer for string manipulation
    int i=0;                  // index for match state (first=1)
    int a;                    // amino acid index
    char dssp=0;              // 1 if a consensus SS has been found in the transition prob lines
    char annot=0;             // 1 if at least one annotation character in insert lines is ne '-' or ' '
    int k=0;                  // index for seq[k]
    char* annotchr;           // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
    //annotchr = new char[MAXRES]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
    annotchr = new char[par.maxResLen]; // consensus amino acids in ASCII format, or, in HMMER format, the reference annotation character in insert line
    static int warn=0;
    int iAlpha = 20; /* size of alphabet, default is protein = 20 */
    double dAlphaInv = 1.00 / (double)(iAlpha); /* weight of AA */

 trans_lin=0;
 L=0;
 Neff_HMM=0;
 n_seqs=n_display=N_in=N_filtered=0;
 nss_dssp=nsa_dssp=nss_pred=nss_conf=nfirst=ncons=-1;
 lamda=mu=0.0;
 trans_lin=0; // transition probs in log space
 name[0]=longname[0]=desc[0]=fam[0]='\0';
 //If at the end of while-loop L is still 0 then we have reached end of db file

 // Do not delete name and seq vectors because their adresses are transferred to hitlist as part of a hit!!


 while (fgetline(line,LINELEN-1,dbf) && !(line[0]=='/' && line[1]=='/'))
     {

         if (strscn(line)==NULL) continue;   // skip lines that contain only white space
         if (!strncmp("HMMER",line,5)) continue;

         substr(str4,line,0,3);              // copy the first four characters into str4

         if (!strcmp("NAME",str4) && name[0]=='\0')
             {
                 ptr=strscn(line+4);             // advance to first non-white-space character
                 strncpy(name,ptr,NAMELEN-1);    // copy full name to name
                 strcut(name);                   // ...cut after first word...
                 if (v>=4) cout<<"Reading in HMM "<<name<<":\n";
             }

         else if (!strcmp("ACC ",str4))
             {
                 ptr=strscn(line+4);              // advance to first non-white-space character
                 strncpy(longname,ptr,DESCLEN-1); // copy Accession id to longname...
             }

         else if (!strcmp("DESC",str4))
             {
                 ptr=strscn(line+4);             // advance to first non-white-space character
                 if (ptr)
                     {
                         strncpy(desc,ptr,DESCLEN-1);   // copy description to name...
                         desc[DESCLEN-1]='\0';
                         strcut(ptr);                   // ...cut after first word...
                     }
                 if (!ptr || ptr[1]!='.' || strchr(ptr+3,'.')==NULL) strcpy(fam,""); else strcpy(fam,ptr); // could not find two '.' in name?
             }

         else if (!strcmp("LENG",str4))
             {
                 ptr=line+4;
                 L=strint(ptr);                  //read next integer (number of match states)
             }

         else if (!strcmp("ALPH",str4)) {

             ptr=strscn(line+4);

             if (0 == strcmp(ptr, "amino")){
                 iAlpha = 20;
             }
             else if (0 == strcmp(ptr, "Nucleic")){
                 iAlpha = 4;
                 printf("%s:%s:%d: WARNING: HMM reading does not work for DNA/RNA\n",
                        __FUNCTION__, __FILE__, __LINE__);
             }
             else {
                 return Warning(dbf,line,name);
             }
             dAlphaInv = 1.00 / (double)(iAlpha);
             //continue;
         }
         else if (!strcmp("RF  ",str4)) continue;
         else if (!strcmp("CS  ",str4)) continue;
         else if (!strcmp("MAP ",str4)) continue;
         else if (!strcmp("COM ",str4)) continue;
         else if (!strcmp("NSEQ",str4))
             {
                 ptr=line+4;
                 N_in=N_filtered=strint(ptr);    //read next integer: number of sequences after filtering
             }

         else if (!strcmp("DATE",str4)) continue;
         else if (!strncmp("CKSUM ",line,5)) continue;
         else if (!strcmp("GA  ",str4)) continue;
         else if (!strcmp("TC  ",str4)) continue;
         else if (!strcmp("NC  ",str4)) continue;

         //////////////////////////////////////////////////////////////////////////////////////////////////////
         // Still needed???

         else if (!strncmp("SADSS",line,5))
             {
                 if (nsa_dssp<0)
                     {
                         nsa_dssp=k++;
                         seq[nsa_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
                         sname[nsa_dssp] = new(char[15]);
                         strcpy(seq[nsa_dssp]," ");
                         strcpy(sname[nsa_dssp],"sa_dssp");

                     }
                 ptr=strscn(line+5);
                 if (ptr)
                     {
                         strcut(ptr);
                         if (strlen(seq[nsa_dssp])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
                             printf("\nWARNING: HMM %s has SADSS records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
                         else strcat(seq[nsa_dssp],ptr);
                     }
             }

         else if (!strncmp("SSPRD",line,5))
             {
                 if (nss_pred<0)
                     {
                         nss_pred=k++;
                         seq[nss_pred] = new(char[/*MAXRES*/par.maxResLen+2]);
                         sname[nss_pred] = new(char[15]);
                         strcpy(seq[nss_pred]," ");
                         strcpy(sname[nss_pred],"ss_pred");

                     }
                 ptr=strscn(line+5);
                 if (ptr)
                     {
                         strcut(ptr);
                         if (strlen(seq[nss_pred])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
                             printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
                         else strcat(seq[nss_pred],ptr);
                     }
             }

         else if (!strncmp("SSCON",line,5))
             {
                 if (nss_conf<0)
                     {
                         nss_conf=k++;
                         seq[nss_conf] = new(char[/*MAXRES*/par.maxResLen+2]);
                         sname[nss_conf] = new(char[15]);
                         strcpy(seq[nss_conf]," ");
                         strcpy(sname[nss_conf],"ss_conf");
                     }
                 ptr=strscn(line+5);
                 if (ptr)
                     {
                         strcut(ptr);
                         if (strlen(seq[nss_conf])+strlen(ptr)>=(unsigned)(/*MAXRES*/par.maxResLen))
                             printf("\nWARNING: HMM %s has SSPRD records with more than %i residues.\n",name,/*MAXRES*/par.maxResLen);
                         else strcat(seq[nss_conf],ptr);
                     }
             }

         else if (!strncmp("SSCIT",line,5)) continue;
         else if (!strcmp("XT  ",str4)) continue;
         //////////////////////////////////////////////////////////////////////////////////////////////////////
         else if (!strncmp("STATS LOCAL",line,11)) continue;

         else if (!strcmp("EFFN",str4))
             {
                 ptr=line+4;
                 float effn = strflt(ptr);
                 // Calculate Neff_HMM by using f(x) = ax^0.1 + bx^0.5 + cx + d  (fitted with scop25 dataset)
                 Neff_HMM = -1.403534 * pow(effn, 0.1) + 4.428118 * pow(effn, 0.5) - 0.2885410 * effn - 1.108568;
             }

         /////////////////////////////////////////////////////////////////////////////////////
         // Read transition probabilities from start state
         else if (!strncmp("HMM",line,3))
             {
                 fgetline(line,LINELEN-1,dbf); // Skip line with amino acid labels
                 fgetline(line,LINELEN-1,dbf); // Skip line with transition labels
                 ptr=strscn(line);

                 if (!strncmp("COMPO",ptr,5))
                     {
                         ptr=ptr+5;
                         for (a=0; a<20 && ptr; ++a)
                             //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
                             pb[s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
                         if (!ptr) return Warning(dbf,line,name);
                         if (v>=4)
                             {
                                 printf("\nNULL ");
                                 for (a=0; a<20; ++a) printf("%6.3g ",100.*pb[s2a[a]]);
                                 printf("\n");
                             }
                         fgetline(line,LINELEN-1,dbf); // Read next line
                     }

                 fgetline(line,LINELEN-1,dbf); // Skip line with 0-states insert probabilities

                 ptr = strscn(line);
                 for (a=0; a<=D2D && ptr; ++a)
                     tr[0][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition probabilites as log2 values
                 // strinta returns next integer in string and puts ptr to first char
                 // after the integer. Returns -99999 if '*' is found.
                 // ptr is set to 0 if no integer is found after ptr.
                 if (!ptr) return Warning(dbf,line,name);
                 if (v>=4)
                     {
                         printf("       ");
                         for (a=0; a<=D2D && ptr; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
                         printf("\n");
                     }

                 // Prepare to store DSSP states (if there are none, delete afterwards)
                 nss_dssp=k++;
                 seq[nss_dssp] = new(char[/*MAXRES*/par.maxResLen+2]);
                 sname[nss_dssp] = new(char[15]);
                 strcpy(sname[nss_dssp],"ss_dssp");

                 /////////////////////////////////////////////////////////////////////////////////////
                 // Read columns of HMM
                 int next_i=0;  // index of next column
                 while (fgetline(line,LINELEN-1,dbf) &&  !(line[0]=='/' && line[1]=='/') && line[0]!='#')
                     {
                         if (strscn(line)==NULL) continue; // skip lines that contain only white space

                         // Read in AA probabilities
                         ptr=line;
                         int prev_i = next_i;
                         next_i = strint(ptr); ++i;
                         if (v && next_i!=prev_i+1)
                             if (++warn<5)
                                 {
                                     cerr<<endl<<"WARNING: in HMM "<<name<<" state "<<prev_i<<" is followed by state "<<next_i<<"\n";
                                     if (warn==5) cerr<<endl<<"WARNING: further warnings while reading HMMs will be suppressed.\n";
                                 }
                         if (i>L)
                             {
                                 cerr<<endl<<"Error: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
                                 return 2;
                             }
                         if (i>L && v)
                             cerr<<endl<<"WARNING: in HMM "<<name<<" there are more columns than the stated length "<<L<<"\n";
                         if (i>=/*MAXRES*/par.maxResLen-2)
                             {
                                 fgetline(line,LINELEN-1,dbf); // Skip two lines
                                 fgetline(line,LINELEN-1,dbf);
                                 continue;
                             }

                         for (a=0; a<iAlpha && ptr; ++a){ /* FIXME: FS introduced iAlpha, was '20' */
                             f[i][s2a[a]] = (float) exp(-1.0*strflta(ptr,99999));
                             //s2a[a]: transform amino acids Sorted by alphabet -> internal numbers for amino acids
                         }
                         for (a = iAlpha; a < 20; a++){
                             f[i][s2a[a]] = 0.0;
                         }
                         if (!ptr) return Warning(dbf,line,name);
                         if (v>=4)
                             {
                                 printf("%6i ",i);
                                 for (a=0; a<iAlpha; ++a) printf("%6.3g ",100*f[i][s2a[a]]);
                                 printf("\n");
                             }

                         // Ignore MAP annotation
                         ptr = strscn(line); //find next word
                         ptr = strscn_ws(line); // ignore word

                         // Read RF and CS annotation
                         ptr = strscn(line);
                         if (!ptr) return Warning(dbf,line,name);
                         annotchr[i]=uprchr(*ptr);
                         if (*ptr!='-' && *ptr!=' ') annot=1;

                         ptr = strscn(line);
                         switch (*ptr)
                             {
                             case 'H':
                                 ss_dssp[i]=1;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'E':
                                 ss_dssp[i]=2;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'C':
                                 ss_dssp[i]=3;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'S':
                                 ss_dssp[i]=4;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'T':
                                 ss_dssp[i]=5;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'G':
                                 ss_dssp[i]=6;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'B':
                                 ss_dssp[i]=7;
                                 seq[nss_dssp][i]=*ptr;
                                 dssp=1;
                                 break;
                             case 'I':
                                 dssp=1;
                             case '~':
                                 ss_dssp[i]=3;
                                 seq[nss_dssp][i]=*ptr;
                                 break;
                             case '-': // no SS available from any template
                             case '.': // no clear consensus SS structure
                             case 'X': // no clear consensus SS structure
                                 ss_dssp[i]=0;
                                 seq[nss_dssp][i]='-';
                                 break;
                             default:
                                 ss_dssp[i]=0;
                                 seq[nss_dssp][i]=*ptr;
                                 break;
                             }

                         // Read insert emission line
                         fgetline(line,LINELEN-1,dbf);

                         // Read seven transition probabilities
                         fgetline(line,LINELEN-1,dbf);

                         ptr+=2;
                         for (a=0; a<=D2D && ptr; ++a)
                             tr[i][a] = log2((float) exp(-1.0*strflta(ptr,99999))); //store transition prob's as log2-values
                         if (!ptr) return Warning(dbf,line,name);
                         if (v>=4)
                             {
                                 printf("       ");
                                 for (a=0; a<=D2D; ++a) printf("%6.3g ",100*fpow2(tr[i][a]));
                                 printf("\n");
                             }
                     }

                 if (line[0]=='/' && line[1]=='/') break;

             } /* strncmp("HMM") */

     } //while(getline)

 if (L==0) return 0; //End of db file -> stop reading in

 if (v && i!=L) cerr<<endl<<"Warning: in HMM "<<name<<" there are only "<<i<<" columns while the stated length is "<<L<<"\n";
 if (v && i>=/*MAXRES*/par.maxResLen-2) {i=/*MAXRES*/par.maxResLen-2; cerr<<endl<<"WARNING: maximum number "<</*MAXRES*/par.maxResLen-2<<" of residues exceeded while reading HMM "<<name<<"\n";}
 if (v && !i)  cerr<<endl<<"WARNING: HMM "<<name<<" contains no match states. Check the alignment that gave rise to this HMM.\n";
 L = i;

 if (strlen(longname)>0) strcat(longname," ");
 strncat(longname,name,DESCLEN-strlen(longname)-1);  // longname = ACC NAME DESC
 if (strlen(name)>0) strcat(longname," ");
 strncat(longname,desc,DESCLEN-strlen(longname)-1);
 longname[DESCLEN-1]='\0';
 ScopID(cl,fold,sfam,fam);// get scop classification from basename (e.g. a.1.2.3.4)
 RemoveExtension(file,filestr); // copy name of dbfile without extension into 'file'

 // Secondary structure
 if (!dssp)
     {
         // remove dssp sequence
         delete[] seq[nss_dssp];    // memory that had been allocated in case ss_dssp was given needs to be freed
         delete[] sname[nss_dssp];  // memory that had been allocated in case ss_dssp was given needs to be freed
         nss_dssp=-1;
         k--;
     }
 else { seq[nss_dssp][0]='-'; seq[nss_dssp][L+1]='\0'; }

 if (nss_pred>=0)
     {
         for (i=1; i<=L; ++i) ss_pred[i] = ss2i(seq[nss_pred][i]);
         if (nss_conf>=0)
             for (i=1; i<=L; ++i) ss_conf[i] = cf2i(seq[nss_conf][i]);
         else
             for (i=1; i<=L; ++i) ss_conf[i] = 5;
     }

 // Copy query (first sequence) and consensus  residues?
 if (par.showcons)
     {
         sname[k]=new(char[10]);
         strcpy(sname[k],"Consensus");
         sname[k+1]=new(char[strlen(longname)+1]);
         strcpy(sname[k+1],longname);
         seq[k]=new(char[L+2]);
         seq[k][0]=' ';
         seq[k][L+1]='\0';
         seq[k+1]=new(char[L+2]);
         seq[k+1][0]=' ';
         seq[k+1][L+1]='\0';
         for (i=1; i<=L; ++i)
             {
                 float pmax=0.0;
                 int amax=0;
                 for (a=0; a<NAA; ++a)
                     if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
                 if (pmax>0.6) seq[k][i]=i2aa(amax);
                 else if (pmax>0.4) seq[k][i]=lwrchr(i2aa(amax));
                 else seq[k][i]='x';
                 seq[k+1][i]=i2aa(amax);
             }
         ncons=k++; // nfirst is set later!
     }
 else
     {
         sname[k]=new(char[strlen(longname)+1]);
         strcpy(sname[k],longname);
         seq[k]=new(char[L+2]);
         seq[k][0]=' ';
         seq[k][L+1]='\0';
     }

 if (annot) // read in some annotation characters?
     {
         annotchr[0]=' ';
         annotchr[L+1]='\0';
         strcpy(seq[k],annotchr); // overwrite the consensus sequence with the annotation characters
     }
 else if (!par.showcons)  // we have not yet calculated the consensus, but we need it now as query (first sequence)
     {
         for (i=1; i<=L; ++i)
             {
                 float pmax=0.0;
                 int amax=0;
                 for (a=0; a<NAA; ++a)
                     if (f[i][a]>pmax) {amax=a; pmax=f[i][a];}
                 seq[k][i]=i2aa(amax);
             }
     }
 //   printf("%i query name=%s  seq=%s\n",n,sname[n],seq[n]);
 nfirst=k++;

 n_display=k;
 n_seqs=k;

 // If no effektive number of sequences is given, calculate Neff_HMM by given profile
 if (Neff_HMM == 0) {
     for (i=1; i<=L; ++i)
         {
             float S=0.0;
             for (a=0; a<20; ++a)
                 if (f[i][a]>1E-10) S-=f[i][a]*fast_log2(f[i][a]);
             Neff_HMM+=(float) fpow2(S);
         }
     Neff_HMM/=L;
 }

 for (i=0; i<=L; ++i) Neff_M[i] = Neff_I[i] = Neff_D[i] = 10.0; // to add only little additional pseudocounts!
 Neff_M[L+1]=1.0f;
 Neff_I[L+1]=Neff_D[L+1]=0.0f;

 if (v>=2)
     cout<<"Read in HMM "<<name<<" with "<<L<<" match states and effective number of sequences = "<<Neff_HMM<<"\n";

 ///////////////////////////////////////////////////////////////////

 // Set emission probabilities of zero'th (begin) state and L+1st (end) state to background probabilities
 for (a=0; a<20; ++a) f[0][a]=f[L+1][a]=pb[a];
 delete[] annotchr;

 has_pseudocounts=true;

 return 1; //return status: ok


} /* this is the end of HMM::ReadHMMer3() */


//////////////////////////////////////////////////////////////////////////////
/**
 * @brief Add transition pseudocounts to HMM (and calculate lin-space transition probs)
 */
void
HMM::AddTransitionPseudocounts(float gapd, float gape, float gapf, float gapg, float gaph, float gapi, float gapb)
{
  int i;               //position in alignment
  float sum;
  float pM2M, pM2I, pM2D, pI2I, pI2M, pD2D, pD2M;
  float p0,p1,p2;
  if (par.gapb<=0) return;
  if (trans_lin==1) {fprintf(stderr,"Error: Adding transition pseudocounts to linear representation of %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
  if (trans_lin==2) {fprintf(stderr,"Error: Adding transition pseudocounts twice is %s not allowed. Please report this error to the HHsearch developers.\n",name); exit(6);}
  trans_lin=2;

  // Calculate pseudocount transition probabilities
  pM2D=pM2I=gapd*0.0286;     //a-priori probability for inserts and deletions
  pM2M=1-pM2D-pM2I;
  // gape=0 -> pI2I=0   gape=1 -> pI2I=0.75    gape=inf -> pI2I=1.
  pI2I=1.0*gape/(gape-1+1.0/0.75);
  pI2M=1-pI2I;
  // gape=0 -> pD2D=0   gape=1 -> pD2D=0.75    gape=inf -> pD2D=1.
  pD2D=1.0*gape/(gape-1+1.0/0.75);
  pD2M=1-pD2D;

  for (i=0; i<=L; ++i) //for all columns in HMM
    {
      // Transitions from M state
      p0 = (Neff_M[i]-1)*fpow2(tr[i][M2M]) + gapb*pM2M;
      p1 = (Neff_M[i]-1)*fpow2(tr[i][M2D]) + gapb*pM2D;
      p2 = (Neff_M[i]-1)*fpow2(tr[i][M2I]) + gapb*pM2I;
      if (i==0) p1=p2=0;       //from M(0) no transition to D(1) and I(0) possible
      if (i==L) p1=p2=0;       //from M(L) no transition to D(L+1) and I(L+1) possible
      sum = p0+p1+p2+FLT_MIN;

//       p0 = p0/sum ;
//       p1 = pow(p1/sum,gapf);
//       p2 = pow(p2/sum,gapg);
//       sum = p0+p1+p2+FLT_MIN;
//       tr[i][M2M] = fast_log2(p0/sum);
//       tr[i][M2D] = fast_log2(p1/sum);
//       tr[i][M2I] = fast_log2(p2/sum);

      tr[i][M2M] = fast_log2(p0/sum);
      tr[i][M2D] = fast_log2(p1/sum)*gapf;
      tr[i][M2I] = fast_log2(p2/sum)*gapg;

      // Transitions from I state
      p0 = Neff_I[i]*fpow2(tr[i][I2M]) + gapb*pI2M;
      p1 = Neff_I[i]*fpow2(tr[i][I2I]) + gapb*pI2I;
      sum = p0+p1+FLT_MIN;

//       p0 = pow(p0/sum,gapg);
//       p1 = pow(p1/sum,gapi);
//       sum = p0+p1+FLT_MIN;
//       tr[i][I2M] = fast_log2(p0/sum);
//       tr[i][I2I] = fast_log2(p1/sum);

      tr[i][I2M] = fast_log2(p0/sum);
      tr[i][I2I] = fast_log2(p1/sum)*gapi;

      // Transitions from D state
      p0 = Neff_D[i]*fpow2(tr[i][D2M]) + gapb*pD2M;
      p1 = Neff_D[i]*fpow2(tr[i][D2D]) + gapb*pD2D;
      if (i==L) p1=0;          //from D(L) no transition to D(L+1) possible
      sum = p0+p1+FLT_MIN;

//       p0 = pow(p0/sum,gapf);
//       p1 = pow(p1/sum,gaph);
//       sum = p0+p1+FLT_MIN;
//       tr[i][D2M] = fast_log2(p0/sum);
//       tr[i][D2D] = fast_log2(p1/sum);

      tr[i][D2M] = fast_log2(p0/sum);
      tr[i][D2D] = fast_log2(p1/sum)*gaph;

      // SS-dependent gap penalties
      tr[i][M2M_GAPOPEN]=tr[i][M2M];
      tr[i][GAPOPEN]=0.0;
      tr[i][GAPEXTD]=0.0;
    }

  if (v>=4)
    {
      printf("\nPseudocount transition probabilities:\n");
      printf("pM2M=%4.1f%%, pM2I=%4.1f%%, pM2D=%4.1f%%, ",100*pM2M,100*pM2I,100*pM2D);
      printf("pI2M=%4.1f%%, pI2I=%4.1f%%, ",100*pI2M,100*pI2I);
      printf("pD2M=%4.1f%%, pD2D=%4.1f%% ",100*pD2M,100*pD2D);
      printf("tau = %4.1f%%\n\n",100.*gapb/(Neff_HMM-1+gapb));
      printf("Listing transition probabilities WITH pseudocounts:\n");
      printf("   i dssp pred sacc     M->M   M->I   M->D   I->M   I->I   D->M   D->D\n");

      for (i=1; i<=L; ++i) //for all columns in HMM
	{
	  printf("%4i  %1c    %1c    %1c    %6.3f %6.3f %6.3f ",i,i2ss(ss_dssp[i]),i2ss(ss_pred[i]),i2sa(sa_dssp[i]),fpow2(tr[i][M2M]),fpow2(tr[i][M2I]),fpow2(tr[i][M2D]));
	  printf("%6.3f %6.3f ",fpow2(tr[i][I2M]),fpow2(tr[i][I2I]));
	  printf("%6.3f %6.3f ",fpow2(tr[i][D2M]),fpow2(tr[i][D2D]));
	  printf("%1i %2i  %1i\n",ss_pred[i],ss_conf[i],ss_dssp[i]);
	}
      printf("\n");
      printf("nss_dssp=%i  nss_pred=%i\n",nss_dssp,nss_pred);
    }
  return;
}


//////////////////////////////////////////////////////////////////////////////
/**
 * @brief Use secondary structure-dependent gap penalties
 *    on top of the HMM transition penalties
 */
void
HMM::UseSecStrucDependentGapPenalties()
{
  int i;   // column in HMM
  int ii;
  //unsigned char iis[MAXRES]; // inside-integer array
  unsigned char iis[par.maxResLen]; // inside-integer array
  float d; // Additional penalty for opening gap whithin SS element
  float e; // Additional penalty for extending gap whithin SS element

  // Determine inside-integers:
  // CCSTCCCHHHHHHHHHHHCCCCCEEEEECCSBGGGCCCCEECC
  // 0000000123444432100000012210000000000001000
  ii=0;
  for (i=0; i<=L; ++i) // forward run
    {
      if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0;
      iis[i]=ii;
    }  for (i=0; i<=L; ++i)
  ii=0;
  iis[0]=iis[L]=0;
  for (i=L; i>=0; i--) // backward run
    {
      if (ss_dssp[i]==1 || ss_dssp[i]==2) {ii+=(ii<par.ssgapi);} else ii=0;
      iis[i-1]=imin(ii,iis[i-1]);
    }

  // Add SS-dependent gap penalties to HMM transition penalties
  for (i=0; i<=L; ++i) //for all columns in HMM
    {
      d=-iis[i]*par.ssgapd;
      e=-iis[i]*par.ssgape;
      tr[i][GAPOPEN]=d;
      tr[i][GAPEXTD]=e;
      tr[i][M2M_GAPOPEN]+=d;
      tr[i][M2I]+=d;
      tr[i][I2M]+=d;
      tr[i][I2I]+=e;
      tr[i][M2D]+=d;
      tr[i][D2M]+=d;
      tr[i][D2D]+=e;
    }

  if (v>=3)
    {
      printf("Col SS II\n");
      for (i=0; i<=L; ++i) printf("%3i  %c %2i\n",i,i2ss(ss_dssp[i]),iis[i]);
    }
  return;
}

/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Generate an amino acid frequency matrix g[][] with full pseudocount admixture (tau=1)
 */
void
HMM::PreparePseudocounts()
{
  for (int i=0; i<=L+1; ++i)
    for (int a=0; a<20; ++a)
      g[i][a] = // produces fast code
       R[a][0]*f[i][0]  +R[a][1]*f[i][1]  +R[a][2]*f[i][2]  +R[a][3]*f[i][3]  +R[a][4]*f[i][4]
      +R[a][5]*f[i][5]  +R[a][6]*f[i][6]  +R[a][7]*f[i][7]  +R[a][8]*f[i][8]  +R[a][9]*f[i][9]
      +R[a][10]*f[i][10]+R[a][11]*f[i][11]+R[a][12]*f[i][12]+R[a][13]*f[i][13]+R[a][14]*f[i][14]
      +R[a][15]*f[i][15]+R[a][16]*f[i][16]+R[a][17]*f[i][17]+R[a][18]*f[i][18]+R[a][19]*f[i][19];
}

/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Add amino acid pseudocounts to HMM and calculate average protein aa probabilities pav[a]
 * Pseudocounts: t.p[i][a] = (1-tau)*f[i][a] + tau*g[i][a]
 */
void
HMM::AddAminoAcidPseudocounts(char pcm, float pca, float pcb, float pcc)
{
  int i;               //position in HMM
  int a;               //amino acid (0..19)
  float sum;
  float tau;           //tau = pseudocount admixture

  for (a=0; a<20; ++a) pav[a]=pb[a]*100.0f/Neff_HMM; // initialize vector of average aa freqs with pseudocounts

  // Calculate amino acid frequencies p[i][a] = (1-tau(i))*f[i][a] + tau(i)*g[i][a]
  switch (pcm)
    {
    case 0: //no pseudocounts whatsoever: tau=0
      for (i=1; i<=L; ++i)
	for (a=0; a<20; ++a)
	  pav[a] += ( p[i][a]=f[i][a] );
      break;
    case 1: //constant pseudocounts (for optimization): tau = pca
      tau = pca;
      for (i=1; i<=L; ++i)
	for (a=0; a<20; ++a)
	  pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
      break;
    case 2: //divergence-dependent pseudocounts
    case 4: //divergence-dependent pseudocounts and rate matrix rescaling
      if (par.pcc==1.0f)
	for (i=1; i<=L; ++i)
	  {
	    tau = fmin(1.0, pca/(1. + Neff_M[i]/pcb ) );
	    for (a=0; a<20; ++a)
	      pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
	  }
      else
	for (i=1; i<=L; ++i)
	  {
	    tau = fmin(1.0, pca/(1. + pow((Neff_M[i])/pcb,pcc)));
	    for (a=0; a<20; ++a)
	      pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
	  }
      break;
    case 3: // constant-divergence pseudocounts
      for (i=1; i<=L; ++i)
	{
	  float x = Neff_M[i]/pcb;
	  pca = 0.793 + 0.048*(pcb-10.0);
	  tau = fmax(0.0, pca*(1-x + pcc*x*(1-x)) );
	  for (a=0; a<20; ++a)
	    pav[a] += ( p[i][a] = (1.-tau)*f[i][a] + tau * g[i][a] );
	}
      if (v>=2) { printf("Divergence before / after addition of amino acid pseudocounts: %5.2f / %5.2f\n",Neff_HMM, CalcNeff()); }
     break;
    } //end switch (pcm)


  // Normalize vector of average aa frequencies pav[a]
  NormalizeTo1(pav,NAA);

  for (a=0; a<20; ++a)
    p[0][a] = p[L+1][a] = pav[a];

  // DEBUGGING output
  if (v>=3)
    {
      switch (pcm)
	{
	case 0:
	  cout<<"No pseudocounts added (-pcm 0)\n";
	  return;
	case 1:
	  cout<<"Adding constant AA pseudocount admixture of "<<pca<<" to HMM "<<name<<"\n";
	  break;
	case 2:
	  cout<<"Adding divergence-dependent AA pseudocounts (-pcm 2) with admixture of "
	      <<pca/(1.+pow((Neff_HMM-1.)/pcb,pcc))<<" to HMM "<<name<<"\n";
	  break;
	} //end switch (pcm)
      cout<<"\nAverage amino acid frequencies WITH pseudocounts in HMM: \nProf: ";
      for (a=0; a<20; ++a) printf("%4.1f ",100*pav[a]);
      cout<<"\n";
      if (v>=4)
	{
	  cout<<"\nAmino acid frequencies WITHOUT pseudocounts:\n       A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
	  for (i=1; i<=L; ++i)
	    {
	      printf("%3i:  ",i);
	      sum=0;
	      for (a=0; a<20; ++a)
		{
		  sum+=f[i][a];
		  printf("%4.1f ",100*f[i][a]);
		}
	      printf("  sum=%5.3f\n",sum);
	    }
	  cout<<"\nAmino acid frequencies WITH pseudocounts:\n       A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
	  for (i=1; i<=L; ++i)
	    {
	      printf("%3i:  ",i);
	      sum=0;
	      for (a=0; a<20; ++a)
		{
		  sum+=p[i][a];
		  printf("%4.1f ",100*p[i][a]);
		}
	      printf("  sum=%5.3f\n",sum);
	    }
	}
   }
  return;
}


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Factor Null model into HMM t
 */
void
HMM::IncludeNullModelInHMM(HMM& q, HMM& t)
{

  int i,j;        //query and template match state indices
  int a;          //amino acid index

  switch (par.columnscore)
    {
    default:
    case 0: // Null model with background prob. from database
      for (a=0; a<20; ++a) pnul[a]=pb[a];
      break;

    case 1: // Null model with background prob. equal average from query and template
      for (a=0; a<20; ++a) pnul[a]=0.5*(q.pav[a]+t.pav[a]);
      break;

    case 2: // Null model with background prob. from template protein
      for (a=0; a<20; ++a) pnul[a]=t.pav[a];
      break;

    case 3: // Null model with background prob. from query protein
      for (a=0; a<20; ++a) pnul[a]=q.pav[a];
      break;

    case 4: // Null model with background prob. equal average from query and template
      for (a=0; a<20; ++a) pnul[a]=sqrt(q.pav[a]*t.pav[a]);
      break;

   case 10: // Separated column scoring for Stochastic Backtracing (STILL USED??)
      for (i=0; i<=q.L+1; ++i)
	{
	  float sum = 0.0;
	  for (a=0; a<20; ++a) sum += pb[a]*q.p[i][a];
	  sum = 1.0/sqrt(sum);
 	  for (a=0; a<20; ++a) q.p[i][a]*=sum;
	}
      for (j=0; j<=t.L+1; j++)
	{
	  float sum = 0.0;
	  for (a=0; a<20; ++a) sum += pb[a]*t.p[j][a];
	  sum = 1.0/sqrt(sum);
 	  for (a=0; a<20; ++a) t.p[j][a]*=sum;
	}
      break;

    case 11:  // log co-emission probability (no null model)
      for (a=0; a<20; ++a) pnul[a]=0.05;
      break;

   }

  // !!!!! ATTENTION!!!!!!!  after this t.p is not the same as after adding pseudocounts !!!
  //Introduce amino acid weights into template (for all but SOP scores)
  if (par.columnscore!=10)
    for (a=0; a<20; ++a)
      for (j=0; j<=t.L+1; j++)
	t.p[j][a]/=pnul[a];

  if (v>=5)
    {
      cout<<"\nAverage amino acid frequencies\n";
      cout<<"         A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\n";
      cout<<"Q:    ";
      for (a=0; a<20; ++a) printf("%4.1f ",100*q.pav[a]);
      cout<<"\nT:    ";
      for (a=0; a<20; ++a) printf("%4.1f ",100*t.pav[a]);
      cout<<"\nNull: ";
      for (a=0; a<20; ++a) printf("%4.1f ",100*pnul[a]);
      cout<<"\npb:   ";
      for (a=0; a<20; ++a) printf("%4.1f ",100*pb[a]);
    }


  return;
}


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Write HMM to output file
 */
void
HMM::WriteToFile(char* outfile)
{
  const int SEQLEN=100;      // number of residues per line for sequences to be displayed
  int i,a;

  if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}

  FILE *outf=NULL;
  if (strcmp(outfile,"stdout"))
    {
      if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
      if (!outf) OpenFileError(outfile);
    }
  else
    outf = stdout;
  if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";

//   fprintf(outf,"HHsearch HHM format 1.5\n");
  fprintf(outf,"HHsearch 1.5\n");         // format specification
  fprintf(outf,"NAME  %s\n",longname);    // name of first sequence
  fprintf(outf,"FAM   %s\n",fam);         // family name
  char file_nopath[NAMELEN];
  RemovePath(file_nopath,file);
  fprintf(outf,"FILE  %s\n",file_nopath); // base name of alignment file

  // Print command line
  fprintf(outf,"COM   ");
  for (int i=0; i<par.argc; i++)
    if (strlen(par.argv[i])<=100)
      fprintf(outf,"%s ",par.argv[i]);
    else
      fprintf(outf,"<%i characters> ",(int)strlen(par.argv[i]));
  fprintf(outf,"\n");

  // print out date stamp
  time_t* tp=new(time_t);
  *tp=time(NULL);
  fprintf(outf,"DATE  %s",ctime(tp));
  delete tp; tp = NULL; /* really? FS */

  // Print out some statistics of alignment
  fprintf(outf,"LENG  %i match states, %i columns in multiple alignment\n",L,l[L]);
  fprintf(outf,"FILT  %i out of %i sequences passed filter (-id %i -cov %i -qid %i -qsc %.2f -diff %i)\n",N_filtered,N_in,par.max_seqid,par.coverage,par.qid,par.qsc,par.Ndiff);
  fprintf(outf,"NEFF  %-4.1f\n",Neff_HMM);

  // Print selected sequences from alignment (including secondary structure and confidence values, if known)
  fprintf(outf,"SEQ\n");
  for (int n=0; n<n_display; n++)
    {
      fprintf(outf,">%s\n",sname[n]);
      //first sequence character starts at 1; 0 not used.
      for(unsigned int j=0; j<strlen(seq[n]+1); j+=SEQLEN) fprintf(outf,"%-.*s\n",SEQLEN,seq[n]+1+j);
    }
  fprintf(outf,"#\n");

  // print null model background probabilities from substitution matrix
  fprintf(outf,"NULL   ");
  for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(pb[s2a[a]])*HMMSCALE ));
  fprintf(outf,"\n");

  // print table header line with amino acids
  fprintf(outf,"HMM    ");
  for (a=0; a<20; ++a) fprintf(outf,"%1c\t",i2aa(s2a[a]));
  fprintf(outf,"\n");

  // print table header line with state transitions
  fprintf(outf,"       M->M\tM->I\tM->D\tI->M\tI->I\tD->M\tD->D\tNeff\tNeff_I\tNeff_D\n");

  // print out transition probabilities from begin state (virtual match state)
  fprintf(outf,"       ");
  for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[0][a]*HMMSCALE));
  fout(outf,iround(Neff_M[0]*HMMSCALE));
  fout(outf,iround(Neff_I[0]*HMMSCALE));
  fout(outf,iround(Neff_D[0]*HMMSCALE));
  fprintf(outf,"\n");

  // Start loop for printing HMM columns
  int h=1;
  for (i=1; i<=L; ++i)
    {

      while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
      fprintf(outf,"%1c %-4i ",seq[nfirst][h++],i);

      // Print emission probabilities for match state
      for (a=0; a<20; ++a) fout(outf,-iround(fast_log2(p[i][s2a[a]])*HMMSCALE ));
      fprintf(outf,"%-i",l[i]);
      fprintf(outf,"\n");

      // Print transition probabilities
      fprintf(outf,"       ");
      for (a=0; a<=D2D; ++a) fout(outf,-iround(tr[i][a]*HMMSCALE));
      fout(outf,iround(Neff_M[i]*HMMSCALE));
      fout(outf,iround(Neff_I[i]*HMMSCALE));
      fout(outf,iround(Neff_D[i]*HMMSCALE));
      fprintf(outf,"\n\n");
    } // end for(i)-loop for printing HMM columns

  fprintf(outf,"//\n");
  fclose(outf);
}

/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Write HMM to output file
 */
void
HMM::InsertCalibration(char* infile)
{
  char* line =  new(char[LINELEN]);    // input line
  char** lines = new(char*[3*L+100000]);
  int nline=0;
  int l;
  char done=0;   // inserted new 'EVD mu sigma' line?

  // Read from infile all lines and insert the EVD line with lamda and mu coefficients
  ifstream inf;
  inf.open(infile, ios::in);
  if (!inf) OpenFileError(infile);
  if (v>=2) cout<<"Recording calibration coefficients in "<<infile<<"\n";

  while (inf.getline(line,LINELEN) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen)
    {

      // Found an EVD lamda mu line? -> remove
      while (!done && !strncmp(line,"EVD ",3) && !(line[0]=='/' && line[1]=='/') && nline<2*/*MAXRES*/par.maxResLen)
	inf.getline(line,LINELEN);
      if ((line[0]=='/' && line[1]=='/') || nline>=2*/*MAXRES*/par.maxResLen)
	{fprintf(stderr,"Error: wrong format in %s. Expecting hhm format\n",infile); exit(1);}

      // Found the SEQ line? -> insert calibration before this line
      if (!done && (!strncmp("SEQ",line,3) || !strncmp("HMM",line,3)) && (isspace(line[3]) || line[3]=='\0'))
	{
	  done=1;
	  lines[nline]=new(char[128]);
	  if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
	  sprintf(lines[nline],"EVD   %-7.4f %-7.4f",lamda,mu);
	  nline++;
	}
      lines[nline]=new(char[strlen(line)+1]);
      if (!lines[nline]) MemoryError("space to read in HHM file for calibration");
      strcpy (lines[nline],line);
      nline++;
    }
  inf.close();

  // Write to infile all lines
  FILE* infout=fopen(infile,"w");
  if (!infout) {
    cerr<<endl<<"WARNING in "<<program_name<<": no calibration coefficients written to "<<infile<<":\n";
    cerr<<"Could not open file for writing.\n";
    return;
  }
  for (l=0; l<nline; l++) {
    fprintf(infout,"%s\n",lines[l]);
    delete[] lines[l];  lines[l] = NULL;
  }
  fprintf(infout,"//\n");
  fclose(infout);
  delete[] line;  line = NULL;
  delete[] lines;  lines = NULL;
  return;
}

/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Write HMM to output file in HMMER format
 */
void
HMM::WriteToFileHMMER(char* outfile)
{
  const int INTSCALE=1000;  //scaling factor in HMMER files
  const float pBD=0.50;
  const int LOG2pBD=iround(fast_log2(pBD)*INTSCALE);
  const int LOG2pBM=iround(fast_log2(1-pBD)*INTSCALE);
  const float pJB=1.0/350;
  const int LOG2pJB=iround(fast_log2(pJB)*INTSCALE);
  const int LOG2pJJ=iround(fast_log2(1-pJB)*INTSCALE);
  const float pEJ=0.5;
  const int LOG2pEJ=iround(fast_log2(pEJ)*INTSCALE);
  const int LOG2pEC=iround(fast_log2(1-pEJ)*INTSCALE);
  char c;
  int i,a;

  if (trans_lin) {fprintf(stderr,"Error: Writing transition pseudocounts in linear representation not allowed. Please report this error to the HHsearch developers.\n"); exit(6);}

  FILE *outf=NULL;
  if (strcmp(outfile,"stdout"))
    {
      if (par.append) outf=fopen(outfile,"a"); else outf=fopen(outfile,"w");
      if (!outf) OpenFileError(outfile);
    }
  else
    outf = stdout;
  if (v>=2) cout<<"Writing HMM to "<<outfile<<"\n";

  fprintf(outf,"HMMER2.0  [hhmake %s]\n",VERSION_AND_DATE);
  fprintf(outf,"NAME  %s\n",file);       // base name of alignment file
  fprintf(outf,"DESC  %s\n",longname);
  fprintf(outf,"LENG  %i\n",L);
  fprintf(outf,"ALPH  Amino\n");         // amino acid seuqences (not DNA)
  fprintf(outf,"RF    yes\n");            // reference annotation flag
  fprintf(outf,"CS    yes\n");           // consensus structure annotation flag
  fprintf(outf,"MAP   yes\n");           // write MA column number after each line of aa probabilities

  fprintf(outf,"COM   ");                // print out command line
  for (i=0; i<=par.argc-1; ++i) fprintf(outf,"%s ",par.argv[i]); fprintf(outf,"\n");

  fprintf(outf,"NSEQ  %i\n",N_filtered); // print number of sequences after filtering

  // Date stamp
  time_t* tp=new(time_t);
  *tp=time(NULL);
  fprintf(outf,"DATE  %s",ctime(tp));
  delete tp; tp = NULL; /* really? FS */

  // Print out secondary structure
  if (nss_dssp>=0)
    fprintf(outf,"SSDSS %s\n",seq[nss_dssp]);
  if (nsa_dssp>=0)
    fprintf(outf,"SADSS %s\n",seq[nsa_dssp]);
  if (nss_pred>=0)
    fprintf(outf,"SSPRD %s\n",seq[nss_pred]);
  if (nss_conf>=0)
    fprintf(outf,"SSCNF %s\n",seq[nss_conf]);


  // Special Plan7 transitions that control repeated detection of profile HMM within sequence
  fprintf(outf,"XT     %6i %6i %6i %6i %6i %6i %6i %6i\n",LOG2pJB,LOG2pJJ,LOG2pEC,LOG2pEJ,LOG2pJB,LOG2pJJ,LOG2pJB,LOG2pJJ);
  fprintf(outf,"NULT      -4  -8455\n");


  // Null model background probabilities from substitution matrix
  fprintf(outf,"NULE  ");
  for (a=0; a<20; ++a)
    {
      float lg2=fast_log2(pb[s2a[a]]*20.0);
      if (lg2<-99.999) fprintf(outf,"      *"); else fprintf(outf," %6i",iround(lg2*INTSCALE));
    }
  fprintf(outf,"\n");

  // Table header line with amino acids
  fprintf(outf,"HMM    ");
  for (a=0; a<20; ++a) fprintf(outf,"     %1c ",i2aa(s2a[a]));
  fprintf(outf,"\n");

  // Table header line with state transitions
  fprintf(outf,"         m->m   m->i   m->d   i->m   i->i   d->m   d->d   b->m   m->e\n");

  // Transition probabilities from begin state
  fprintf(outf,"       %6i      * %6i\n",LOG2pBM,LOG2pBD);

  // Start loop for printing HMM columns
  int h=1, hss=1;
  for (i=1; i<=L; ++i)
    {

      // Emission probabilities for match state
      fprintf(outf," %5i",i);
      for (a=0; a<20; ++a) fprintf(outf," %6i",imax(-9999,iround(fast_log2(p[i][s2a[a]]/pb[s2a[a]])*INTSCALE)));
      fprintf(outf," %5i",l[i]);
      fprintf(outf,"\n");

      // Emission probabilities (relative to null model) for insert state
      while(islower(seq[nfirst][h]) && seq[nfirst][h]) h++;
      if (i==L)
	fprintf(outf,"     %1c      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *      *\n",seq[nfirst][h++]);
      else
	fprintf(outf,"     %1c      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0      0\n",seq[nfirst][h++]);

      // Transition probabilities
      if (nss_dssp>=0)
	{
	  while(islower(seq[nss_dssp][hss]) && seq[nss_dssp][hss]) hss++;
	  c=seq[nss_dssp][hss++];
	}
      else c=' ';
      fprintf(outf,"     %1c",c);
      if (i==1)
	{
	  for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
	  fprintf(outf," %6i      *\n",LOG2pBM);
	}
      else if (i==L)
	{
	  for (a=0; a<=D2D; ++a) fprintf(outf,"     *");
	  fprintf(outf,"      *      0\n");
	}
      else
	{
	  for (a=0; a<=D2D; ++a) fprintf(outf," %6i",imax(-9999,iround(tr[i][a]*INTSCALE)));
	  fprintf(outf,"      *      *\n");
	}
    }

  fprintf(outf,"//\n");
  fclose(outf);
}


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Transform log to lin transition probs
 */
void
HMM::Log2LinTransitionProbs(float beta)
{
  if (trans_lin==1) return;
  trans_lin=1;
  for (int i=0; i<=L; ++i)
    {
      for (int a=0; a<NTRANS; ++a)
	tr[i][a] = fpow2(beta*tr[i][a]);
/* FIXME valgrind says: "Conditional jump or move depends on
 * uninitialised value(s)" when using hmm iteration
 */
    }
}


/**
 * @brief Set query columns in His-tags etc to Null model distribution
 */
void
HMM::NeutralizeTags()
{
  char* qseq = seq[nfirst];
  char* pt;
  int a,i;

  if (NULL == qseq){
      return;
  }

  // Neutralize His tag
  if ( (pt=strstr(qseq,"HHHHH")) )
    {
      int i0 = pt-qseq+1;
      if (v>=2) printf("Neutralized His-tag at position %i\n",i0);
      for (i=imax(i0-5,1); i<i0; ++i)   // neutralize leading 5 columns
	for (a=0; a<NAA; ++a) p[i][a]=pb[a];
      for (; (*pt)!='H'; ++i,++pt)      // neutralize His columns
	for (a=0; a<NAA; ++a) p[i][a]=pb[a];
      i0=i;
       for (; i<imin(i0+5,L+1); ++i)    // neutralize trailing 5 columns
	for (a=0; a<NAA; ++a) p[i][a]=pb[a];
       if (v>=3) printf("start:%i  end:%i\n",imax(i0-5,1),i-1);
    }

  // Neutralize C-myc tag
  if ( (pt=strstr(qseq,"EQKLISEEDL")) )
    {
      if (v>=2) printf("Neutralized C-myc-tag at position %i\n",int(pt-qseq)+1);
      for (i=pt-qseq+1; i<=pt-qseq+10; ++i)
	for (a=0; a<NAA; ++a) p[i][a]=pb[a];
    }
  // Neutralize FLAG tag
  if ( (pt=strstr(qseq,"DYKDDDDK")) )
    {
      if (v>=2) printf("Neutralized FLAG-tag at position %i\n",int(pt-qseq)+1);
      for (i=pt-qseq+1; i<=pt-qseq+8; ++i)
	for (a=0; a<NAA; ++a) p[i][a]=pb[a];
    }
}


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Calculate effective number of sequences using profiles INCLUDING pseudocounts
 */
float
HMM::CalcNeff()
{
  float Neff=0;
  for (int i=1; i<=L; ++i)
    for (int a=0; a<20; ++a)
      if (p[i][a]>1E-10) Neff-=p[i][a]*fast_log2(p[i][a]);
  return fpow2(Neff/L);
}


/////////////////////////////////////////////////////////////////////////////////////
/**
 * @brief Calculate consensus of HMM (needed to merge HMMs later)
 */
void
HMM::CalculateConsensus()
{
  int i;      // position in query
  int a;      // amino acid
  if (!Xcons) Xcons = new char[/*MAXRES*/par.maxResLen+2];
  for (i=1; i<=L; ++i)
    {
      float max=f[i][0]-pb[0];
      for (a=1; a<20; ++a)
	if (f[i][a]-pb[a]>max) Xcons[i]=a;
    }
  Xcons[0]=Xcons[L+1]=ENDGAP;
}

// /////////////////////////////////////////////////////////////////////////////////////
// // Store linear transition probabilities
// /////////////////////////////////////////////////////////////////////////////////////
// void HMM::StoreLinearTransitionProbs()
// {
//   int i;      // position in query
//   for (i=0; i<=L+1; ++i) if (!tr_lin[i]) tr_lin[i] = new(float[NTRANS]);
//   for (i=0; i<=L+1; ++i)
//     {
//       tr_lin[i][M2M] = fpow2(tr[i][M2M]);
//       tr_lin[i][M2I] = fpow2(tr[i][M2I]);
//       tr_lin[i][M2D] = fpow2(tr[i][M2D]);
//       tr_lin[i][D2M] = fpow2(tr[i][M2D]);
//       tr_lin[i][D2D] = fpow2(tr[i][D2D]);
//       tr_lin[i][I2M] = fpow2(tr[i][I2M]);
//       tr_lin[i][I2I] = fpow2(tr[i][I2I]);
//     }
// }


// #define Weff(Neff) (1.0+par.neffa*(Neff-1.0)+(par.neffb-4.0*par.neffa)/16.0*(Neff-1.0)*(Neff-1.0))

// /////////////////////////////////////////////////////////////////////////////////////
// // Initialize f[i][a] with query HMM
// /////////////////////////////////////////////////////////////////////////////////////
// void HMM::MergeQueryHMM(HMM& q, float wk[])
// {
//   int i;      // position in query
//   int a;      // amino acid
//   float Weff_M, Weff_D, Weff_I;
//   for (i=1; i<=L; i++)
//     {
//       Weff_M = Weff(q.Neff_M[i]-1.0);
//       Weff_D = Weff(q.Neff_D[i]-1.0);
//       Weff_I = Weff(q.Neff_I[i]-1.0);
//       for (a=0; a<20; a++) f[i][a] = q.f[i][a]*wk[i]*Weff_M;
//       tr_lin[i][M2M] = q.tr_lin[i][M2M]*wk[i]*Weff_M;
//       tr_lin[i][M2I] = q.tr_lin[i][M2I]*wk[i]*Weff_M;
//       tr_lin[i][M2D] = q.tr_lin[i][M2D]*wk[i]*Weff_M;
//       tr_lin[i][D2M] = q.tr_lin[i][D2M]*wk[i]*Weff_D;
//       tr_lin[i][D2D] = q.tr_lin[i][D2D]*wk[i]*Weff_D;
//       tr_lin[i][I2M] = q.tr_lin[i][I2M]*wk[i]*Weff_I;
//       tr_lin[i][I2I] = q.tr_lin[i][I2I]*wk[i]*Weff_I;
//     }
// }


// /////////////////////////////////////////////////////////////////////////////////////
// // Normalize probabilities in total merged super-HMM
// /////////////////////////////////////////////////////////////////////////////////////
// void HMM::NormalizeHMMandTransitionsLin2Log()
// {
//   int i;      // position in query
//   int a;      // amino acid
//   for (i=0; i<=L+1; i++)
//     {
//       float sum=0.0;
//       for (a=0; a<20; a++) sum += f[i][a];
//       for (a=0; a<20; a++) f[i][a]/=sum;
//       sum = tr_lin[i][M2M] + tr_lin[i][M2I] + tr_lin[i][M2D];
//       tr_lin[i][M2M] /= sum;
//       tr_lin[i][M2I] /= sum;
//       tr_lin[i][M2D] /= sum;
//       tr[i][M2M] = fast_log2(tr_lin[i][M2M]);
//       tr[i][M2I] = fast_log2(tr_lin[i][M2I]);
//       tr[i][M2D] = fast_log2(tr_lin[i][M2D]);
//       sum = tr_lin[i][D2M] + tr_lin[i][D2D];
//       tr_lin[i][D2M] /= sum;
//       tr_lin[i][D2D] /= sum;
//       tr[i][D2M] = fast_log2(tr_lin[i][D2M]);
//       tr[i][D2D] = fast_log2(tr_lin[i][D2D]);
//       sum = tr_lin[i][I2M] + tr_lin[i][I2I];
//       tr_lin[i][I2M] /= sum;
//       tr_lin[i][I2I] /= sum;
//       tr[i][I2M] = fast_log2(tr_lin[i][I2M]);
//       tr[i][I2I] = fast_log2(tr_lin[i][I2I]);
//    }
// }


// UNCOMMENT TO ACTIVATE COMPOSITIONALLY BIASED PSEUDOCOUNTS BY RESCALING THE RATE MATRIX

// /////////////////////////////////////////////////////////////////////////////////////
// //// Function to minimize
// /////////////////////////////////////////////////////////////////////////////////////
// double RescaleMatrixFunc(double x[])
// {
//   double sum=0.0;
//   for (int a=0; a<20; ++a)
//     {
//       double za=0.0;
//       for (int b=0; b<20; ++b) za+=P[a][b]*x[b];
//       sum += (x[a]*za-qav[a])*(x[a]*za-qav[a]);
//     }
//   return sum;
// }

// /////////////////////////////////////////////////////////////////////////////////////
// //// Gradient of function to minimize
// /////////////////////////////////////////////////////////////////////////////////////
// void RescaleMatrixFuncGrad(double grad[], double x[])
// {
//   double z[20] = {0.0};
//   double w[20];
//   double tmp;
//   for (int a=0; a<20; ++a)
//     for (int b=0; b<20; ++b) z[a] += P[a][b]*x[b];

//   for (int a=0; a<20; ++a) w[a] = x[a]*z[a]-qav[a];
//   for (int a=0; a<20; ++a)
//     {
//       tmp = w[a]*z[a];
//       for (int b=0; b<20; ++b) tmp += P[a][b]*x[b]*w[b];
//       grad[a] = 2.0*tmp;
//     }
//   return;
// }


// /////////////////////////////////////////////////////////////////////////////////////
// //// Rescale a substitution matrix to biased aa frequencies in global vector qav[a]
// /////////////////////////////////////////////////////////////////////////////////////
// void HMM::RescaleMatrix()
// {
//   int a,b;
//   int code;
//   double x[21];     // scaling factor
//   double val_min;
//   const int len=20;
//   const int max_iterations=50;

//   if (v>=2) printf("Adjusting rate matrix to query amino acid composition ...\n");

//   // Put amino acid frequencies into global array (needed to call WNLIB's conjugate gradient method)
//   for (a=0; a<20; ++a) qav[a] = pav[a];

//   // Initialize scaling factors x[a]
//   for (a=0; a<20; ++a) x[a]=pow(qav[a]/pb[a],0.73);  // Initialize

//   // Call conjugate gradient minimization method from WNLIB
//   wn_conj_gradient_method(&code,&val_min,x,len,&RescaleMatrixFunc,&RescaleMatrixFuncGrad,max_iterations);


//   // Calculate z[a] = sum_b Pab*xb
//   float sum_err=0.0f;
//   float sum = 0.0f;
//   for (a=0; a<20; ++a)
//     {
//       float za=0.0f; // za = sum_b Pab*xb
//       for (b=0; b<20; ++b) za+=P[a][b]*x[b];
//       sum_err += (x[a]*za/qav[a]-1)*(x[a]*za/qav[a]-1);
//       sum += x[a]*za;
//    }
//   if (sum_err>1e-3 & v>=1) fprintf(stderr,"WARNING: adjusting rate matrix by CG resulted in residual error of %5.3f.\n",sum_err);

//   // Rescale rate matrix
//   for (a=0; a<20; ++a)
//     for (b=0; b<20; ++b)
//       {
// 	P[a][b] *= x[a]*x[b]/sum;
// 	R[a][b] = P[a][b]/qav[b];
//       }

//   // How well approximated?
//   if (v>=3)
//     {
//       // Calculate z[a] = sum_b Pab*xb
//       float z[21];
//       for (a=0; a<20; ++a)
// 	for (z[a]=0.0, b=0; b<20; ++b) z[a]+=P[a][b];
//       printf("Adjust   A    R    N    D    C    Q    E    G    H    I    L    K    M    F    P    S    T    W    Y    V\nErr?  ");
//       for (a=0; a<20; ++a) printf("%4.0f ",1000*z[a]/qav[a]);
//       cout<<endl<<"xa    ";
//       for (a=0; a<20; ++a) fprintf(stdout,"%4.2f ",x[a]);
//       cout<<endl;
//     }

//   // Evaluate sequence identity underlying substitution matrix
//   if (v>=3)
//     {
//       float id=0.0f;
//       float entropy=0.0f;
//       float entropy_qav=0.0f;
//       float mut_info=0.0f;
//       for (a=0; a<20; ++a) id += P[a][a];
//       for (a=0; a<20; ++a)  entropy_qav-=qav[a]*fast_log2(qav[a]);
//       for (a=0; a<20; ++a)
// 	  for (b=0; b<20; ++b)
// 	    {
// 	      entropy-=P[a][b]*fast_log2(R[a][b]);
// 	      mut_info += P[a][b]*fast_log2(P[a][b]/qav[a]/qav[b]);
// 	    }

//       fprintf(stdout,"Rescaling rate matrix: sequence identity = %2.0f%%; entropy per column = %4.2f bits (out of %4.2f); mutual information = %4.2f bits\n",100*id,entropy,entropy_qav,mut_info);
//     }
//   return;
// }


/* @* HMM::ClobberGlobal (eg, q,t)
 */
void
HMM::ClobberGlobal(void){

  for (int i = 0; i < n_display; i++){
    if (sname[i]){
      delete[] sname[i]; sname[i] = NULL;
    }
    if (seq[i]){
      delete[] seq[i]; seq[i] = NULL;
    }
  }
  Neff_M[0] = Neff_I[0] = Neff_D[0] = 0.0;
  longname[0] = '\0'; file[0] = '\0';
  ss_dssp[0] = sa_dssp[0] = ss_pred[0] = ss_conf[0] = '\0';
  Xcons   = NULL;
  l[0] = 0;
  L = 0;
  Neff_HMM = 0;
  n_display = N_in = N_filtered = 0;
  nss_dssp = nsa_dssp = nss_pred = nss_conf = nfirst = ncons = -1;
  lamda = 0.0; mu = 0.0;
  name[0] = longname[0] = fam[0] = '\0';

  for (int i = 0; i < NAA; i++){
    pav[i] = 0;
  }

  /* @= */
  return;

} /* this is the end of ClobberGlobal() */


/*
 * EOF hhhmm-C.h
 */
author	clustalomega
date	Fri, 22 Jul 2011 09:09:02 -0400
parents	bc707542e5de
children