diff spring_package/tmalign/TMalign.cpp @ 17:c790d25086dc draft

"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author guerler
date Wed, 28 Oct 2020 05:11:56 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spring_package/tmalign/TMalign.cpp	Wed Oct 28 05:11:56 2020 +0000
@@ -0,0 +1,5137 @@
+/* TM-align: sequence-independent structure alignment of monomer proteins by
+ * TM-score superposition. Please report issues to yangzhanglab@umich.edu
+ * 
+ * References to cite:
+ * Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)
+ *
+ * DISCLAIMER:
+ *  Permission to use, copy, modify, and distribute the Software for any
+ *  purpose, with or without fee, is hereby granted, provided that the
+ *  notices on the head, the reference information, and this copyright
+ *  notice appear in all copies or substantial portions of the Software.
+ *  It is provided "as is" without express or implied warranty.
+ *
+ * ==========================
+ * How to install the program
+ * ==========================
+ * The following command compiles the program in your Linux computer:
+ *
+ *     g++ -static -O3 -ffast-math -lm -o TMalign TMalign.cpp
+ *
+ * The '-static' flag should be removed on Mac OS, which does not support
+ * building static executables.
+ *
+ * ======================
+ * How to use the program
+ * ======================
+ * You can run the program without argument to obtain the document.
+ * Briefly, you can compare two structures by:
+ *
+ *     ./TMalign structure1.pdb structure2.pdb
+ *
+ * ==============
+ * Update history
+ * ==============
+ * 2012/01/24: A C/C++ code of TM-align was constructed by Jianyi Yang
+ * 2016/05/21: Several updates of this program were made by Jianji Wu:
+ *            (1) fixed several compiling bugs
+ *            (2) made I/O of C/C++ version consistent with the Fortran version
+ *            (3) added outputs including full-atom and ligand structures
+ *            (4) added options of '-i', '-I' and '-m'
+ * 2016/05/25: Fixed a bug on PDB file reading
+ * 2018/06/04: Several updates were made by Chengxin Zhang, including
+ *            (1) Fixed bug in reading PDB files with negative residue index,
+ *            (2) Implemented the fTM-align algorithm (by the '-fast' option)
+ *                as described in R Dong, S Pan, Z Peng, Y Zhang, J Yang
+ *                (2018) Nucleic acids research. gky430.
+ *            (3) Included option to perform TM-align against a whole 
+ *                folder of PDB files. A full list of options not available
+ *                in the Fortran version can be explored by TMalign -h
+ * 2018/07/27: Added the -byresi option for TM-score superposition without
+ *             re-alignment as in TMscore and TMscore -c
+ * 2018/08/07: Added the -dir option
+ * 2018/08/14: Added the -split option
+ * 2018/08/16: Added the -infmt1, -infmt2 options.
+ * 2019/01/07: Added support for PDBx/mmCIF format.
+ * 2019/02/09: Fixed asymmetric alignment bug.
+ * 2019/03/17: Added the -cp option for circular permutation
+ * 2019/07/23: Supported RasMol output by '-o' option
+ * 2019/07/24: Fixed bug on PyMOL format output by '-o' option with mmCIF input
+ * 2019/08/18: Fixed bug on RasMol format output file *_atm. Removed excessive
+ *             circular permutation alignment by -cp
+ * 2019/08/20: Clarified PyMOL syntax.
+ * 2019/08/22: Added four additional PyMOL scripts.
+ */
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <string.h>
+#include <malloc/malloc.h>
+#include <sstream>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <vector>
+#include <iterator>
+#include <algorithm>
+#include <string>
+#include <map>
+
+using namespace std;
+
+void print_version()
+{
+    cout << 
+"\n"
+" *********************************************************************\n"
+" * TM-align (Version 20190822): protein structure alignment          *\n"
+" * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n"
+" * Please email comments and suggestions to yangzhanglab@umich.edu   *\n"
+" *********************************************************************"
+    << endl;
+}
+
+void print_extra_help()
+{
+    cout <<
+"Additional options:\n"
+"    -dir     Perform all-against-all alignment among the list of PDB\n"
+"             chains listed by 'chain_list' under 'chain_folder'. Note\n"
+"             that the slash is necessary.\n"
+"             $ TMalign -dir chain_folder/ chain_list\n"
+"\n"
+"    -dir1    Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
+"             under 'chain1_folder'. Note that the slash is necessary.\n"
+"             $ TMalign -dir1 chain1_folder/ chain1_list chain2\n"
+"\n"
+"    -dir2    Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
+"             under 'chain2_folder'\n"
+"             $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n"
+"\n"
+"    -suffix  (Only when -dir1 and/or -dir2 are set, default is empty)\n"
+"             add file name suffix to files listed by chain1_list or chain2_list\n"
+"\n"
+"    -atom    4-character atom name used to represent a residue.\n"
+"             Default is \" CA \" for proteins\n"
+"             (note the spaces before and after CA).\n"
+"\n"
+"    -ter     Strings to mark the end of a chain\n"
+"             3: (default) TER, ENDMDL, END or different chain ID\n"
+"             2: ENDMDL, END, or different chain ID\n"
+"             1: ENDMDL or END\n"
+"             0: (default in the first C++ TMalign) end of file\n"
+"\n"
+"    -split   Whether to split PDB file into multiple chains\n"
+"             0: (default) treat the whole structure as one single chain\n"
+"             1: treat each MODEL as a separate chain (-ter should be 0)\n"
+"             2: treat each chain as a seperate chain (-ter should be <=1)\n"
+"\n"
+"    -outfmt  Output format\n"
+"             0: (default) full output\n"
+"             1: fasta format compact output\n"
+"             2: tabular format very compact output\n"
+"            -1: full output, but without version or citation information\n"
+"\n"
+"    -byresi  Whether to assume residue index correspondence between the\n" 
+"             two structures.\n"
+"             0: (default) sequence independent alignment\n"
+"             1: (same as TMscore program) sequence-dependent superposition,\n"
+"                i.e. align by residue index\n"
+"             2: (same as TMscore -c, should be used with -ter <=1)\n"
+"                align by residue index and chain ID\n"
+"             3: (similar to TMscore -c, should be used with -ter <=1)\n"
+"                align by residue index and order of chain\n"
+"\n"
+"    -TMcut   -1: (default) do not consider TMcut\n"
+"             Values in [0.5,1): Do not proceed with TM-align for this\n"
+"                 structure pair if TM-score is unlikely to reach TMcut.\n"
+"                 TMcut is normalized is set by -a option:\n"
+"                 -2: normalized by longer structure length\n"
+"                 -1: normalized by shorter structure length\n"
+"                  0: (default, same as F) normalized by second structure\n"
+"                  1: same as T, normalized by average structure length\n"
+"\n"
+"    -mirror  Whether to align the mirror image of input structure\n"
+"             0: (default) do not align mirrored structure\n"
+"             1: align mirror of chain1 to origin chain2\n"
+"\n"
+"    -het     Whether to align residues marked as 'HETATM' in addition to 'ATOM  '\n"
+"             0: (default) only align 'ATOM  ' residues\n"
+"             1: align both 'ATOM  ' and 'HETATM' residues\n"
+"\n"
+"    -infmt1  Input format for chain1\n"
+"    -infmt2  Input format for chain2\n"
+"            -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
+"             0: PDB format\n"
+"             1: SPICKER format\n"
+"             2: xyz format\n"
+"             3: PDBx/mmCIF format\n"
+    <<endl;
+}
+
+void print_help(bool h_opt=false)
+{
+    print_version();
+    cout <<
+"\n"
+"Usage: TMalign PDB1.pdb PDB2.pdb [Options]\n"
+"\n"
+"Options:\n"
+"    -u    TM-score normalized by user assigned length (the same as -L)\n"
+"          warning: it should be >= minimum length of the two structures\n"
+"          otherwise, TM-score may be >1\n"
+"\n"
+"    -a    TM-score normalized by the average length of two structures\n"
+"          T or F, (default F)\n"
+"\n"
+"    -i    Start with an alignment specified in fasta file 'align.txt'\n"
+"\n"
+"    -I    Stick to the alignment specified in 'align.txt'\n"
+"\n"
+"    -m    Output TM-align rotation matrix\n"
+"\n"
+"    -d    TM-score scaled by an assigned d0, e.g. 5 Angstroms\n"
+"\n"
+"    -o    Output the superposition to 'TM_sup*'\n"
+"            $ TMalign PDB1.pdb PDB2.pdb -o TM_sup\n"
+"          View superposed C-alpha traces of aligned regions by RasMol or PyMOL:\n"
+"            $ rasmol -script TM_sup\n"
+"            $ pymol -d @TM_sup.pml\n"
+"          View superposed C-alpha traces of all regions:\n"
+"            $ rasmol -script TM_sup_all\n"
+"            $ pymol -d @TM_sup_all.pml\n"
+"          View superposed full-atom structures of aligned regions:\n"
+"            $ rasmol -script TM_sup_atm\n"
+"            $ pymol -d @TM_sup_atm.pml\n"
+"          View superposed full-atom structures of all regions:\n"
+"            $ rasmol -script TM_sup_all_atm\n"
+"            $ pymol -d @TM_sup_all_atm.pml\n"
+"          View superposed full-atom structures and ligands of all regions\n"
+"            $ rasmol -script TM_sup_all_atm_lig\n"
+"            $ pymol -d @TM_sup_all_atm_lig.pml\n"
+"\n"
+" -fast    Fast but slightly inaccurate alignment by fTM-align algorithm\n"
+"\n"
+"   -cp    Alignment with circular permutation\n"
+"\n"
+"    -v    Print the version of TM-align\n"
+"\n"
+"    -h    Print the full help message, including additional options\n"
+"\n"
+"    (Options -u, -a, -d, -o will not change the final structure alignment)\n\n"
+"Example usages:\n"
+"    TMalign PDB1.pdb PDB2.pdb\n"
+"    TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n"
+"    TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n"
+"    TMalign PDB1.pdb PDB2.pdb -i align.txt\n"
+"    TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n"
+"    TMalign PDB1.pdb PDB2.pdb -fast\n"
+"    TMalign PDB1.pdb PDB2.pdb -cp\n"
+    <<endl;
+
+    if (h_opt) print_extra_help();
+
+    exit(EXIT_SUCCESS);
+}
+
+
+/* Functions for the core TMalign algorithm, including the entry function
+ * TMalign_main */
+
+void PrintErrorAndQuit(const string sErrorString)
+{
+    cout << sErrorString << endl;
+    exit(1);
+}
+
+template <typename T> inline T getmin(const T &a, const T &b)
+{
+    return b<a?b:a;
+}
+
+template <class A> void NewArray(A *** array, int Narray1, int Narray2)
+{
+    *array=new A* [Narray1];
+    for(int i=0; i<Narray1; i++) *(*array+i)=new A [Narray2];
+}
+
+template <class A> void DeleteArray(A *** array, int Narray)
+{
+    for(int i=0; i<Narray; i++)
+        if(*(*array+i)) delete [] *(*array+i);
+    if(Narray) delete [] (*array);
+    (*array)=NULL;
+}
+
+string AAmap(char A)
+{
+    if (A=='A') return "ALA";
+    if (A=='B') return "ASX";
+    if (A=='C') return "CYS";
+    if (A=='D') return "ASP";
+    if (A=='E') return "GLU";
+    if (A=='F') return "PHE";
+    if (A=='G') return "GLY";
+    if (A=='H') return "HIS";
+    if (A=='I') return "ILE";
+    if (A=='K') return "LYS";
+    if (A=='L') return "LEU";
+    if (A=='M') return "MET";
+    if (A=='N') return "ASN";
+    if (A=='O') return "PYL";
+    if (A=='P') return "PRO";
+    if (A=='Q') return "GLN";
+    if (A=='R') return "ARG";
+    if (A=='S') return "SER";
+    if (A=='T') return "THR";
+    if (A=='U') return "SEC";
+    if (A=='V') return "VAL";
+    if (A=='W') return "TRP";    
+    if (A=='Y') return "TYR";
+    if (A=='Z') return "GLX";
+    return "UNK";
+}
+
+char AAmap(const string &AA)
+{
+    if (AA.compare("ALA")==0 || AA.compare("DAL")==0) return 'A';
+    if (AA.compare("ASX")==0) return 'B';
+    if (AA.compare("CYS")==0 || AA.compare("DCY")==0) return 'C';
+    if (AA.compare("ASP")==0 || AA.compare("DAS")==0) return 'D';
+    if (AA.compare("GLU")==0 || AA.compare("DGL")==0) return 'E';
+    if (AA.compare("PHE")==0 || AA.compare("DPN")==0) return 'F';
+    if (AA.compare("GLY")==0) return 'G';
+    if (AA.compare("HIS")==0 || AA.compare("DHI")==0) return 'H';
+    if (AA.compare("ILE")==0 || AA.compare("DIL")==0) return 'I';
+    if (AA.compare("LYS")==0 || AA.compare("DLY")==0) return 'K';
+    if (AA.compare("LEU")==0 || AA.compare("DLE")==0) return 'L';
+    if (AA.compare("MET")==0 || AA.compare("MED")==0 ||
+        AA.compare("MSE")==0) return 'M';
+    if (AA.compare("ASN")==0 || AA.compare("DSG")==0) return 'N';
+    if (AA.compare("PYL")==0) return 'O';
+    if (AA.compare("PRO")==0 || AA.compare("DPR")==0) return 'P';
+    if (AA.compare("GLN")==0 || AA.compare("DGN")==0) return 'Q';
+    if (AA.compare("ARG")==0 || AA.compare("DAR")==0) return 'R';
+    if (AA.compare("SER")==0 || AA.compare("DSN")==0) return 'S';
+    if (AA.compare("THR")==0 || AA.compare("DTH")==0) return 'T';
+    if (AA.compare("SEC")==0) return 'U';
+    if (AA.compare("VAL")==0 || AA.compare("DVA")==0) return 'V';
+    if (AA.compare("TRP")==0 || AA.compare("DTR")==0) return 'W';    
+    if (AA.compare("TYR")==0 || AA.compare("DTY")==0) return 'Y';
+    if (AA.compare("GLX")==0) return 'Z';
+    return 'X';
+}
+
+/* split a long string into vectors by whitespace 
+ * line          - input string
+ * line_vec      - output vector 
+ * delimiter     - delimiter */
+void split(const string &line, vector<string> &line_vec,
+    const char delimiter=' ')
+{
+    bool within_word = false;
+    for (int pos=0;pos<line.size();pos++)
+    {
+        if (line[pos]==delimiter)
+        {
+            within_word = false;
+            continue;
+        }
+        if (!within_word)
+        {
+            within_word = true;
+            line_vec.push_back("");
+        }
+        line_vec.back()+=line[pos];
+    }
+}
+
+/* split a long string into vectors by whitespace, return both whitespaces
+ * and non-whitespaces
+ * line          - input string
+ * line_vec      - output vector
+ * space_vec     - output vector 
+ * delimiter     - delimiter */
+void split_white(const string &line, vector<string> &line_vec,
+    vector<string>&white_vec, const char delimiter=' ')
+{
+    bool within_word = false;
+    for (int pos=0;pos<line.size();pos++)
+    {
+        if (line[pos]==delimiter)
+        {
+            if (within_word==true)
+            {
+                white_vec.push_back("");
+                within_word = false;
+            }
+            white_vec.back()+=delimiter;
+        }
+        else
+        {
+            if (within_word==false)
+            {
+                line_vec.push_back("");
+                within_word = true;
+            }
+            line_vec.back()+=line[pos];
+        }
+    }
+}
+
+size_t get_PDB_lines(const string filename,
+    vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
+    vector<int> &mol_vec, const int ter_opt, const int infmt_opt,
+    const string atom_opt, const int split_opt, const int het_opt)
+{
+    size_t i=0; // resi i.e. atom index
+    string line;
+    char chainID=0;
+    string resi="";
+    bool select_atom=false;
+    size_t model_idx=0;
+    vector<string> tmp_str_vec;
+    
+    ifstream fin;
+    fin.open(filename.c_str());
+
+    if (infmt_opt==0||infmt_opt==-1) // PDB format
+    {
+        while (fin.good())
+        {
+            getline(fin, line);
+            if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+                return get_PDB_lines(filename,PDB_lines,chainID_list,
+                    mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt);
+            if (i > 0)
+            {
+                if      (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+                else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break;
+            }
+            if (split_opt && line.compare(0,3,"END")==0) chainID=0;
+            if ((line.compare(0, 6, "ATOM  ")==0 || 
+                (line.compare(0, 6, "HETATM")==0 && het_opt))
+                && line.size()>=54 && (line[16]==' ' || line[16]=='A'))
+            {
+                if (atom_opt=="auto")
+                        select_atom=(line.compare(12,4," CA ")==0);
+                else    select_atom=(line.compare(12,4,atom_opt)==0);
+                if (select_atom)
+                {
+                    if (!chainID)
+                    {
+                        chainID=line[21];
+                        model_idx++;
+                        stringstream i8_stream;
+                        i=0;
+                        if (split_opt==2) // split by chain
+                        {
+                            if (chainID==' ')
+                            {
+                                if (ter_opt>=1) i8_stream << ":_";
+                                else i8_stream<<':'<<model_idx<<":_";
+                            }
+                            else
+                            {
+                                if (ter_opt>=1) i8_stream << ':' << chainID;
+                                else i8_stream<<':'<<model_idx<<':'<<chainID;
+                            }
+                            chainID_list.push_back(i8_stream.str());
+                        }
+                        else if (split_opt==1) // split by model
+                        {
+                            i8_stream << ':' << model_idx;
+                            chainID_list.push_back(i8_stream.str());
+                        }
+                        PDB_lines.push_back(tmp_str_vec);
+                        mol_vec.push_back(0);
+                    }
+                    else if (ter_opt>=2 && chainID!=line[21]) break;
+                    if (split_opt==2 && chainID!=line[21])
+                    {
+                        chainID=line[21];
+                        i=0;
+                        stringstream i8_stream;
+                        if (chainID==' ')
+                        {
+                            if (ter_opt>=1) i8_stream << ":_";
+                            else i8_stream<<':'<<model_idx<<":_";
+                        }
+                        else
+                        {
+                            if (ter_opt>=1) i8_stream << ':' << chainID;
+                            else i8_stream<<':'<<model_idx<<':'<<chainID;
+                        }
+                        chainID_list.push_back(i8_stream.str());
+                        PDB_lines.push_back(tmp_str_vec);
+                        mol_vec.push_back(0);
+                    }
+
+                    if (resi==line.substr(22,5))
+                        cerr<<"Warning! Duplicated residue "<<resi<<endl;
+                    resi=line.substr(22,5); // including insertion code
+
+                    PDB_lines.back().push_back(line);
+                    if (line[17]==' ' && (line[18]=='D'||line[18]==' ')) mol_vec.back()++;
+                    else mol_vec.back()--;
+                    i++;
+                }
+            }
+        }
+    }
+    else if (infmt_opt==1) // SPICKER format
+    {
+        int L=0;
+        float x,y,z;
+        stringstream i8_stream;
+        while (fin.good())
+        {
+            fin   >>L>>x>>y>>z;
+            getline(fin, line);
+            if (!fin.good()) break;
+            model_idx++;
+            stringstream i8_stream;
+            i8_stream << ':' << model_idx;
+            chainID_list.push_back(i8_stream.str());
+            PDB_lines.push_back(tmp_str_vec);
+            mol_vec.push_back(0);
+            for (i=0;i<L;i++)
+            {
+                fin   >>x>>y>>z;
+                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  UNK  "<<setw(4)
+                    <<i+1<<"    "<<setiosflags(ios::fixed)<<setprecision(3)
+                    <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z;
+                line=i8_stream.str();
+                i8_stream.str(string());
+                PDB_lines.back().push_back(line);
+            }
+            getline(fin, line);
+        }
+    }
+    else if (infmt_opt==2) // xyz format
+    {
+        int L=0;
+        char A;
+        stringstream i8_stream;
+        while (fin.good())
+        {
+            getline(fin, line);
+            L=atoi(line.c_str());
+            getline(fin, line);
+            for (i=0;i<line.size();i++)
+                if (line[i]==' '||line[i]=='\t') break;
+            if (!fin.good()) break;
+            chainID_list.push_back(':'+line.substr(0,i));
+            PDB_lines.push_back(tmp_str_vec);
+            mol_vec.push_back(0);
+            for (i=0;i<L;i++)
+            {
+                getline(fin, line);
+                i8_stream<<"ATOM   "<<setw(4)<<i+1<<"  CA  "
+                    <<AAmap(line[0])<<"  "<<setw(4)<<i+1<<"    "
+                    <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8);
+                line=i8_stream.str();
+                i8_stream.str(string());
+                PDB_lines.back().push_back(line);
+                if (line[0]>='a' && line[0]<='z') mol_vec.back()++; // RNA
+                else mol_vec.back()--;
+            }
+        }
+    }
+    else if (infmt_opt==3) // PDBx/mmCIF format
+    {
+        bool loop_ = false; // not reading following content
+        map<string,int> _atom_site;
+        int atom_site_pos;
+        vector<string> line_vec;
+        string alt_id=".";  // alternative location indicator
+        string asym_id="."; // this is similar to chainID, except that
+                            // chainID is char while asym_id is a string
+                            // with possibly multiple char
+        string prev_asym_id="";
+        string AA="";       // residue name
+        string atom="";
+        string prev_resi="";
+        string model_index=""; // the same as model_idx but type is string
+        stringstream i8_stream;
+        while (fin.good())
+        {
+            getline(fin, line);
+            if (line.size()==0) continue;
+            if (loop_) loop_ = line.compare(0,2,"# ");
+            if (!loop_)
+            {
+                if (line.compare(0,5,"loop_")) continue;
+                while(1)
+                {
+                    if (fin.good()) getline(fin, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    if (line.size()) break;
+                }
+                if (line.compare(0,11,"_atom_site.")) continue;
+
+                loop_=true;
+                _atom_site.clear();
+                atom_site_pos=0;
+                _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+
+                while(1)
+                {
+                    if (fin.good()) getline(fin, line);
+                    else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
+                    if (line.size()==0) continue;
+                    if (line.compare(0,11,"_atom_site.")) break;
+                    _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+                }
+
+
+                if (_atom_site.count("group_PDB")*
+                    _atom_site.count("label_atom_id")*
+                    _atom_site.count("label_comp_id")*
+                   (_atom_site.count("auth_asym_id")+
+                    _atom_site.count("label_asym_id"))*
+                   (_atom_site.count("auth_seq_id")+
+                    _atom_site.count("label_seq_id"))*
+                    _atom_site.count("Cartn_x")*
+                    _atom_site.count("Cartn_y")*
+                    _atom_site.count("Cartn_z")==0)
+                {
+                    loop_ = false;
+                    cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_atom_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
+                    continue;
+                }
+            }
+
+            line_vec.clear();
+            split(line,line_vec);
+            if (line_vec[_atom_site["group_PDB"]]!="ATOM" && (het_opt==0 ||
+                line_vec[_atom_site["group_PDB"]]!="HETATM")) continue;
+            
+            alt_id=".";
+            if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
+                alt_id=line_vec[_atom_site["label_alt_id"]];
+            if (alt_id!="." && alt_id!="A") continue;
+
+            atom=line_vec[_atom_site["label_atom_id"]];
+            if (atom[0]=='"') atom=atom.substr(1);
+            if (atom.size() && atom[atom.size()-1]=='"')
+                atom=atom.substr(0,atom.size()-1);
+            if (atom.size()==0) continue;
+            if      (atom.size()==1) atom=" "+atom+"  ";
+            else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H
+            else if (atom.size()==3) atom=" "+atom;
+            else if (atom.size()>=5) continue;
+
+            AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+            if      (AA.size()==1) AA="  "+AA;
+            else if (AA.size()==2) AA=" " +AA;
+            else if (AA.size()>=4) continue;
+
+            if (atom_opt=="auto")
+                    select_atom=(atom==" CA ");
+            else    select_atom=(atom==atom_opt);
+
+            if (!select_atom) continue;
+
+            if (_atom_site.count("auth_asym_id"))
+                 asym_id=line_vec[_atom_site["auth_asym_id"]];
+            else asym_id=line_vec[_atom_site["label_asym_id"]];
+            if (asym_id==".") asym_id=" ";
+            
+            if (_atom_site.count("pdbx_PDB_model_num") && 
+                model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]])
+            {
+                model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                if (PDB_lines.size() && ter_opt>=1) break;
+                if (PDB_lines.size()==0 || split_opt>=1)
+                {
+                    PDB_lines.push_back(tmp_str_vec);
+                    mol_vec.push_back(0);
+                    prev_asym_id=asym_id;
+
+                    if (split_opt==1 && ter_opt==0) chainID_list.push_back(
+                        ':'+model_index);
+                    else if (split_opt==2 && ter_opt==0)
+                        chainID_list.push_back(':'+model_index+':'+asym_id);
+                    else if (split_opt==2 && ter_opt==1)
+                        chainID_list.push_back(':'+asym_id);
+                }
+            }
+
+            if (prev_asym_id!=asym_id)
+            {
+                if (prev_asym_id!="" && ter_opt>=2) break;
+                if (split_opt>=2)
+                {
+                    PDB_lines.push_back(tmp_str_vec);
+                    mol_vec.push_back(0);
+
+                    if (split_opt==1 && ter_opt==0) chainID_list.push_back(
+                        ':'+model_index);
+                    else if (split_opt==2 && ter_opt==0)
+                        chainID_list.push_back(':'+model_index+':'+asym_id);
+                    else if (split_opt==2 && ter_opt==1)
+                        chainID_list.push_back(':'+asym_id);
+                }
+            }
+            if (prev_asym_id!=asym_id) prev_asym_id=asym_id;
+
+            if (AA[0]==' ' && (AA[1]=='D'||AA[1]==' ')) mol_vec.back()++;
+            else mol_vec.back()--;
+
+            if (_atom_site.count("auth_seq_id"))
+                 resi=line_vec[_atom_site["auth_seq_id"]];
+            else resi=line_vec[_atom_site["label_seq_id"]];
+            if (_atom_site.count("pdbx_PDB_ins_code") && 
+                line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+            else resi+=" ";
+
+            if (prev_resi==resi)
+                cerr<<"Warning! Duplicated residue "<<resi<<endl;
+            prev_resi=resi;
+
+            i++;
+            i8_stream<<"ATOM  "
+                <<setw(5)<<i<<" "<<atom<<" "<<AA<<" "<<asym_id[0]
+                <<setw(5)<<resi.substr(0,5)<<"   "
+                <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                <<setw(8)<<line_vec[_atom_site["Cartn_z"]];
+            PDB_lines.back().push_back(i8_stream.str());
+            i8_stream.str(string());
+        }
+        _atom_site.clear();
+        line_vec.clear();
+        alt_id.clear();
+        asym_id.clear();
+        AA.clear();
+    }
+
+    fin.close();
+    line.clear();
+    if (!split_opt) chainID_list.push_back("");
+    return PDB_lines.size();
+}
+
+/* read fasta file from filename. sequence is stored into FASTA_lines
+ * while sequence name is stored into chainID_list.
+ * if ter_opt >=1, only read the first sequence.
+ * if ter_opt ==0, read all sequences.
+ * if split_opt >=1 and ter_opt ==0, each sequence is a separate entry.
+ * if split_opt ==0 and ter_opt ==0, all sequences are combined into one */
+size_t get_FASTA_lines(const string filename,
+    vector<vector<string> >&FASTA_lines, vector<string> &chainID_list,
+    vector<int> &mol_vec, const int ter_opt=3, const int split_opt=0)
+{
+    string line;
+    vector<string> tmp_str_vec;
+    int l;
+    
+    ifstream fin;
+    fin.open(filename.c_str());
+
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (line.size()==0 || line[0]=='#') continue;
+
+        if (line[0]=='>')
+        {
+            if (FASTA_lines.size())
+            {
+                if (ter_opt) break;
+                if (split_opt==0) continue;
+            }
+            FASTA_lines.push_back(tmp_str_vec);
+            FASTA_lines.back().push_back("");
+            mol_vec.push_back(0);
+            if (ter_opt==0 && split_opt)
+            {
+                line[0]=':';
+                chainID_list.push_back(line);
+            }
+            else chainID_list.push_back("");
+        }
+        else
+        {
+            FASTA_lines.back()[0]+=line;
+            for (l=0;l<line.size();l++) mol_vec.back()+=
+                ('a'<=line[l] && line[l]<='z')-('A'<=line[l] && line[l]<='Z');
+        }
+    }
+
+    line.clear();
+    fin.close();
+    return FASTA_lines.size();
+}
+
+
+/* extract pairwise sequence alignment from residue index vectors,
+ * assuming that "sequence" contains two empty strings.
+ * return length of alignment, including gap. */
+int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
+    const vector<string> resi_vec1, const vector<string> resi_vec2,
+    const int byresi_opt)
+{
+    sequence.clear();
+    sequence.push_back("");
+    sequence.push_back("");
+
+    int i1=0; // positions in resi_vec1
+    int i2=0; // positions in resi_vec2
+    int xlen=resi_vec1.size();
+    int ylen=resi_vec2.size();
+    map<char,int> chainID_map1;
+    map<char,int> chainID_map2;
+    if (byresi_opt==3)
+    {
+        vector<char> chainID_vec;
+        char chainID;
+        int i;
+        for (i=0;i<xlen;i++)
+        {
+            chainID=resi_vec1[i][5];
+            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
+            {
+                chainID_vec.push_back(chainID);
+                chainID_map1[chainID]=chainID_vec.size();
+            }
+        }
+        chainID_vec.clear();
+        for (i=0;i<ylen;i++)
+        {
+            chainID=resi_vec2[i][5];
+            if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
+            {
+                chainID_vec.push_back(chainID);
+                chainID_map2[chainID]=chainID_vec.size();
+            }
+        }
+        chainID_vec.clear();
+    }
+    while(i1<xlen && i2<ylen)
+    {
+        if ((byresi_opt<=2 && resi_vec1[i1]==resi_vec2[i2]) || (byresi_opt==3
+             && resi_vec1[i1].substr(0,5)==resi_vec2[i2].substr(0,5)
+             && chainID_map1[resi_vec1[i1][5]]==chainID_map2[resi_vec2[i2][5]]))
+        {
+            sequence[0]+=seqx[i1++];
+            sequence[1]+=seqy[i2++];
+        }
+        else if (atoi(resi_vec1[i1].substr(0,4).c_str())<=
+                 atoi(resi_vec2[i2].substr(0,4).c_str()))
+        {
+            sequence[0]+=seqx[i1++];
+            sequence[1]+='-';
+        }
+        else
+        {
+            sequence[0]+='-';
+            sequence[1]+=seqy[i2++];
+        }
+    }
+    chainID_map1.clear();
+    chainID_map2.clear();
+    return sequence[0].size();
+}
+
+int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
+    vector<string> &resi_vec, const int byresi_opt)
+{
+    int i;
+    for (i=0;i<PDB_lines.size();i++)
+    {
+        a[i][0] = atof(PDB_lines[i].substr(30, 8).c_str());
+        a[i][1] = atof(PDB_lines[i].substr(38, 8).c_str());
+        a[i][2] = atof(PDB_lines[i].substr(46, 8).c_str());
+        seq[i]  = AAmap(PDB_lines[i].substr(17, 3));
+
+        if (byresi_opt>=2) resi_vec.push_back(PDB_lines[i].substr(22,5)+
+                                              PDB_lines[i][21]);
+        if (byresi_opt==1) resi_vec.push_back(PDB_lines[i].substr(22,5));
+    }
+    seq[i]='\0'; 
+    return i;
+}
+
+double dist(double x[3], double y[3])
+{
+    double d1=x[0]-y[0];
+    double d2=x[1]-y[1];
+    double d3=x[2]-y[2];
+ 
+    return (d1*d1 + d2*d2 + d3*d3);
+}
+
+double dot(double *a, double *b)
+{
+    return (a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
+}
+
+void transform(double t[3], double u[3][3], double *x, double *x1)
+{
+    x1[0]=t[0]+dot(&u[0][0], x);
+    x1[1]=t[1]+dot(&u[1][0], x);
+    x1[2]=t[2]+dot(&u[2][0], x);
+}
+
+void do_rotation(double **x, double **x1, int len, double t[3], double u[3][3])
+{
+    for(int i=0; i<len; i++)
+    {
+        transform(t, u, &x[i][0], &x1[i][0]);
+    }    
+}
+
+/* strip white space at the begining or end of string */
+string Trim(const string &inputString)
+{
+    string result = inputString;
+    int idxBegin = inputString.find_first_not_of(" \n\r\t");
+    int idxEnd = inputString.find_last_not_of(" \n\r\t");
+    if (idxBegin >= 0 && idxEnd >= 0)
+        result = inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
+    return result;
+}
+
+/* read user specified pairwise alignment from 'fname_lign' to 'sequence'.
+ * This function should only be called by main function, as it will
+ * terminate a program if wrong alignment is given */
+void read_user_alignment(vector<string>&sequence, const string &fname_lign,
+    const int i_opt)
+{
+    if (fname_lign == "")
+        PrintErrorAndQuit("Please provide a file name for option -i!");
+    // open alignment file
+    int n_p = 0;// number of structures in alignment file
+    string line;
+    
+    ifstream fileIn(fname_lign.c_str());
+    if (fileIn.is_open())
+    {
+        while (fileIn.good())
+        {
+            getline(fileIn, line);
+            if (line.compare(0, 1, ">") == 0)// Flag for a new structure
+            {
+                if (n_p >= 2) break;
+                sequence.push_back("");
+                n_p++;
+            }
+            else if (n_p > 0 && line!="") sequence.back()+=line;
+        }
+        fileIn.close();
+    }
+    else PrintErrorAndQuit("ERROR! Alignment file does not exist.");
+    
+    if (n_p < 2)
+        PrintErrorAndQuit("ERROR: Fasta format is wrong, two proteins should be included.");
+    if (sequence[0].size() != sequence[1].size())
+        PrintErrorAndQuit("ERROR! FASTA file is wrong. The length in alignment should be equal for the two aligned proteins.");
+    if (i_opt==3)
+    {
+        int aligned_resNum=0;
+        for (int i=0;i<sequence[0].size();i++) 
+            aligned_resNum+=(sequence[0][i]!='-' && sequence[1][i]!='-');
+        if (aligned_resNum<3)
+            PrintErrorAndQuit("ERROR! Superposition is undefined for <3 aligned residues.");
+    }
+    line.clear();
+    return;
+}
+
+/* read list of entries from 'name' to 'chain_list'.
+ * dir_opt is the folder name (prefix).
+ * suffix_opt is the file name extension (suffix_opt).
+ * This function should only be called by main function, as it will
+ * terminate a program if wrong alignment is given */
+void file2chainlist(vector<string>&chain_list, const string &name,
+    const string &dir_opt, const string &suffix_opt)
+{
+    ifstream fp(name.c_str());
+    if (! fp.is_open())
+        PrintErrorAndQuit(("Can not open file: "+name+'\n').c_str());
+    string line;
+    while (fp.good())
+    {
+        getline(fp, line);
+        if (! line.size()) continue;
+        chain_list.push_back(dir_opt+Trim(line)+suffix_opt);
+    }
+    fp.close();
+    line.clear();
+}
+
+/**************************************************************************
+Implemetation of Kabsch algoritm for finding the best rotation matrix
+---------------------------------------------------------------------------
+x    - x(i,m) are coordinates of atom m in set x            (input)
+y    - y(i,m) are coordinates of atom m in set y            (input)
+n    - n is number of atom pairs                            (input)
+mode  - 0:calculate rms only                                (input)
+1:calculate u,t only                                (takes medium)
+2:calculate rms,u,t                                 (takes longer)
+rms   - sum of w*(ux+t-y)**2 over all atom pairs            (output)
+u    - u(i,j) is   rotation  matrix for best superposition  (output)
+t    - t(i)   is translation vector for best superposition  (output)
+**************************************************************************/
+bool Kabsch(double **x, double **y, int n, int mode, double *rms,
+    double t[3], double u[3][3])
+{
+    int i, j, m, m1, l, k;
+    double e0, rms1, d, h, g;
+    double cth, sth, sqrth, p, det, sigma;
+    double xc[3], yc[3];
+    double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6];
+    double sqrt3 = 1.73205080756888, tol = 0.01;
+    int ip[] = { 0, 1, 3, 1, 2, 4, 3, 4, 5 };
+    int ip2312[] = { 1, 2, 0, 1 };
+
+    int a_failed = 0, b_failed = 0;
+    double epsilon = 0.00000001;
+
+    //initializtation
+    *rms = 0;
+    rms1 = 0;
+    e0 = 0;
+    double c1[3], c2[3];
+    double s1[3], s2[3];
+    double sx[3], sy[3], sz[3];
+    for (i = 0; i < 3; i++)
+    {
+        s1[i] = 0.0;
+        s2[i] = 0.0;
+
+        sx[i] = 0.0;
+        sy[i] = 0.0;
+        sz[i] = 0.0;
+    }
+
+    for (i = 0; i<3; i++)
+    {
+        xc[i] = 0.0;
+        yc[i] = 0.0;
+        t[i] = 0.0;
+        for (j = 0; j<3; j++)
+        {
+            u[i][j] = 0.0;
+            r[i][j] = 0.0;
+            a[i][j] = 0.0;
+            if (i == j)
+            {
+                u[i][j] = 1.0;
+                a[i][j] = 1.0;
+            }
+        }
+    }
+
+    if (n<1) return false;
+
+    //compute centers for vector sets x, y
+    for (i = 0; i<n; i++)
+    {
+        for (j = 0; j < 3; j++)
+        {
+            c1[j] = x[i][j];
+            c2[j] = y[i][j];
+
+            s1[j] += c1[j];
+            s2[j] += c2[j];
+        }
+
+        for (j = 0; j < 3; j++)
+        {
+            sx[j] += c1[0] * c2[j];
+            sy[j] += c1[1] * c2[j];
+            sz[j] += c1[2] * c2[j];
+        }
+    }
+    for (i = 0; i < 3; i++)
+    {
+        xc[i] = s1[i] / n;
+        yc[i] = s2[i] / n;
+    }
+    if (mode == 2 || mode == 0)
+        for (int mm = 0; mm < n; mm++)
+            for (int nn = 0; nn < 3; nn++)
+                e0 += (x[mm][nn] - xc[nn]) * (x[mm][nn] - xc[nn]) + 
+                      (y[mm][nn] - yc[nn]) * (y[mm][nn] - yc[nn]);
+    for (j = 0; j < 3; j++)
+    {
+        r[j][0] = sx[j] - s1[0] * s2[j] / n;
+        r[j][1] = sy[j] - s1[1] * s2[j] / n;
+        r[j][2] = sz[j] - s1[2] * s2[j] / n;
+    }
+
+    //compute determinat of matrix r
+    det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1])\
+        - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0])\
+        + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]);
+    sigma = det;
+
+    //compute tras(r)*r
+    m = 0;
+    for (j = 0; j<3; j++)
+    {
+        for (i = 0; i <= j; i++)
+        {
+            rr[m] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j];
+            m++;
+        }
+    }
+
+    double spur = (rr[0] + rr[2] + rr[5]) / 3.0;
+    double cof = (((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5])\
+        - rr[3] * rr[3]) + rr[0] * rr[2]) - rr[1] * rr[1]) / 3.0;
+    det = det*det;
+
+    for (i = 0; i<3; i++) e[i] = spur;
+
+    if (spur>0)
+    {
+        d = spur*spur;
+        h = d - cof;
+        g = (spur*cof - det) / 2.0 - spur*h;
+
+        if (h>0)
+        {
+            sqrth = sqrt(h);
+            d = h*h*h - g*g;
+            if (d<0.0) d = 0.0;
+            d = atan2(sqrt(d), -g) / 3.0;
+            cth = sqrth * cos(d);
+            sth = sqrth*sqrt3*sin(d);
+            e[0] = (spur + cth) + cth;
+            e[1] = (spur - cth) + sth;
+            e[2] = (spur - cth) - sth;
+
+            if (mode != 0)
+            {//compute a                
+                for (l = 0; l<3; l = l + 2)
+                {
+                    d = e[l];
+                    ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4];
+                    ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4];
+                    ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3];
+                    ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4];
+                    ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3];
+                    ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1];
+
+                    if (fabs(ss[0]) <= epsilon) ss[0] = 0.0;
+                    if (fabs(ss[1]) <= epsilon) ss[1] = 0.0;
+                    if (fabs(ss[2]) <= epsilon) ss[2] = 0.0;
+                    if (fabs(ss[3]) <= epsilon) ss[3] = 0.0;
+                    if (fabs(ss[4]) <= epsilon) ss[4] = 0.0;
+                    if (fabs(ss[5]) <= epsilon) ss[5] = 0.0;
+
+                    if (fabs(ss[0]) >= fabs(ss[2]))
+                    {
+                        j = 0;
+                        if (fabs(ss[0]) < fabs(ss[5])) j = 2;
+                    }
+                    else if (fabs(ss[2]) >= fabs(ss[5])) j = 1;
+                    else j = 2;
+
+                    d = 0.0;
+                    j = 3 * j;
+                    for (i = 0; i<3; i++)
+                    {
+                        k = ip[i + j];
+                        a[i][l] = ss[k];
+                        d = d + ss[k] * ss[k];
+                    }
+
+
+                    //if( d > 0.0 ) d = 1.0 / sqrt(d);
+                    if (d > epsilon) d = 1.0 / sqrt(d);
+                    else d = 0.0;
+                    for (i = 0; i<3; i++) a[i][l] = a[i][l] * d;
+                }//for l
+
+                d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2];
+                if ((e[0] - e[1]) >(e[1] - e[2]))
+                {
+                    m1 = 2;
+                    m = 0;
+                }
+                else
+                {
+                    m1 = 0;
+                    m = 2;
+                }
+                p = 0;
+                for (i = 0; i<3; i++)
+                {
+                    a[i][m1] = a[i][m1] - d*a[i][m];
+                    p = p + a[i][m1] * a[i][m1];
+                }
+                if (p <= tol)
+                {
+                    p = 1.0;
+                    for (i = 0; i<3; i++)
+                    {
+                        if (p < fabs(a[i][m])) continue;
+                        p = fabs(a[i][m]);
+                        j = i;
+                    }
+                    k = ip2312[j];
+                    l = ip2312[j + 1];
+                    p = sqrt(a[k][m] * a[k][m] + a[l][m] * a[l][m]);
+                    if (p > tol)
+                    {
+                        a[j][m1] = 0.0;
+                        a[k][m1] = -a[l][m] / p;
+                        a[l][m1] = a[k][m] / p;
+                    }
+                    else a_failed = 1;
+                }//if p<=tol
+                else
+                {
+                    p = 1.0 / sqrt(p);
+                    for (i = 0; i<3; i++) a[i][m1] = a[i][m1] * p;
+                }//else p<=tol  
+                if (a_failed != 1)
+                {
+                    a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2];
+                    a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2];
+                    a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2];
+                }
+            }//if(mode!=0)       
+        }//h>0
+
+        //compute b anyway
+        if (mode != 0 && a_failed != 1)//a is computed correctly
+        {
+            //compute b
+            for (l = 0; l<2; l++)
+            {
+                d = 0.0;
+                for (i = 0; i<3; i++)
+                {
+                    b[i][l] = r[i][0] * a[0][l] + 
+                              r[i][1] * a[1][l] + r[i][2] * a[2][l];
+                    d = d + b[i][l] * b[i][l];
+                }
+                //if( d > 0 ) d = 1.0 / sqrt(d);
+                if (d > epsilon) d = 1.0 / sqrt(d);
+                else d = 0.0;
+                for (i = 0; i<3; i++) b[i][l] = b[i][l] * d;
+            }
+            d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1];
+            p = 0.0;
+
+            for (i = 0; i<3; i++)
+            {
+                b[i][1] = b[i][1] - d*b[i][0];
+                p += b[i][1] * b[i][1];
+            }
+
+            if (p <= tol)
+            {
+                p = 1.0;
+                for (i = 0; i<3; i++)
+                {
+                    if (p<fabs(b[i][0])) continue;
+                    p = fabs(b[i][0]);
+                    j = i;
+                }
+                k = ip2312[j];
+                l = ip2312[j + 1];
+                p = sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]);
+                if (p > tol)
+                {
+                    b[j][1] = 0.0;
+                    b[k][1] = -b[l][0] / p;
+                    b[l][1] = b[k][0] / p;
+                }
+                else b_failed = 1;
+            }//if( p <= tol )
+            else
+            {
+                p = 1.0 / sqrt(p);
+                for (i = 0; i<3; i++) b[i][1] = b[i][1] * p;
+            }
+            if (b_failed != 1)
+            {
+                b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0];
+                b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0];
+                b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0];
+                //compute u
+                for (i = 0; i<3; i++)
+                    for (j = 0; j<3; j++)
+                        u[i][j] = b[i][0] * a[j][0] + 
+                                  b[i][1] * a[j][1] + b[i][2] * a[j][2];
+            }
+
+            //compute t
+            for (i = 0; i<3; i++)
+                t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - 
+                                                    u[i][2] * xc[2];
+        }//if(mode!=0 && a_failed!=1)
+    }//spur>0
+    else //just compute t and errors
+    {
+        //compute t
+        for (i = 0; i<3; i++)
+            t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - 
+                                                u[i][2] * xc[2];
+    }//else spur>0 
+
+    //compute rms
+    for (i = 0; i<3; i++)
+    {
+        if (e[i] < 0) e[i] = 0;
+        e[i] = sqrt(e[i]);
+    }
+    d = e[2];
+    if (sigma < 0.0) d = -d;
+    d = (d + e[1]) + e[0];
+
+    if (mode == 2 || mode == 0)
+    {
+        rms1 = (e0 - d) - d;
+        if (rms1 < 0.0) rms1 = 0.0;
+    }
+
+    *rms = rms1;
+    return true;
+}
+
+/* Partial implementation of Needleman-Wunsch (NW) dymanamic programming for
+ * global alignment. The three NWDP_TM functions below are not complete
+ * implementation of NW algorithm because gap jumping in the standard Gotoh
+ * algorithm is not considered. Since the gap opening and gap extension is
+ * the same, this is not a problem. This code was exploited in TM-align
+ * because it is about 1.5 times faster than a complete NW implementation.
+ * Nevertheless, if gap openning != gap extension shall be implemented in
+ * the future, the Gotoh algorithm must be implemented. In rare scenarios,
+ * it is also possible to have asymmetric alignment (i.e. 
+ * TMalign A.pdb B.pdb and TMalign B.pdb A.pdb have different TM_A and TM_B
+ * values) caused by the NWPD_TM implement.
+ */
+
+/* Input: score[1:len1, 1:len2], and gap_open
+ * Output: j2i[1:len2] \in {1:len1} U {-1}
+ * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
+void NWDP_TM(double **score, bool **path, double **val,
+    int len1, int len2, double gap_open, int j2i[])
+{
+
+    int i, j;
+    double h, v, d;
+
+    //initialization
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=0;
+        //val[i][0]=i*gap_open;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=0;
+        //val[0][j]=j*gap_open;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
+    }      
+
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            d=val[i-1][j-1]+score[i][j]; //diagonal
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else 
+            {
+                path[i][j]=false; //from horizontal
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for i
+    } //for j
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else 
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+/* Input: vectors x, y, rotation matrix t, u, scale factor d02, and gap_open
+ * Output: j2i[1:len2] \in {1:len1} U {-1}
+ * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
+void NWDP_TM(bool **path, double **val, double **x, double **y,
+    int len1, int len2, double t[3], double u[3][3],
+    double d02, double gap_open, int j2i[])
+{
+    int i, j;
+    double h, v, d;
+
+    //initialization. use old val[i][0] and val[0][j] initialization
+    //to minimize difference from TMalign fortran version
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=0;
+        //val[i][0]=i*gap_open;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=0;
+        //val[0][j]=j*gap_open;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
+    }      
+    double xx[3], dij;
+
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        transform(t, u, &x[i-1][0], xx);
+        for(j=1; j<=len2; j++)
+        {
+            dij=dist(xx, &y[j-1][0]);    
+            d=val[i-1][j-1] +  1.0/(1+dij/d02);
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else 
+            {
+                path[i][j]=false; //from horizontal
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for i
+    } //for j
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else 
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+/* This is the same as the previous NWDP_TM, except for the lack of rotation
+ * Input: vectors x, y, scale factor d02, and gap_open
+ * Output: j2i[1:len2] \in {1:len1} U {-1}
+ * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
+void NWDP_SE(bool **path, double **val, double **x, double **y,
+    int len1, int len2, double d02, double gap_open, int j2i[])
+{
+    int i, j;
+    double h, v, d;
+
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=0;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=0;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
+    }      
+    double dij;
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            dij=dist(&x[i-1][0], &y[j-1][0]);    
+            d=val[i-1][j-1] +  1.0/(1+dij/d02);
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else 
+            {
+                path[i][j]=false; //from horizontal
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for i
+    } //for j
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else 
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+/* +ss
+ * Input: secondary structure secx, secy, and gap_open
+ * Output: j2i[1:len2] \in {1:len1} U {-1}
+ * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
+void NWDP_TM(bool **path, double **val, const char *secx, const char *secy,
+    const int len1, const int len2, const double gap_open, int j2i[])
+{
+
+    int i, j;
+    double h, v, d;
+
+    //initialization
+    for(i=0; i<=len1; i++)
+    {
+        val[i][0]=0;
+        //val[i][0]=i*gap_open;
+        path[i][0]=false; //not from diagonal
+    }
+
+    for(j=0; j<=len2; j++)
+    {
+        val[0][j]=0;
+        //val[0][j]=j*gap_open;
+        path[0][j]=false; //not from diagonal
+        j2i[j]=-1;    //all are not aligned, only use j2i[1:len2]
+    }      
+
+    //decide matrix and path
+    for(i=1; i<=len1; i++)
+    {
+        for(j=1; j<=len2; j++)
+        {
+            d=val[i-1][j-1] + 1.0*(secx[i-1]==secy[j-1]);
+
+            //symbol insertion in horizontal (= a gap in vertical)
+            h=val[i-1][j];
+            if(path[i-1][j]) h += gap_open; //aligned in last position
+
+            //symbol insertion in vertical
+            v=val[i][j-1];
+            if(path[i][j-1]) v += gap_open; //aligned in last position
+
+            if(d>=h && d>=v)
+            {
+                path[i][j]=true; //from diagonal
+                val[i][j]=d;
+            }
+            else 
+            {
+                path[i][j]=false; //from horizontal
+                if(v>=h) val[i][j]=v;
+                else val[i][j]=h;
+            }
+        } //for i
+    } //for j
+
+    //trace back to extract the alignment
+    i=len1;
+    j=len2;
+    while(i>0 && j>0)
+    {
+        if(path[i][j]) //from diagonal
+        {
+            j2i[j-1]=i-1;
+            i--;
+            j--;
+        }
+        else 
+        {
+            h=val[i-1][j];
+            if(path[i-1][j]) h +=gap_open;
+
+            v=val[i][j-1];
+            if(path[i][j-1]) v +=gap_open;
+
+            if(v>=h) j--;
+            else i--;
+        }
+    }
+}
+
+void parameter_set4search(const int xlen, const int ylen,
+    double &D0_MIN, double &Lnorm,
+    double &score_d8, double &d0, double &d0_search, double &dcu0)
+{
+    //parameter initilization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8
+    D0_MIN=0.5; 
+    dcu0=4.25;                       //update 3.85-->4.25
+ 
+    Lnorm=getmin(xlen, ylen);        //normaliz TMscore by this in searching
+    if (Lnorm<=19)                    //update 15-->19
+        d0=0.168;                   //update 0.5-->0.168
+    else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
+    D0_MIN=d0+0.8;              //this should be moved to above
+    d0=D0_MIN;                  //update: best for search    
+
+    d0_search=d0;
+    if (d0_search>8)   d0_search=8;
+    if (d0_search<4.5) d0_search=4.5;
+
+    score_d8=1.5*pow(Lnorm*1.0, 0.3)+3.5; //remove pairs with dis>d8 during search & final
+}
+
+void parameter_set4final_C3prime(const double len, double &D0_MIN,
+    double &Lnorm, double &d0, double &d0_search)
+{
+    D0_MIN=0.3; 
+ 
+    Lnorm=len;            //normaliz TMscore by this in searching
+    if(Lnorm<=11) d0=0.3;
+    else if(Lnorm>11&&Lnorm<=15) d0=0.4;
+    else if(Lnorm>15&&Lnorm<=19) d0=0.5;
+    else if(Lnorm>19&&Lnorm<=23) d0=0.6;
+    else if(Lnorm>23&&Lnorm<30)  d0=0.7;
+    else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5);
+
+    d0_search=d0;
+    if (d0_search>8)   d0_search=8;
+    if (d0_search<4.5) d0_search=4.5;
+}
+
+void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
+    double &d0, double &d0_search, const int mol_type)
+{
+    if (mol_type>0) // RNA
+    {
+        parameter_set4final_C3prime(len, D0_MIN, Lnorm,
+            d0, d0_search);
+        return;
+    }
+    D0_MIN=0.5; 
+ 
+    Lnorm=len;            //normaliz TMscore by this in searching
+    if (Lnorm<=21) d0=0.5;          
+    else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
+    if (d0<D0_MIN) d0=D0_MIN;   
+    d0_search=d0;
+    if (d0_search>8)   d0_search=8;
+    if (d0_search<4.5) d0_search=4.5;
+}
+
+void parameter_set4scale(const int len, const double d_s, double &Lnorm,
+    double &d0, double &d0_search)
+{
+    d0=d_s;          
+    Lnorm=len;            //normaliz TMscore by this in searching
+    d0_search=d0;
+    if (d0_search>8)   d0_search=8;
+    if (d0_search<4.5) d0_search=4.5;  
+}
+
+//     1, collect those residues with dis<d;
+//     2, calculate TMscore
+int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
+    double *score1, int score_sum_method, const double Lnorm, 
+    const double score_d8, const double d0)
+{
+    double score_sum=0, di;
+    double d_tmp=d*d;
+    double d02=d0*d0;
+    double score_d8_cut = score_d8*score_d8;
+    
+    int i, n_cut, inc=0;
+
+    while(1)
+    {
+        n_cut=0;
+        score_sum=0;
+        for(i=0; i<n_ali; i++)
+        {
+            di = dist(xa[i], ya[i]);
+            if(di<d_tmp)
+            {
+                i_ali[n_cut]=i;
+                n_cut++;
+            }
+            if(score_sum_method==8)
+            {                
+                if(di<=score_d8_cut) score_sum += 1/(1+di/d02);
+            }
+            else score_sum += 1/(1+di/d02);
+        }
+        //there are not enough feasible pairs, reliefe the threshold         
+        if(n_cut<3 && n_ali>3)
+        {
+            inc++;
+            double dinc=(d+inc*0.5);
+            d_tmp = dinc * dinc;
+        }
+        else break;
+    }  
+
+    *score1=score_sum/Lnorm;
+    return n_cut;
+}
+
+int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
+    int i_ali[], double *score1, int score_sum_method,
+    double score_d8, double d0)
+{
+    double score_sum = 0, di;
+    double d_tmp = d*d;
+    double d02 = d0*d0;
+    double score_d8_cut = score_d8*score_d8;
+
+    int i, n_cut, inc = 0;
+    while (1)
+    {
+        n_cut = 0;
+        score_sum = 0;
+        for (i = 0; i<n_ali; i++)
+        {
+            di = dist(xa[i], ya[i]);
+            if (di<d_tmp)
+            {
+                i_ali[n_cut] = i;
+                n_cut++;
+            }
+            if (score_sum_method == 8)
+            {
+                if (di <= score_d8_cut) score_sum += 1 / (1 + di / d02);
+            }
+            else
+            {
+                score_sum += 1 / (1 + di / d02);
+            }
+        }
+        //there are not enough feasible pairs, reliefe the threshold         
+        if (n_cut<3 && n_ali>3)
+        {
+            inc++;
+            double dinc = (d + inc*0.5);
+            d_tmp = dinc * dinc;
+        }
+        else break;
+    }
+
+    *score1 = score_sum / n_ali;
+    return n_cut;
+}
+
+double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step,
+    int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm,
+    double score_d8, double d0)
+{
+    int i, m;
+    double score_max, score, rmsd;    
+    const int kmax=Lali;    
+    int k_ali[kmax], ka, k;
+    double t[3];
+    double u[3][3];
+    double d;
+    
+
+    //iterative parameters
+    int n_it=20;            //maximum number of iterations
+    int n_init_max=6; //maximum number of different fragment length 
+    int L_ini[n_init_max];  //fragment lengths, Lali, Lali/2, Lali/4 ... 4   
+    int L_ini_min=4;
+    if(Lali<L_ini_min) L_ini_min=Lali;   
+
+    int n_init=0, i_init;      
+    for(i=0; i<n_init_max-1; i++)
+    {
+        n_init++;
+        L_ini[i]=(int) (Lali/pow(2.0, (double) i));
+        if(L_ini[i]<=L_ini_min)
+        {
+            L_ini[i]=L_ini_min;
+            break;
+        }
+    }
+    if(i==n_init_max-1)
+    {
+        n_init++;
+        L_ini[i]=L_ini_min;
+    }
+    
+    score_max=-1;
+    //find the maximum score starting from local structures superposition
+    int i_ali[kmax], n_cut;
+    int L_frag; //fragment length
+    int iL_max; //maximum starting postion for the fragment
+    
+    for(i_init=0; i_init<n_init; i_init++)
+    {
+        L_frag=L_ini[i_init];
+        iL_max=Lali-L_frag;
+      
+        i=0;   
+        while(1)
+        {
+            //extract the fragment starting from position i 
+            ka=0;
+            for(k=0; k<L_frag; k++)
+            {
+                int kk=k+i;
+                r1[k][0]=xtm[kk][0];  
+                r1[k][1]=xtm[kk][1]; 
+                r1[k][2]=xtm[kk][2];   
+                
+                r2[k][0]=ytm[kk][0];  
+                r2[k][1]=ytm[kk][1]; 
+                r2[k][2]=ytm[kk][2];
+                
+                k_ali[ka]=kk;
+                ka++;
+            }
+            
+            //extract rotation matrix based on the fragment
+            Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
+            if (simplify_step != 1)
+                *Rcomm = 0;
+            do_rotation(xtm, xt, Lali, t, u);
+            
+            //get subsegment of this fragment
+            d = local_d0_search - 1;
+            n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score, 
+                score_sum_method, Lnorm, score_d8, d0);
+            if(score>score_max)
+            {
+                score_max=score;
+                
+                //save the rotation matrix
+                for(k=0; k<3; k++)
+                {
+                    t0[k]=t[k];
+                    u0[k][0]=u[k][0];
+                    u0[k][1]=u[k][1];
+                    u0[k][2]=u[k][2];
+                }
+            }
+            
+            //try to extend the alignment iteratively            
+            d = local_d0_search + 1;
+            for(int it=0; it<n_it; it++)            
+            {
+                ka=0;
+                for(k=0; k<n_cut; k++)
+                {
+                    m=i_ali[k];
+                    r1[k][0]=xtm[m][0];  
+                    r1[k][1]=xtm[m][1]; 
+                    r1[k][2]=xtm[m][2];
+                    
+                    r2[k][0]=ytm[m][0];  
+                    r2[k][1]=ytm[m][1]; 
+                    r2[k][2]=ytm[m][2];
+                    
+                    k_ali[ka]=m;
+                    ka++;
+                } 
+                //extract rotation matrix based on the fragment                
+                Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
+                do_rotation(xtm, xt, Lali, t, u);
+                n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score, 
+                    score_sum_method, Lnorm, score_d8, d0);
+                if(score>score_max)
+                {
+                    score_max=score;
+
+                    //save the rotation matrix
+                    for(k=0; k<3; k++)
+                    {
+                        t0[k]=t[k];
+                        u0[k][0]=u[k][0];
+                        u0[k][1]=u[k][1];
+                        u0[k][2]=u[k][2];
+                    }                     
+                }
+                
+                //check if it converges            
+                if(n_cut==ka)
+                {                
+                    for(k=0; k<n_cut; k++)
+                    {
+                        if(i_ali[k]!=k_ali[k]) break;
+                    }
+                    if(k==n_cut) break;
+                }                                                               
+            } //for iteration            
+
+            if(i<iL_max)
+            {
+                i=i+simplify_step; //shift the fragment        
+                if(i>iL_max) i=iL_max;  //do this to use the last missed fragment
+            }
+            else if(i>=iL_max) break;
+        }//while(1)
+        //end of one fragment
+    }//for(i_init
+    return score_max;
+}
+
+
+double TMscore8_search_standard( double **r1, double **r2,
+    double **xtm, double **ytm, double **xt, int Lali,
+    double t0[3], double u0[3][3], int simplify_step, int score_sum_method,
+    double *Rcomm, double local_d0_search, double score_d8, double d0)
+{
+    int i, m;
+    double score_max, score, rmsd;
+    const int kmax = Lali;
+    int k_ali[kmax], ka, k;
+    double t[3];
+    double u[3][3];
+    double d;
+
+    //iterative parameters
+    int n_it = 20;            //maximum number of iterations
+    int n_init_max = 6; //maximum number of different fragment length 
+    int L_ini[n_init_max];  //fragment lengths, Lali, Lali/2, Lali/4 ... 4   
+    int L_ini_min = 4;
+    if (Lali<L_ini_min) L_ini_min = Lali;
+
+    int n_init = 0, i_init;
+    for (i = 0; i<n_init_max - 1; i++)
+    {
+        n_init++;
+        L_ini[i] = (int)(Lali / pow(2.0, (double)i));
+        if (L_ini[i] <= L_ini_min)
+        {
+            L_ini[i] = L_ini_min;
+            break;
+        }
+    }
+    if (i == n_init_max - 1)
+    {
+        n_init++;
+        L_ini[i] = L_ini_min;
+    }
+
+    score_max = -1;
+    //find the maximum score starting from local structures superposition
+    int i_ali[kmax], n_cut;
+    int L_frag; //fragment length
+    int iL_max; //maximum starting postion for the fragment
+
+    for (i_init = 0; i_init<n_init; i_init++)
+    {
+        L_frag = L_ini[i_init];
+        iL_max = Lali - L_frag;
+
+        i = 0;
+        while (1)
+        {
+            //extract the fragment starting from position i 
+            ka = 0;
+            for (k = 0; k<L_frag; k++)
+            {
+                int kk = k + i;
+                r1[k][0] = xtm[kk][0];
+                r1[k][1] = xtm[kk][1];
+                r1[k][2] = xtm[kk][2];
+
+                r2[k][0] = ytm[kk][0];
+                r2[k][1] = ytm[kk][1];
+                r2[k][2] = ytm[kk][2];
+
+                k_ali[ka] = kk;
+                ka++;
+            }
+            //extract rotation matrix based on the fragment
+            Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
+            if (simplify_step != 1)
+                *Rcomm = 0;
+            do_rotation(xtm, xt, Lali, t, u);
+
+            //get subsegment of this fragment
+            d = local_d0_search - 1;
+            n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
+                score_sum_method, score_d8, d0);
+
+            if (score>score_max)
+            {
+                score_max = score;
+
+                //save the rotation matrix
+                for (k = 0; k<3; k++)
+                {
+                    t0[k] = t[k];
+                    u0[k][0] = u[k][0];
+                    u0[k][1] = u[k][1];
+                    u0[k][2] = u[k][2];
+                }
+            }
+
+            //try to extend the alignment iteratively            
+            d = local_d0_search + 1;
+            for (int it = 0; it<n_it; it++)
+            {
+                ka = 0;
+                for (k = 0; k<n_cut; k++)
+                {
+                    m = i_ali[k];
+                    r1[k][0] = xtm[m][0];
+                    r1[k][1] = xtm[m][1];
+                    r1[k][2] = xtm[m][2];
+
+                    r2[k][0] = ytm[m][0];
+                    r2[k][1] = ytm[m][1];
+                    r2[k][2] = ytm[m][2];
+
+                    k_ali[ka] = m;
+                    ka++;
+                }
+                //extract rotation matrix based on the fragment                
+                Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
+                do_rotation(xtm, xt, Lali, t, u);
+                n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
+                    score_sum_method, score_d8, d0);
+                if (score>score_max)
+                {
+                    score_max = score;
+
+                    //save the rotation matrix
+                    for (k = 0; k<3; k++)
+                    {
+                        t0[k] = t[k];
+                        u0[k][0] = u[k][0];
+                        u0[k][1] = u[k][1];
+                        u0[k][2] = u[k][2];
+                    }
+                }
+
+                //check if it converges            
+                if (n_cut == ka)
+                {
+                    for (k = 0; k<n_cut; k++)
+                    {
+                        if (i_ali[k] != k_ali[k]) break;
+                    }
+                    if (k == n_cut) break;
+                }
+            } //for iteration            
+
+            if (i<iL_max)
+            {
+                i = i + simplify_step; //shift the fragment        
+                if (i>iL_max) i = iL_max;  //do this to use the last missed fragment
+            }
+            else if (i >= iL_max) break;
+        }//while(1)
+        //end of one fragment
+    }//for(i_init
+    return score_max;
+}
+
+//Comprehensive TMscore search engine
+// input:   two vector sets: x, y
+//          an alignment invmap0[] between x and y
+//          simplify_step: 1 or 40 or other integers
+//          score_sum_method: 0 for score over all pairs
+//                            8 for socre over the pairs with dist<score_d8
+// output:  the best rotaion matrix t, u that results in highest TMscore
+double detailed_search(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, double **x, double **y, int xlen, int ylen, 
+    int invmap0[], double t[3], double u[3][3], int simplify_step,
+    int score_sum_method, double local_d0_search, double Lnorm,
+    double score_d8, double d0)
+{
+    //x is model, y is template, try to superpose onto y
+    int i, j, k;     
+    double tmscore;
+    double rmsd;
+
+    k=0;
+    for(i=0; i<ylen; i++) 
+    {
+        j=invmap0[i];
+        if(j>=0) //aligned
+        {
+            xtm[k][0]=x[j][0];
+            xtm[k][1]=x[j][1];
+            xtm[k][2]=x[j][2];
+                
+            ytm[k][0]=y[i][0];
+            ytm[k][1]=y[i][1];
+            ytm[k][2]=y[i][2];
+            k++;
+        }
+    }
+
+    //detailed search 40-->1
+    tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+    return tmscore;
+}
+
+double detailed_search_standard( double **r1, double **r2,
+    double **xtm, double **ytm, double **xt, double **x, double **y,
+    int xlen, int ylen, int invmap0[], double t[3], double u[3][3],
+    int simplify_step, int score_sum_method, double local_d0_search,
+    const bool& bNormalize, double Lnorm, double score_d8, double d0)
+{
+    //x is model, y is template, try to superpose onto y
+    int i, j, k;     
+    double tmscore;
+    double rmsd;
+
+    k=0;
+    for(i=0; i<ylen; i++) 
+    {
+        j=invmap0[i];
+        if(j>=0) //aligned
+        {
+            xtm[k][0]=x[j][0];
+            xtm[k][1]=x[j][1];
+            xtm[k][2]=x[j][2];
+                
+            ytm[k][0]=y[i][0];
+            ytm[k][1]=y[i][1];
+            ytm[k][2]=y[i][2];
+            k++;
+        }
+    }
+
+    //detailed search 40-->1
+    tmscore = TMscore8_search_standard( r1, r2, xtm, ytm, xt, k, t, u,
+        simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0);
+    if (bNormalize)// "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; 
+        tmscore = tmscore * k / Lnorm;
+
+    return tmscore;
+}
+
+//compute the score quickly in three iterations
+double get_score_fast( double **r1, double **r2, double **xtm, double **ytm,
+    double **x, double **y, int xlen, int ylen, int invmap[],
+    double d0, double d0_search, double t[3], double u[3][3])
+{
+    double rms, tmscore, tmscore1, tmscore2;
+    int i, j, k;
+
+    k=0;
+    for(j=0; j<ylen; j++)
+    {
+        i=invmap[j];
+        if(i>=0)
+        {
+            r1[k][0]=x[i][0];
+            r1[k][1]=x[i][1];
+            r1[k][2]=x[i][2];
+
+            r2[k][0]=y[j][0];
+            r2[k][1]=y[j][1];
+            r2[k][2]=y[j][2];
+            
+            xtm[k][0]=x[i][0];
+            xtm[k][1]=x[i][1];
+            xtm[k][2]=x[i][2];
+            
+            ytm[k][0]=y[j][0];
+            ytm[k][1]=y[j][1];
+            ytm[k][2]=y[j][2];                  
+            
+            k++;
+        }
+        else if(i!=-1) PrintErrorAndQuit("Wrong map!\n");
+    }
+    Kabsch(r1, r2, k, 1, &rms, t, u);
+    
+    //evaluate score   
+    double di;
+    const int len=k;
+    double dis[len];    
+    double d00=d0_search;
+    double d002=d00*d00;
+    double d02=d0*d0;
+    
+    int n_ali=k;
+    double xrot[3];
+    tmscore=0;
+    for(k=0; k<n_ali; k++)
+    {
+        transform(t, u, &xtm[k][0], xrot);        
+        di=dist(xrot, &ytm[k][0]);
+        dis[k]=di;
+        tmscore += 1/(1+di/d02);
+    }
+    
+   
+   
+   //second iteration 
+    double d002t=d002;
+    while(1)
+    {
+        j=0;
+        for(k=0; k<n_ali; k++)
+        {            
+            if(dis[k]<=d002t)
+            {
+                r1[j][0]=xtm[k][0];
+                r1[j][1]=xtm[k][1];
+                r1[j][2]=xtm[k][2];
+                
+                r2[j][0]=ytm[k][0];
+                r2[j][1]=ytm[k][1];
+                r2[j][2]=ytm[k][2];
+                
+                j++;
+            }
+        }
+        //there are not enough feasible pairs, relieve the threshold 
+        if(j<3 && n_ali>3) d002t += 0.5;
+        else break;
+    }
+    
+    if(n_ali!=j)
+    {
+        Kabsch(r1, r2, j, 1, &rms, t, u);
+        tmscore1=0;
+        for(k=0; k<n_ali; k++)
+        {
+            transform(t, u, &xtm[k][0], xrot);        
+            di=dist(xrot, &ytm[k][0]);
+            dis[k]=di;
+            tmscore1 += 1/(1+di/d02);
+        }
+        
+        //third iteration
+        d002t=d002+1;
+       
+        while(1)
+        {
+            j=0;
+            for(k=0; k<n_ali; k++)
+            {            
+                if(dis[k]<=d002t)
+                {
+                    r1[j][0]=xtm[k][0];
+                    r1[j][1]=xtm[k][1];
+                    r1[j][2]=xtm[k][2];
+                    
+                    r2[j][0]=ytm[k][0];
+                    r2[j][1]=ytm[k][1];
+                    r2[j][2]=ytm[k][2];
+                                        
+                    j++;
+                }
+            }
+            //there are not enough feasible pairs, relieve the threshold 
+            if(j<3 && n_ali>3) d002t += 0.5;
+            else break;
+        }
+
+        //evaluate the score
+        Kabsch(r1, r2, j, 1, &rms, t, u);
+        tmscore2=0;
+        for(k=0; k<n_ali; k++)
+        {
+            transform(t, u, &xtm[k][0], xrot);
+            di=dist(xrot, &ytm[k][0]);
+            tmscore2 += 1/(1+di/d02);
+        }    
+    }
+    else
+    {
+        tmscore1=tmscore;
+        tmscore2=tmscore;
+    }
+    
+    if(tmscore1>=tmscore) tmscore=tmscore1;
+    if(tmscore2>=tmscore) tmscore=tmscore2;
+    return tmscore; // no need to normalize this score because it will not be used for latter scoring
+}
+
+
+//perform gapless threading to find the best initial alignment
+//input: x, y, xlen, ylen
+//output: y2x0 stores the best alignment: e.g., 
+//y2x0[j]=i means:
+//the jth element in y is aligned to the ith element in x if i>=0 
+//the jth element in y is aligned to a gap in x if i==-1
+double get_initial(double **r1, double **r2, double **xtm, double **ytm,
+    double **x, double **y, int xlen, int ylen, int *y2x,
+    double d0, double d0_search, const bool fast_opt,
+    double t[3], double u[3][3])
+{
+    int min_len=getmin(xlen, ylen);
+    if(min_len<3) PrintErrorAndQuit("Sequence is too short <3!\n");
+    
+    int min_ali= min_len/2;              //minimum size of considered fragment 
+    if(min_ali<=5)  min_ali=5;    
+    int n1, n2;
+    n1 = -ylen+min_ali; 
+    n2 = xlen-min_ali;
+
+    int i, j, k, k_best;
+    double tmscore, tmscore_max=-1;
+
+    k_best=n1;
+    for(k=n1; k<=n2; k+=(fast_opt)?5:1)
+    {
+        //get the map
+        for(j=0; j<ylen; j++)
+        {
+            i=j+k;
+            if(i>=0 && i<xlen) y2x[j]=i;
+            else y2x[j]=-1;
+        }
+        
+        //evaluate the map quickly in three iterations
+        //this is not real tmscore, it is used to evaluate the goodness of the initial alignment
+        tmscore=get_score_fast(r1, r2, xtm, ytm,
+            x, y, xlen, ylen, y2x, d0,d0_search, t, u);
+        if(tmscore>=tmscore_max)
+        {
+            tmscore_max=tmscore;
+            k_best=k;
+        }
+    }
+    
+    //extract the best map
+    k=k_best;
+    for(j=0; j<ylen; j++)
+    {
+        i=j+k;
+        if(i>=0 && i<xlen) y2x[j]=i;
+        else y2x[j]=-1;
+    }    
+
+    return tmscore_max;
+}
+
+void smooth(int *sec, int len)
+{
+    int i, j;
+    //smooth single  --x-- => -----
+    for (i=2; i<len-2; i++)
+    {
+        if(sec[i]==2 || sec[i]==4)
+        {
+            j=sec[i];
+            if (sec[i-2]!=j && sec[i-1]!=j && sec[i+1]!=j && sec[i+2]!=j)
+                sec[i]=1;
+        }
+    }
+
+    //   smooth double 
+    //   --xx-- => ------
+    for (i=0; i<len-5; i++)
+    {
+        //helix
+        if (sec[i]!=2   && sec[i+1]!=2 && sec[i+2]==2 && sec[i+3]==2 &&
+            sec[i+4]!=2 && sec[i+5]!= 2)
+        {
+            sec[i+2]=1;
+            sec[i+3]=1;
+        }
+
+        //beta
+        if (sec[i]!=4   && sec[i+1]!=4 && sec[i+2]==4 && sec[i+3]==4 &&
+            sec[i+4]!=4 && sec[i+5]!= 4)
+        {
+            sec[i+2]=1;
+            sec[i+3]=1;
+        }
+    }
+
+    //smooth connect
+    for (i=0; i<len-2; i++)
+    {        
+        if (sec[i]==2 && sec[i+1]!=2 && sec[i+2]==2) sec[i+1]=2;
+        else if(sec[i]==4 && sec[i+1]!=4 && sec[i+2]==4) sec[i+1]=4;
+    }
+
+}
+
+char sec_str(double dis13, double dis14, double dis15,
+            double dis24, double dis25, double dis35)
+{
+    char s='C';
+    
+    double delta=2.1;
+    if (fabs(dis15-6.37)<delta && fabs(dis14-5.18)<delta && 
+        fabs(dis25-5.18)<delta && fabs(dis13-5.45)<delta &&
+        fabs(dis24-5.45)<delta && fabs(dis35-5.45)<delta)
+    {
+        s='H'; //helix                        
+        return s;
+    }
+
+    delta=1.42;
+    if (fabs(dis15-13  )<delta && fabs(dis14-10.4)<delta &&
+        fabs(dis25-10.4)<delta && fabs(dis13-6.1 )<delta &&
+        fabs(dis24-6.1 )<delta && fabs(dis35-6.1 )<delta)
+    {
+        s='E'; //strand
+        return s;
+    }
+
+    if (dis15 < 8) s='T'; //turn
+    return s;
+}
+
+
+/* secondary stucture assignment for protein:
+ * 1->coil, 2->helix, 3->turn, 4->strand */
+void make_sec(double **x, int len, char *sec)
+{
+    int j1, j2, j3, j4, j5;
+    double d13, d14, d15, d24, d25, d35;
+    for(int i=0; i<len; i++)
+    {     
+        sec[i]='C';
+        j1=i-2;
+        j2=i-1;
+        j3=i;
+        j4=i+1;
+        j5=i+2;        
+        
+        if(j1>=0 && j5<len)
+        {
+            d13=sqrt(dist(x[j1], x[j3]));
+            d14=sqrt(dist(x[j1], x[j4]));
+            d15=sqrt(dist(x[j1], x[j5]));
+            d24=sqrt(dist(x[j2], x[j4]));
+            d25=sqrt(dist(x[j2], x[j5]));
+            d35=sqrt(dist(x[j3], x[j5]));
+            sec[i]=sec_str(d13, d14, d15, d24, d25, d35);            
+        }    
+    } 
+    sec[len]=0;
+}
+
+//get initial alignment from secondary structure alignment
+//input: x, y, xlen, ylen
+//output: y2x stores the best alignment: e.g., 
+//y2x[j]=i means:
+//the jth element in y is aligned to the ith element in x if i>=0 
+//the jth element in y is aligned to a gap in x if i==-1
+void get_initial_ss(bool **path, double **val,
+    const char *secx, const char *secy, int xlen, int ylen, int *y2x)
+{
+    double gap_open=-1.0;
+    NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x);
+}
+
+
+// get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji
+//get initial alignment of local structure superposition
+//input: x, y, xlen, ylen
+//output: y2x stores the best alignment: e.g., 
+//y2x[j]=i means:
+//the jth element in y is aligned to the ith element in x if i>=0 
+//the jth element in y is aligned to a gap in x if i==-1
+bool get_initial5( double **r1, double **r2, double **xtm, double **ytm,
+    bool **path, double **val,
+    double **x, double **y, int xlen, int ylen, int *y2x,
+    double d0, double d0_search, const bool fast_opt, const double D0_MIN)
+{
+    double GL, rmsd;
+    double t[3];
+    double u[3][3];
+
+    double d01 = d0 + 1.5;
+    if (d01 < D0_MIN) d01 = D0_MIN;
+    double d02 = d01*d01;
+
+    double GLmax = 0;
+    int aL = getmin(xlen, ylen);
+    int *invmap = new int[ylen + 1];
+
+    // jump on sequence1-------------->
+    int n_jump1 = 0;
+    if (xlen > 250)
+        n_jump1 = 45;
+    else if (xlen > 200)
+        n_jump1 = 35;
+    else if (xlen > 150)
+        n_jump1 = 25;
+    else
+        n_jump1 = 15;
+    if (n_jump1 > (xlen / 3))
+        n_jump1 = xlen / 3;
+
+    // jump on sequence2-------------->
+    int n_jump2 = 0;
+    if (ylen > 250)
+        n_jump2 = 45;
+    else if (ylen > 200)
+        n_jump2 = 35;
+    else if (ylen > 150)
+        n_jump2 = 25;
+    else
+        n_jump2 = 15;
+    if (n_jump2 > (ylen / 3))
+        n_jump2 = ylen / 3;
+
+    // fragment to superimpose-------------->
+    int n_frag[2] = { 20, 100 };
+    if (n_frag[0] > (aL / 3))
+        n_frag[0] = aL / 3;
+    if (n_frag[1] > (aL / 2))
+        n_frag[1] = aL / 2;
+
+    // start superimpose search-------------->
+    if (fast_opt)
+    {
+        n_jump1*=5;
+        n_jump2*=5;
+    }
+    bool flag = false;
+    for (int i_frag = 0; i_frag < 2; i_frag++)
+    {
+        int m1 = xlen - n_frag[i_frag] + 1;
+        int m2 = ylen - n_frag[i_frag] + 1;
+
+        for (int i = 0; i<m1; i = i + n_jump1) //index starts from 0, different from FORTRAN
+        {
+            for (int j = 0; j<m2; j = j + n_jump2)
+            {
+                for (int k = 0; k<n_frag[i_frag]; k++) //fragment in y
+                {
+                    r1[k][0] = x[k + i][0];
+                    r1[k][1] = x[k + i][1];
+                    r1[k][2] = x[k + i][2];
+
+                    r2[k][0] = y[k + j][0];
+                    r2[k][1] = y[k + j][1];
+                    r2[k][2] = y[k + j][2];
+                }
+
+                // superpose the two structures and rotate it
+                Kabsch(r1, r2, n_frag[i_frag], 1, &rmsd, t, u);
+
+                double gap_open = 0.0;
+                NWDP_TM(path, val, x, y, xlen, ylen,
+                    t, u, d02, gap_open, invmap);
+                GL = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen,
+                    invmap, d0, d0_search, t, u);
+                if (GL>GLmax)
+                {
+                    GLmax = GL;
+                    for (int ii = 0; ii<ylen; ii++) y2x[ii] = invmap[ii];
+                    flag = true;
+                }
+            }
+        }
+    }
+
+    delete[] invmap;
+    return flag;
+}
+
+void score_matrix_rmsd_sec( double **r1, double **r2, double **score,
+    const char *secx, const char *secy, double **x, double **y,
+    int xlen, int ylen, int *y2x, const double D0_MIN, double d0)
+{
+    double t[3], u[3][3];
+    double rmsd, dij;
+    double d01=d0+1.5;
+    if(d01 < D0_MIN) d01=D0_MIN;
+    double d02=d01*d01;
+
+    double xx[3];
+    int i, k=0;
+    for(int j=0; j<ylen; j++)
+    {
+        i=y2x[j];
+        if(i>=0)
+        {
+            r1[k][0]=x[i][0];  
+            r1[k][1]=x[i][1]; 
+            r1[k][2]=x[i][2];   
+            
+            r2[k][0]=y[j][0];  
+            r2[k][1]=y[j][1]; 
+            r2[k][2]=y[j][2];
+            
+            k++;
+        }
+    }
+    Kabsch(r1, r2, k, 1, &rmsd, t, u);
+
+    
+    for(int ii=0; ii<xlen; ii++)
+    {        
+        transform(t, u, &x[ii][0], xx);
+        for(int jj=0; jj<ylen; jj++)
+        {
+            dij=dist(xx, &y[jj][0]); 
+            if (secx[ii]==secy[jj])
+                score[ii+1][jj+1] = 1.0/(1+dij/d02) + 0.5;
+            else
+                score[ii+1][jj+1] = 1.0/(1+dij/d02);
+        }
+    }
+}
+
+
+//get initial alignment from secondary structure and previous alignments
+//input: x, y, xlen, ylen
+//output: y2x stores the best alignment: e.g., 
+//y2x[j]=i means:
+//the jth element in y is aligned to the ith element in x if i>=0 
+//the jth element in y is aligned to a gap in x if i==-1
+void get_initial_ssplus(double **r1, double **r2, double **score, bool **path,
+    double **val, const char *secx, const char *secy, double **x, double **y,
+    int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0)
+{
+    //create score matrix for DP
+    score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen,
+        y2x0, D0_MIN,d0);
+    
+    double gap_open=-1.0;
+    NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x);
+}
+
+
+void find_max_frag(double **x, int len, int *start_max,
+    int *end_max, double dcu0, const bool fast_opt)
+{
+    int r_min, fra_min=4;           //minimum fragment for search
+    if (fast_opt) fra_min=8;
+    int start;
+    int Lfr_max=0;
+
+    r_min= (int) (len*1.0/3.0); //minimum fragment, in case too small protein
+    if(r_min > fra_min) r_min=fra_min;
+    
+    int inc=0;
+    double dcu0_cut=dcu0*dcu0;;
+    double dcu_cut=dcu0_cut;
+
+    while(Lfr_max < r_min)
+    {        
+        Lfr_max=0;            
+        int j=1;    //number of residues at nf-fragment
+        start=0;
+        for(int i=1; i<len; i++)
+        {
+            if(dist(x[i-1], x[i]) < dcu_cut)
+            {
+                j++;
+
+                if(i==(len-1))
+                {
+                    if(j > Lfr_max) 
+                    {
+                        Lfr_max=j;
+                        *start_max=start;
+                        *end_max=i;                        
+                    }
+                    j=1;
+                }
+            }
+            else
+            {
+                if(j>Lfr_max) 
+                {
+                    Lfr_max=j;
+                    *start_max=start;
+                    *end_max=i-1;                                        
+                }
+
+                j=1;
+                start=i;
+            }
+        }// for i;
+        
+        if(Lfr_max < r_min)
+        {
+            inc++;
+            double dinc=pow(1.1, (double) inc) * dcu0;
+            dcu_cut= dinc*dinc;
+        }
+    }//while <;    
+}
+
+//perform fragment gapless threading to find the best initial alignment
+//input: x, y, xlen, ylen
+//output: y2x0 stores the best alignment: e.g., 
+//y2x0[j]=i means:
+//the jth element in y is aligned to the ith element in x if i>=0 
+//the jth element in y is aligned to a gap in x if i==-1
+double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm,
+    double **x, double **y, int xlen, int ylen, 
+    int *y2x, double d0, double d0_search,
+    double dcu0, const bool fast_opt, double t[3], double u[3][3])
+{
+    int fra_min=4;           //minimum fragment for search
+    if (fast_opt) fra_min=8;
+    int fra_min1=fra_min-1;  //cutoff for shift, save time
+
+    int xstart=0, ystart=0, xend=0, yend=0;
+
+    find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt);
+    find_max_frag(y, ylen, &ystart, &yend, dcu0, fast_opt);
+
+
+    int Lx = xend-xstart+1;
+    int Ly = yend-ystart+1;
+    int *ifr, *y2x_;
+    int L_fr=getmin(Lx, Ly);
+    ifr= new int[L_fr];
+    y2x_= new int[ylen+1];
+
+    //select what piece will be used. The original implement may cause 
+    //asymetry, but only when xlen==ylen and Lx==Ly
+    //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1
+
+    if(Lx<Ly || (Lx==Ly && xlen<ylen))
+    {        
+        for(int i=0; i<L_fr; i++) ifr[i]=xstart+i;
+    }
+    else if(Lx>Ly || (Lx==Ly && xlen>ylen))
+    {        
+        for(int i=0; i<L_fr; i++) ifr[i]=ystart+i;
+    }
+    else // solve asymetric for 1x5gA vs 2q7nA5
+    {
+        /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */
+        int L0=xlen;
+        double tmscore, tmscore_max=-1;
+        int i, j, k;
+        int n1, n2;
+        int min_len;
+        int min_ali;
+
+        /* part 1, normalized by xlen */
+        for(i=0; i<L_fr; i++) ifr[i]=xstart+i;
+
+        if(L_fr==L0)
+        {
+            n1= (int)(L0*0.1); //my index starts from 0
+            n2= (int)(L0*0.89);
+            j=0;
+            for(i=n1; i<= n2; i++)
+            {
+                ifr[j]=ifr[i];
+                j++;
+            }
+            L_fr=j;
+        }
+
+        int L1=L_fr;
+        min_len=getmin(L1, ylen);    
+        min_ali= (int) (min_len/2.5); //minimum size of considered fragment 
+        if(min_ali<=fra_min1)  min_ali=fra_min1;    
+        n1 = -ylen+min_ali; 
+        n2 = L1-min_ali;
+
+        for(k=n1; k<=n2; k+=(fast_opt)?3:1)
+        {
+            //get the map
+            for(j=0; j<ylen; j++)
+            {
+                i=j+k;
+                if(i>=0 && i<L1) y2x_[j]=ifr[i];
+                else             y2x_[j]=-1;
+            }
+
+            //evaluate the map quickly in three iterations
+            tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
+                d0, d0_search, t, u);
+
+            if(tmscore>=tmscore_max)
+            {
+                tmscore_max=tmscore;
+                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
+            }
+        }
+
+        /* part 2, normalized by ylen */
+        L_fr=Ly;
+        for(i=0; i<L_fr; i++) ifr[i]=ystart+i;
+
+        if (L_fr==L0)
+        {
+            n1= (int)(L0*0.1); //my index starts from 0
+            n2= (int)(L0*0.89);
+
+            j=0;
+            for(i=n1; i<= n2; i++)
+            {
+                ifr[j]=ifr[i];
+                j++;
+            }
+            L_fr=j;
+        }
+
+        int L2=L_fr;
+        min_len=getmin(xlen, L2);    
+        min_ali= (int) (min_len/2.5); //minimum size of considered fragment 
+        if(min_ali<=fra_min1)  min_ali=fra_min1;    
+        n1 = -L2+min_ali; 
+        n2 = xlen-min_ali;
+
+        for(k=n1; k<=n2; k++)
+        {
+            //get the map
+            for(j=0; j<ylen; j++) y2x_[j]=-1;
+
+            for(j=0; j<L2; j++)
+            {
+                i=j+k;
+                if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
+            }
+        
+            //evaluate the map quickly in three iterations
+            tmscore=get_score_fast(r1, r2, xtm, ytm,
+                x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
+            if(tmscore>=tmscore_max)
+            {
+                tmscore_max=tmscore;
+                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
+            }
+        }
+
+        delete [] ifr;
+        delete [] y2x_;
+        return tmscore_max;
+    }
+
+    
+    int L0=getmin(xlen, ylen); //non-redundant to get_initial1
+    if(L_fr==L0)
+    {
+        int n1= (int)(L0*0.1); //my index starts from 0
+        int n2= (int)(L0*0.89);
+
+        int j=0;
+        for(int i=n1; i<= n2; i++)
+        {
+            ifr[j]=ifr[i];
+            j++;
+        }
+        L_fr=j;
+    }
+
+
+    //gapless threading for the extracted fragment
+    double tmscore, tmscore_max=-1;
+
+    if(Lx<Ly || (Lx==Ly && xlen<=ylen))
+    {
+        int L1=L_fr;
+        int min_len=getmin(L1, ylen);    
+        int min_ali= (int) (min_len/2.5);              //minimum size of considered fragment 
+        if(min_ali<=fra_min1)  min_ali=fra_min1;    
+        int n1, n2;
+        n1 = -ylen+min_ali; 
+        n2 = L1-min_ali;
+
+        int i, j, k;
+        for(k=n1; k<=n2; k+=(fast_opt)?3:1)
+        {
+            //get the map
+            for(j=0; j<ylen; j++)
+            {
+                i=j+k;
+                if(i>=0 && i<L1) y2x_[j]=ifr[i];
+                else             y2x_[j]=-1;
+            }
+
+            //evaluate the map quickly in three iterations
+            tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
+                d0, d0_search, t, u);
+
+            if(tmscore>=tmscore_max)
+            {
+                tmscore_max=tmscore;
+                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
+            }
+        }
+    }
+    else
+    {
+        int L2=L_fr;
+        int min_len=getmin(xlen, L2);    
+        int min_ali= (int) (min_len/2.5);              //minimum size of considered fragment 
+        if(min_ali<=fra_min1)  min_ali=fra_min1;    
+        int n1, n2;
+        n1 = -L2+min_ali; 
+        n2 = xlen-min_ali;
+
+        int i, j, k;    
+
+        for(k=n1; k<=n2; k++)
+        {
+            //get the map
+            for(j=0; j<ylen; j++) y2x_[j]=-1;
+
+            for(j=0; j<L2; j++)
+            {
+                i=j+k;
+                if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
+            }
+        
+            //evaluate the map quickly in three iterations
+            tmscore=get_score_fast(r1, r2, xtm, ytm,
+                x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
+            if(tmscore>=tmscore_max)
+            {
+                tmscore_max=tmscore;
+                for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
+            }
+        }
+    }    
+
+
+    delete [] ifr;
+    delete [] y2x_;
+    return tmscore_max;
+}
+
+//heuristic run of dynamic programing iteratively to find the best alignment
+//input: initial rotation matrix t, u
+//       vectors x and y, d0
+//output: best alignment that maximizes the TMscore, will be stored in invmap
+double DP_iter(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, bool **path, double **val, double **x, double **y,
+    int xlen, int ylen, double t[3], double u[3][3], int invmap0[],
+    int g1, int g2, int iteration_max, double local_d0_search,
+    double D0_MIN, double Lnorm, double d0, double score_d8)
+{
+    double gap_open[2]={-0.6, 0};
+    double rmsd; 
+    int *invmap=new int[ylen+1];
+    
+    int iteration, i, j, k;
+    double tmscore, tmscore_max, tmscore_old=0;    
+    int score_sum_method=8, simplify_step=40;
+    tmscore_max=-1;
+
+    //double d01=d0+1.5;
+    double d02=d0*d0;
+    for(int g=g1; g<g2; g++)
+    {
+        for(iteration=0; iteration<iteration_max; iteration++)
+        {           
+            NWDP_TM(path, val, x, y, xlen, ylen,
+                t, u, d02, gap_open[g], invmap);
+            
+            k=0;
+            for(j=0; j<ylen; j++) 
+            {
+                i=invmap[j];
+
+                if(i>=0) //aligned
+                {
+                    xtm[k][0]=x[i][0];
+                    xtm[k][1]=x[i][1];
+                    xtm[k][2]=x[i][2];
+                    
+                    ytm[k][0]=y[j][0];
+                    ytm[k][1]=y[j][1];
+                    ytm[k][2]=y[j][2];
+                    k++;
+                }
+            }
+
+            tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u,
+                simplify_step, score_sum_method, &rmsd, local_d0_search,
+                Lnorm, score_d8, d0);
+
+           
+            if(tmscore>tmscore_max)
+            {
+                tmscore_max=tmscore;
+                for(i=0; i<ylen; i++) invmap0[i]=invmap[i];
+            }
+    
+            if(iteration>0)
+            {
+                if(fabs(tmscore_old-tmscore)<0.000001) break;       
+            }
+            tmscore_old=tmscore;
+        }// for iteration           
+        
+    }//for gapopen
+    
+    
+    delete []invmap;
+    return tmscore_max;
+}
+
+
+void output_superpose(const string xname, const string yname,
+    const string fname_super,
+    double t[3], double u[3][3], const int ter_opt, const int mirror_opt,
+    const char *seqM, const char *seqxA, const char *seqyA,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2,
+    const char *chainID1, const char *chainID2,
+    const int xlen, const int ylen, const double d0A, const int n_ali8,
+    const double rmsd, const double TM1, const double Liden)
+{
+    stringstream buf;
+    stringstream buf_all;
+    stringstream buf_atm;
+    stringstream buf_all_atm;
+    stringstream buf_all_atm_lig;
+    stringstream buf_pdb;
+    stringstream buf_pymol;
+    stringstream buf_tm;
+    string line;
+    double x[3];  // before transform
+    double x1[3]; // after transform
+    bool after_ter; // true if passed the "TER" line in PDB
+    string asym_id; // chain ID
+
+    buf_tm<<"REMARK TM-align"
+        <<"\nREMARK Chain 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen
+        <<"\nREMARK Chain 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen
+        <<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0="
+        <<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")"
+        <<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD="
+        <<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd
+        <<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1
+        <<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3)
+        <<((n_ali8>0)?Liden/n_ali8:0)<<endl;
+    string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n";
+    string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str();
+    buf<<rasmol_CA_header;
+    buf_all<<rasmol_CA_header;
+    buf_atm<<rasmol_cartoon_header;
+    buf_all_atm<<rasmol_cartoon_header;
+    buf_all_atm_lig<<rasmol_cartoon_header;
+
+    /* for PDBx/mmCIF only */
+    map<string,int> _atom_site;
+    int atom_site_pos;
+    vector<string> line_vec;
+    string atom; // 4-character atom name
+    string AA;   // 3-character residue name
+    string resi; // 4-character residue sequence number
+    string inscode; // 1-character insertion code
+    string model_index; // model index
+    bool is_mmcif=false;
+    int chain_num=0;
+
+    /* used for CONECT record of chain1 */
+    int ca_idx1=0; // all CA atoms
+    int lig_idx1=0; // all atoms
+    vector <int> idx_vec;
+
+    /* used for CONECT record of chain2 */
+    int ca_idx2=0; // all CA atoms
+    int lig_idx2=0; // all atoms
+
+    /* extract aligned region */
+    vector<string> resi_aln1;
+    vector<string> resi_aln2;
+    int i1=-1;
+    int i2=-1;
+    int i;
+    for (i=0;i<strlen(seqM);i++)
+    {
+        i1+=(seqxA[i]!='-');
+        i2+=(seqyA[i]!='-');
+        if (seqM[i]==' ') continue;
+        resi_aln1.push_back(resi_vec1[i1].substr(0,4));
+        resi_aln2.push_back(resi_vec2[i2].substr(0,4));
+        if (seqM[i]!=':') continue;
+        buf    <<"select "<<resi_aln1.back()<<":A,"
+               <<resi_aln2.back()<<":B\ncolor red\n";
+        buf_all<<"select "<<resi_aln1.back()<<":A,"
+               <<resi_aln2.back()<<":B\ncolor red\n";
+    }
+    buf<<"select all\nexit\n"<<buf_tm.str();
+    buf_all<<"select all\nexit\n"<<buf_tm.str();
+
+    ifstream fin;
+    /* read first file */
+    after_ter=false;
+    asym_id="";
+    fin.open(xname.c_str());
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (is_mmcif==false && line.size()>=54 &&
+           (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            x[0]=atof(line.substr(30,8).c_str());
+            x[1]=atof(line.substr(38,8).c_str());
+            x[2]=atof(line.substr(46,8).c_str());
+            if (mirror_opt) x[2]=-x[2];
+            transform(t, u, x, x1);
+            buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed)
+                <<setprecision(3)
+                <<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
+                <<line.substr(54)<<'\n';
+
+            if (line[16]!='A' && line[16]!=' ') continue;
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx1++;
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue;
+            if (ter_opt>=2)
+            {
+                if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) 
+                {
+                    after_ter=true;
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
+                )!=resi_aln1.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1
+                    <<line.substr(11,9)<<" A"<<line.substr(22,8)
+                    <<setiosflags(ios::fixed)<<setprecision(3)
+                    <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
+            }
+            if (line.substr(12,4)!=" CA ") continue;
+            ca_idx1++;
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1
+                <<"  CA  "<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
+                )==resi_aln1.end()) continue;
+            buf<<"ATOM  "<<setw(5)<<ca_idx1
+                <<"  CA  "<<line.substr(17,3)<<" A"<<line.substr(22,8)
+                <<setiosflags(ios::fixed)<<setprecision(3)
+                <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
+            idx_vec.push_back(ca_idx1);
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            if (is_mmcif==false)
+            {
+                buf_pdb.str(string());
+                is_mmcif=true;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
+                x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
+                x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
+                if (mirror_opt) x[2]=-x[2];
+                transform(t, u, x, x1);
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                    
+                    if (_atom_site.count("auth_asym_id"))
+                    {
+                        if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["auth_asym_id"]];
+                    }
+                    else if (_atom_site.count("label_asym_id"))
+                    {
+                        if (ter_opt>=2 && ca_idx1 && asym_id.size() && 
+                            asym_id!=line_vec[_atom_site["label_asym_id"]])
+                            after_ter=true;
+                        asym_id=line_vec[_atom_site["label_asym_id"]];
+                    }
+                    buf_pdb<<left<<setw(6)
+                        <<line_vec[_atom_site["group_PDB"]]<<right
+                        <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                        <<AA<<" "<<asym_id[asym_id.size()-1]
+                        <<resi<<inscode<<"   "
+                        <<setiosflags(ios::fixed)<<setprecision(3)
+                        <<setw(8)<<x1[0]
+                        <<setw(8)<<x1[1]
+                        <<setw(8)<<x1[2]<<'\n';
+
+                    if (after_ter==false ||
+                        line_vec[_atom_site["group_pdb"]]=="HETATM")
+                    {
+                        lig_idx1++;
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                            <<AA<<" A"<<resi<<inscode<<"   "
+                            <<setiosflags(ios::fixed)<<setprecision(3)
+                            <<setw(8)<<x1[0]
+                            <<setw(8)<<x1[1]
+                            <<setw(8)<<x1[2]<<'\n';
+                        if (after_ter==false &&
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
+                                <<AA<<" A"<<resi<<inscode<<"   "
+                                <<setiosflags(ios::fixed)<<setprecision(3)
+                                <<setw(8)<<x1[0]
+                                <<setw(8)<<x1[1]
+                                <<setw(8)<<x1[2]<<'\n';
+                            if (find(resi_aln1.begin(),resi_aln1.end(),resi
+                                )!=resi_aln1.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<lig_idx1%100000<<' '
+                                    <<atom<<' '<<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                            }
+                            if (atom==" CA ")
+                            {
+                                ca_idx1++;
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<"  CA  "
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                if (find(resi_aln1.begin(),resi_aln1.end(),resi
+                                    )!=resi_aln1.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<ca_idx1%100000<<"  CA  "
+                                    <<AA<<" A"<<resi<<inscode<<"   "
+                                    <<setiosflags(ios::fixed)<<setprecision(3)
+                                    <<setw(8)<<x1[0]
+                                    <<setw(8)<<x1[1]
+                                    <<setw(8)<<x1[2]<<'\n';
+                                    idx_vec.push_back(ca_idx1);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                while(1)
+                {
+                    if (fin.good()) getline(fin, line);
+                    else break;
+                    if (line.size()) break;
+                }
+            }
+        }
+        else if (line.size() && is_mmcif==false)
+        {
+            buf_pdb<<line<<'\n';
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    buf<<"TER\n";
+    buf_all<<"TER\n";
+    buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=1;i<ca_idx1;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* read second file */
+    after_ter=false;
+    asym_id="";
+    fin.open(yname.c_str());
+    while (fin.good())
+    {
+        getline(fin, line);
+        if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
+        if (line.size()>=54 && (line.compare(0, 6, "ATOM  ")==0 ||
+            line.compare(0, 6, "HETATM")==0)) // PDB format
+        {
+            if (line[16]!='A' && line[16]!=' ') continue;
+            if (after_ter && line.compare(0,6,"ATOM  ")==0) continue;
+            lig_idx2++;
+            buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (after_ter || line.compare(0,6,"ATOM  ")) continue;
+            if (ter_opt>=2)
+            {
+                if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1))
+                {
+                    after_ter=true;
+                    continue;
+                }
+                asym_id=line[21];
+            }
+            buf_all_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
+                )!=resi_aln2.end())
+            {
+                buf_atm<<"ATOM  "<<setw(5)<<lig_idx1+lig_idx2
+                    <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
+            }
+            if (line.substr(12,4)!=" CA ") continue;
+            ca_idx2++;
+            buf_all<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<"  CA  "
+                <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
+            if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
+                )==resi_aln2.end()) continue;
+            buf<<"ATOM  "<<setw(5)<<ca_idx1+ca_idx2<<"  CA  "
+                <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
+            idx_vec.push_back(ca_idx1+ca_idx2);
+        }
+        else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
+        {
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()) break;
+            }
+            if (line.compare(0,11,"_atom_site.")) continue;
+            _atom_site.clear();
+            atom_site_pos=0;
+            _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
+            while(1)
+            {
+                if (fin.good()) getline(fin, line);
+                else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
+                if (line.size()==0) continue;
+                if (line.compare(0,11,"_atom_site.")) break;
+                _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
+            }
+
+            while(1)
+            {
+                line_vec.clear();
+                split(line,line_vec);
+                if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
+                    line_vec[_atom_site["group_PDB"]]!="HETATM") break;
+                if (_atom_site.count("pdbx_PDB_model_num"))
+                {
+                    if (model_index.size() && model_index!=
+                        line_vec[_atom_site["pdbx_PDB_model_num"]])
+                        break;
+                    model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
+                }
+
+                if (_atom_site.count("label_alt_id")==0 || 
+                    line_vec[_atom_site["label_alt_id"]]=="." ||
+                    line_vec[_atom_site["label_alt_id"]]=="A")
+                {
+                    atom=line_vec[_atom_site["label_atom_id"]];
+                    if (atom[0]=='"') atom=atom.substr(1);
+                    if (atom.size() && atom[atom.size()-1]=='"')
+                        atom=atom.substr(0,atom.size()-1);
+                    if      (atom.size()==0) atom="    ";
+                    else if (atom.size()==1) atom=" "+atom+"  ";
+                    else if (atom.size()==2) atom=" "+atom+" ";
+                    else if (atom.size()==3) atom=" "+atom;
+                    else if (atom.size()>=5) atom=atom.substr(0,4);
+            
+                    AA=line_vec[_atom_site["label_comp_id"]]; // residue name
+                    if      (AA.size()==1) AA="  "+AA;
+                    else if (AA.size()==2) AA=" " +AA;
+                    else if (AA.size()>=4) AA=AA.substr(0,3);
+                
+                    if (_atom_site.count("auth_seq_id"))
+                        resi=line_vec[_atom_site["auth_seq_id"]];
+                    else resi=line_vec[_atom_site["label_seq_id"]];
+                    while (resi.size()<4) resi=' '+resi;
+                    if (resi.size()>4) resi=resi.substr(0,4);
+                
+                    inscode=' ';
+                    if (_atom_site.count("pdbx_PDB_ins_code") && 
+                        line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
+                        inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
+                    
+                    if (ter_opt>=2)
+                    {
+                        if (_atom_site.count("auth_asym_id"))
+                        {
+                            if (ca_idx2 && asym_id.size() && 
+                                asym_id!=line_vec[_atom_site["auth_asym_id"]])
+                                after_ter=true;
+                            else
+                                asym_id=line_vec[_atom_site["auth_asym_id"]];
+                        }
+                        else if (_atom_site.count("label_asym_id"))
+                        {
+                            if (ca_idx2 && asym_id.size() && 
+                                asym_id!=line_vec[_atom_site["label_asym_id"]])
+                                after_ter=true;
+                            else
+                                asym_id=line_vec[_atom_site["label_asym_id"]];
+                        }
+                    }
+                    if (after_ter==false || 
+                        line_vec[_atom_site["group_PDB"]]=="HETATM")
+                    {
+                        lig_idx2++;
+                        buf_all_atm_lig<<left<<setw(6)
+                            <<line_vec[_atom_site["group_PDB"]]<<right
+                            <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                            <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                            <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                            <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                            <<'\n';
+                        if (after_ter==false &&
+                            line_vec[_atom_site["group_PDB"]]=="ATOM")
+                        {
+                            buf_all_atm<<"ATOM  "<<setw(6)
+                                <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                <<'\n';
+                            if (find(resi_aln2.begin(),resi_aln2.end(),resi
+                                    )!=resi_aln2.end())
+                            {
+                                buf_atm<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
+                                    <<atom<<' '<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                            }
+                            if (atom==" CA ")
+                            {
+                                ca_idx2++;
+                                buf_all<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<"  CA  "<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                if (find(resi_aln2.begin(),resi_aln2.end(),resi
+                                    )!=resi_aln2.end())
+                                {
+                                    buf<<"ATOM  "<<setw(6)
+                                    <<setw(5)<<(ca_idx1+ca_idx2)%100000
+                                    <<"  CA  "<<AA<<" B"<<resi<<inscode<<"   "
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
+                                    <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
+                                    <<'\n';
+                                    idx_vec.push_back(ca_idx1+ca_idx2);
+                                }
+                            }
+                        }
+                    }
+                }
+
+                if (fin.good()) getline(fin, line);
+                else break;
+            }
+        }
+        else if (line.size())
+        {
+            if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
+        }
+    }
+    fin.close();
+    buf<<"TER\n";
+    buf_all<<"TER\n";
+    buf_atm<<"TER\n";
+    buf_all_atm<<"TER\n";
+    buf_all_atm_lig<<"TER\n";
+    for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT"
+        <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
+    for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
+        <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
+    idx_vec.clear();
+
+    /* write pymol script */
+    ofstream fp;
+    vector<string> pml_list;
+    pml_list.push_back(fname_super+"");
+    pml_list.push_back(fname_super+"_atm");
+    pml_list.push_back(fname_super+"_all");
+    pml_list.push_back(fname_super+"_all_atm");
+    pml_list.push_back(fname_super+"_all_atm_lig");
+    for (i=0;i<pml_list.size();i++)
+    {
+        buf_pymol<<"#!/usr/bin/env pymol\n"
+            <<"load "<<pml_list[i]<<"\n"
+            <<"hide all\n"
+            <<((i==0 || i==2)?("show stick\n"):("show cartoon\n"))
+            <<"color blue, chain A\n"
+            <<"color red, chain B\n"
+            <<"set ray_shadow, 0\n"
+            <<"set stick_radius, 0.3\n"
+            <<"set sphere_scale, 0.25\n"
+            <<"show stick, not polymer\n"
+            <<"show sphere, not polymer\n"
+            <<"bg_color white\n"
+            <<"set transparency=0.2\n"
+            <<"zoom polymer\n"
+            <<endl;
+        fp.open((pml_list[i]+".pml").c_str());
+        fp<<buf_pymol.str();
+        fp.close();
+        buf_pymol.str(string());
+        pml_list[i].clear();
+    }
+    pml_list.clear();
+    
+    /* write rasmol script */
+    fp.open((fname_super).c_str());
+    fp<<buf.str();
+    fp.close();
+    fp.open((fname_super+"_all").c_str());
+    fp<<buf_all.str();
+    fp.close();
+    fp.open((fname_super+"_atm").c_str());
+    fp<<buf_atm.str();
+    fp.close();
+    fp.open((fname_super+"_all_atm").c_str());
+    fp<<buf_all_atm.str();
+    fp.close();
+    fp.open((fname_super+"_all_atm_lig").c_str());
+    fp<<buf_all_atm_lig.str();
+    fp.close();
+    fp.open((fname_super+".pdb").c_str());
+    fp<<buf_pdb.str();
+    fp.close();
+
+    /* clear stream */
+    buf.str(string());
+    buf_all.str(string());
+    buf_atm.str(string());
+    buf_all_atm.str(string());
+    buf_all_atm_lig.str(string());
+    buf_pdb.str(string());
+    buf_tm.str(string());
+    resi_aln1.clear();
+    resi_aln2.clear();
+    asym_id.clear();
+    line_vec.clear();
+    atom.clear();
+    AA.clear();
+    resi.clear();
+    inscode.clear();
+    model_index.clear();
+}
+
+/* extract rotation matrix based on TMscore8 */
+void output_rotation_matrix(const char* fname_matrix,
+    const double t[3], const double u[3][3])
+{
+    fstream fout;
+    fout.open(fname_matrix, ios::out | ios::trunc);
+    if (fout)// succeed
+    {
+        fout << "------ The rotation matrix to rotate Chain_1 to Chain_2 ------\n";
+        char dest[1000];
+        sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
+        fout << string(dest);
+        for (int k = 0; k < 3; k++)
+        {
+            sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
+            fout << string(dest);
+        }
+        fout << "\nCode for rotating Structure A from (x,y,z) to (X,Y,Z):\n"
+                "for(i=0; i<L; i++)\n"
+                "{\n"
+                "   X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
+                "   Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
+                "   Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
+                "}\n";
+        fout.close();
+    }
+    else
+        cout << "Open file to output rotation matrix fail.\n";
+}
+
+//output the final results
+void output_results(
+    const string xname, const string yname,
+    const char *chainID1, const char *chainID2,
+    const int xlen, const int ylen, double t[3], double u[3][3],
+    const double TM1, const double TM2,
+    const double TM3, const double TM4, const double TM5,
+    const double rmsd, const double d0_out,
+    const char *seqM, const char *seqxA, const char *seqyA, const double Liden,
+    const int n_ali8, const int L_ali,
+    const double TM_ali, const double rmsd_ali, const double TM_0,
+    const double d0_0, const double d0A, const double d0B,
+    const double Lnorm_ass, const double d0_scale, 
+    const double d0a, const double d0u, const char* fname_matrix,
+    const int outfmt_opt, const int ter_opt, const string fname_super,
+    const int i_opt, const int a_opt, const bool u_opt, const bool d_opt,
+    const int mirror_opt,
+    const vector<string>&resi_vec1, const vector<string>&resi_vec2)
+{
+    if (outfmt_opt<=0)
+    {
+        printf("\nName of Chain_1: %s%s (to be superimposed onto Chain_2)\n",
+            xname.c_str(), chainID1);
+        printf("Name of Chain_2: %s%s\n", yname.c_str(), chainID2);
+        printf("Length of Chain_1: %d residues\n", xlen);
+        printf("Length of Chain_2: %d residues\n\n", ylen);
+
+        if (i_opt)
+            printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali);
+
+        printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+        printf("TM-score= %6.5f (if normalized by length of Chain_1, i.e., LN=%d, d0=%.2f)\n", TM2, xlen, d0B);
+        printf("TM-score= %6.5f (if normalized by length of Chain_2, i.e., LN=%d, d0=%.2f)\n", TM1, ylen, d0A);
+
+        if (a_opt==1)
+            printf("TM-score= %6.5f (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+        if (u_opt)
+            printf("TM-score= %6.5f (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
+        if (d_opt)
+            printf("TM-score= %6.5f (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen);
+        printf("(You should use TM-score normalized by length of the reference structure)\n");
+    
+        //output alignment
+        printf("\n(\":\" denotes residue pairs of d < %4.1f Angstrom, ", d0_out);
+        printf("\".\" denotes other aligned residues)\n");
+        printf("%s\n", seqxA);
+        printf("%s\n", seqM);
+        printf("%s\n", seqyA);
+    }
+    else if (outfmt_opt==1)
+    {
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            xname.c_str(), chainID1, xlen, d0B, Liden/xlen, TM2);
+        printf("%s\n", seqxA);
+        printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
+            yname.c_str(), chainID2, ylen, d0A, Liden/ylen, TM1);
+        printf("%s\n", seqyA);
+
+        printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
+            n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
+
+        if (i_opt)
+            printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali);
+
+        if(a_opt)
+            printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
+
+        if(u_opt)
+            printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u);
+
+        if(d_opt)
+            printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen);
+
+        printf("$$$$\n");
+    }
+    else if (outfmt_opt==2)
+    {
+        printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
+            xname.c_str(), chainID1, yname.c_str(), chainID2, TM2, TM1, rmsd,
+            Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
+            xlen, ylen, n_ali8);
+    }
+    cout << endl;
+
+    if (strlen(fname_matrix)) 
+        output_rotation_matrix(fname_matrix, t, u);
+    if (fname_super.size())
+        output_superpose(xname, yname, fname_super, t, u, ter_opt, mirror_opt,
+            seqM, seqxA, seqyA, resi_vec1, resi_vec2, chainID1, chainID2,
+            xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
+}
+
+double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm,
+    double **xt, double **x, double **y, int xlen, int ylen, int invmap[],
+    int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0,
+    double d0_search, double score_d8, double t[3], double u[3][3],
+    const int mol_type)
+{
+    D0_MIN = 0.5;
+    Lnorm = ylen;
+    if (mol_type>0) // RNA
+    {
+        if     (Lnorm<=11) d0=0.3; 
+        else if(Lnorm>11 && Lnorm<=15) d0=0.4;
+        else if(Lnorm>15 && Lnorm<=19) d0=0.5;
+        else if(Lnorm>19 && Lnorm<=23) d0=0.6;
+        else if(Lnorm>23 && Lnorm<30)  d0=0.7;
+        else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5);
+    }
+    else
+    {
+        if (Lnorm > 21) d0=(1.24*pow((Lnorm*1.0-15), 1.0/3) -1.8);
+        else d0 = D0_MIN;
+        if (d0 < D0_MIN) d0 = D0_MIN;
+    }
+    double d0_input = d0;// Scaled by seq_min
+
+    double tmscore;// collected alined residues from invmap
+    int n_al = 0;
+    int i;
+    for (int j = 0; j<ylen; j++)
+    {
+        i = invmap[j];
+        if (i >= 0)
+        {
+            xtm[n_al][0] = x[i][0];
+            xtm[n_al][1] = x[i][1];
+            xtm[n_al][2] = x[i][2];
+
+            ytm[n_al][0] = y[j][0];
+            ytm[n_al][1] = y[j][1];
+            ytm[n_al][2] = y[j][2];
+
+            r1[n_al][0] = x[i][0];
+            r1[n_al][1] = x[i][1];
+            r1[n_al][2] = x[i][2];
+
+            r2[n_al][0] = y[j][0];
+            r2[n_al][1] = y[j][1];
+            r2[n_al][2] = y[j][2];
+
+            n_al++;
+        }
+        else if (i != -1) PrintErrorAndQuit("Wrong map!\n");
+    }
+    L_ali = n_al;
+
+    Kabsch(r1, r2, n_al, 0, &RMSD, t, u);
+    RMSD = sqrt( RMSD/(1.0*n_al) );
+    
+    int temp_simplify_step = 1;
+    int temp_score_sum_method = 0;
+    d0_search = d0_input;
+    double rms = 0.0;
+    tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al, t, u,
+        temp_simplify_step, temp_score_sum_method, &rms, d0_input,
+        score_d8, d0);
+    tmscore = tmscore * n_al / (1.0*Lnorm);
+
+    return tmscore;
+}
+
+/* copy the value of t and u into t0,u0 */
+void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3])
+{
+    int i,j;
+    for (i=0;i<3;i++)
+    {
+        t0[i]=t[i];
+        for (j=0;j<3;j++) u0[i][j]=u[i][j];
+    }
+}
+
+/* calculate approximate TM-score given rotation matrix */
+double approx_TM(const int xlen, const int ylen, const int a_opt,
+    double **xa, double **ya, double t[3], double u[3][3],
+    const int invmap0[], const int mol_type)
+{
+    double Lnorm_0=ylen; // normalized by the second protein
+    if (a_opt==-2 && xlen>ylen) Lnorm_0=xlen;      // longer
+    else if (a_opt==-1 && xlen<ylen) Lnorm_0=xlen; // shorter
+    else if (a_opt==1) Lnorm_0=(xlen+ylen)/2.;     // average
+    
+    double D0_MIN;
+    double Lnorm;
+    double d0;
+    double d0_search;
+    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    double TMtmp=0;
+    double d;
+    double xtmp[3]={0,0,0};
+
+    for(int i=0,j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        if(i>=0)//aligned
+        {
+            transform(t, u, &xa[i][0], &xtmp[0]);
+            d=sqrt(dist(&xtmp[0], &ya[j][0]));
+            TMtmp+=1/(1+(d/d0)*(d/d0));
+            //if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0));
+        }
+    }
+    TMtmp/=Lnorm_0;
+    return TMtmp;
+}
+
+void clean_up_after_approx_TM(int *invmap0, int *invmap,
+    double **score, bool **path, double **val, double **xtm, double **ytm,
+    double **xt, double **r1, double **r2, const int xlen, const int minlen)
+{
+    delete [] invmap0;
+    delete [] invmap;
+    DeleteArray(&score, xlen+1);
+    DeleteArray(&path, xlen+1);
+    DeleteArray(&val, xlen+1);
+    DeleteArray(&xtm, minlen);
+    DeleteArray(&ytm, minlen);
+    DeleteArray(&xt, xlen);
+    DeleteArray(&r1, minlen);
+    DeleteArray(&r2, minlen);
+    return;
+}
+
+/* Entry function for TM-align. Return TM-score calculation status:
+ * 0   - full TM-score calculation 
+ * 1   - terminated due to exception
+ * 2-7 - pre-terminated due to low TM-score */
+int TMalign_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, const double TMcut=-1)
+{
+    double D0_MIN;        //for d0
+    double Lnorm;         //normalization length
+    double score_d8,d0,d0_search,dcu0;//for TMscore search
+    double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
+    double **score;       // Input score table for dynamic programming
+    bool   **path;        // for dynamic programming  
+    double **val;         // for dynamic programming  
+    double **xtm, **ytm;  // for TMscore search engine
+    double **xt;          //for saving the superposed version of r_1 or xtm
+    double **r1, **r2;    // for Kabsch rotation
+
+    /***********************/
+    /* allocate memory     */
+    /***********************/
+    int minlen = min(xlen, ylen);
+    NewArray(&score, xlen+1, ylen+1);
+    NewArray(&path, xlen+1, ylen+1);
+    NewArray(&val, xlen+1, ylen+1);
+    NewArray(&xtm, minlen, 3);
+    NewArray(&ytm, minlen, 3);
+    NewArray(&xt, xlen, 3);
+    NewArray(&r1, minlen, 3);
+    NewArray(&r2, minlen, 3);
+
+    /***********************/
+    /*    parameter set    */
+    /***********************/
+    parameter_set4search(xlen, ylen, D0_MIN, Lnorm, 
+        score_d8, d0, d0_search, dcu0);
+    int simplify_step    = 40; //for similified search engine
+    int score_sum_method = 8;  //for scoring method, whether only sum over pairs with dis<score_d8
+
+    int i;
+    int *invmap0         = new int[ylen+1];
+    int *invmap          = new int[ylen+1];
+    double TM, TMmax=-1;
+    for(i=0; i<ylen; i++) invmap0[i]=-1;
+
+    double ddcc=0.4;
+    if (Lnorm <= 40) ddcc=0.1;   //Lnorm was setted in parameter_set4search
+    double local_d0_search = d0_search;
+
+    //************************************************//
+    //    get initial alignment from user's input:    //
+    //    Stick to the initial alignment              //
+    //************************************************//
+    bool bAlignStick = false;
+    if (i_opt==3)// if input has set parameter for "-I"
+    {
+        // In the original code, this loop starts from 1, which is
+        // incorrect. Fortran starts from 1 but C++ should starts from 0.
+        for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+            invmap[j] = -1;
+
+        int i1 = -1;// in C version, index starts from zero, not from one
+        int i2 = -1;
+        int L1 = sequence[0].size();
+        int L2 = sequence[1].size();
+        int L = min(L1, L2);// Get positions for aligned residues
+        for (int kk1 = 0; kk1 < L; kk1++)
+        {
+            if (sequence[0][kk1] != '-') i1++;
+            if (sequence[1][kk1] != '-')
+            {
+                i2++;
+                if (i2 >= ylen || i1 >= xlen) kk1 = L;
+                else if (sequence[0][kk1] != '-') invmap[i2] = i1;
+            }
+        }
+
+        //--------------- 2. Align proteins from original alignment
+        double prevD0_MIN = D0_MIN;// stored for later use
+        int prevLnorm = Lnorm;
+        double prevd0 = d0;
+        TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+            invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8,
+            t, u, mol_type);
+        D0_MIN = prevD0_MIN;
+        Lnorm = prevLnorm;
+        d0 = prevd0;
+        TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+            invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0);
+        if (TM > TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+        }
+        bAlignStick = true;
+    }
+
+    /******************************************************/
+    /*    get initial alignment with gapless threading    */
+    /******************************************************/
+    if (!bAlignStick)
+    {
+        get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0,
+            d0_search, fast_opt, t, u);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap0,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax) TMmax = TM;
+        if (TMcut>0) copy_t_u(t, u, t0, u0);
+        //run dynamic programing iteratively to find the best alignment
+        TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen,
+             t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search,
+             D0_MIN, Lnorm, d0, score_d8);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.5*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 2;
+            }
+        }
+
+        /************************************************************/
+        /*    get initial alignment based on secondary structure    */
+        /************************************************************/
+        get_initial_ss(path, val, secx, secy, xlen, ylen, invmap);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*0.2)
+        {
+            TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
+                local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.52*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 3;
+            }
+        }
+
+        /************************************************************/
+        /*    get initial alignment based on local superposition    */
+        /************************************************************/
+        //=initial5 in original TM-align
+        if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya,
+            xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN))
+        {
+            TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+                invmap, t, u, simplify_step, score_sum_method,
+                local_d0_search, Lnorm, score_d8, d0);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+            if (TM > TMmax*ddcc)
+            {
+                TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                    xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search,
+                    D0_MIN, Lnorm, d0, score_d8);
+                if (TM>TMmax)
+                {
+                    TMmax = TM;
+                    for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                    if (TMcut>0) copy_t_u(t, u, t0, u0);
+                }
+            }
+        }
+        else
+            cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl;
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.54*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 4;
+            }
+        }
+
+        /********************************************************************/
+        /* get initial alignment by local superposition+secondary structure */
+        /********************************************************************/
+        //=initial3 in original TM-align
+        get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya,
+            xlen, ylen, invmap0, invmap, D0_MIN, d0);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+             t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+             score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*ddcc)
+        {
+            TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
+                local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.56*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 5;
+            }
+        }
+
+        /*******************************************************************/
+        /*    get initial alignment based on fragment gapless threading    */
+        /*******************************************************************/
+        //=initial4 in original TM-align
+        get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen,
+            invmap, d0, d0_search, dcu0, fast_opt, t, u);
+        TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
+            t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
+            score_d8, d0);
+        if (TM>TMmax)
+        {
+            TMmax = TM;
+            for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            if (TMcut>0) copy_t_u(t, u, t0, u0);
+        }
+        if (TM > TMmax*ddcc)
+        {
+            TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN,
+                Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+                if (TMcut>0) copy_t_u(t, u, t0, u0);
+            }
+        }
+
+        if (TMcut>0) // pre-terminate if TM-score is too low
+        {
+            double TMtmp=approx_TM(xlen, ylen, a_opt,
+                xa, ya, t0, u0, invmap0, mol_type);
+
+            if (TMtmp<0.58*TMcut)
+            {
+                TM1=TM2=TM3=TM4=TM5=TMtmp;
+                clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                    xtm, ytm, xt, r1, r2, xlen, minlen);
+                return 6;
+            }
+        }
+
+        //************************************************//
+        //    get initial alignment from user's input:    //
+        //************************************************//
+        if (i_opt==1)// if input has set parameter for "-i"
+        {
+            for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
+                invmap[j] = -1;
+
+            int i1 = -1;// in C version, index starts from zero, not from one
+            int i2 = -1;
+            int L1 = sequence[0].size();
+            int L2 = sequence[1].size();
+            int L = min(L1, L2);// Get positions for aligned residues
+            for (int kk1 = 0; kk1 < L; kk1++)
+            {
+                if (sequence[0][kk1] != '-')
+                    i1++;
+                if (sequence[1][kk1] != '-')
+                {
+                    i2++;
+                    if (i2 >= ylen || i1 >= xlen) kk1 = L;
+                    else if (sequence[0][kk1] != '-') invmap[i2] = i1;
+                }
+            }
+
+            //--------------- 2. Align proteins from original alignment
+            double prevD0_MIN = D0_MIN;// stored for later use
+            int prevLnorm = Lnorm;
+            double prevd0 = d0;
+            TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
+                xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
+                d0_search, score_d8, t, u, mol_type);
+            D0_MIN = prevD0_MIN;
+            Lnorm = prevLnorm;
+            d0 = prevd0;
+
+            TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya,
+                xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm,
+                score_d8, d0);
+            if (TM > TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            }
+            // Different from get_initial, get_initial_ss and get_initial_ssplus
+            TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
+                xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
+                local_d0_search, D0_MIN, Lnorm, d0, score_d8);
+            if (TM>TMmax)
+            {
+                TMmax = TM;
+                for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
+            }
+        }
+    }
+
+
+
+    //*******************************************************************//
+    //    The alignment will not be changed any more in the following    //
+    //*******************************************************************//
+    //check if the initial alignment is generated approriately
+    bool flag=false;
+    for(i=0; i<ylen; i++)
+    {
+        if(invmap0[i]>=0)
+        {
+            flag=true;
+            break;
+        }
+    }
+    if(!flag)
+    {
+        cout << "There is no alignment between the two proteins!" << endl;
+        cout << "Program stop with no result!" << endl;
+        return 1;
+    }
+
+    /* last TM-score pre-termination */
+    if (TMcut>0)
+    {
+        double TMtmp=approx_TM(xlen, ylen, a_opt,
+            xa, ya, t0, u0, invmap0, mol_type);
+
+        if (TMtmp<0.6*TMcut)
+        {
+            TM1=TM2=TM3=TM4=TM5=TMtmp;
+            clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+                xtm, ytm, xt, r1, r2, xlen, minlen);
+            return 7;
+        }
+    }
+
+    //********************************************************************//
+    //    Detailed TMscore search engine --> prepare for final TMscore    //
+    //********************************************************************//
+    //run detailed TMscore search engine for the best alignment, and
+    //extract the best rotation matrix (t, u) for the best alginment
+    simplify_step=1;
+    if (fast_opt) simplify_step=40;
+    score_sum_method=8;
+    TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
+        invmap0, t, u, simplify_step, score_sum_method, local_d0_search,
+        false, Lnorm, score_d8, d0);
+
+    //select pairs with dis<d8 for final TMscore computation and output alignment
+    int k=0;
+    int *m1, *m2;
+    double d;
+    m1=new int[xlen]; //alignd index in x
+    m2=new int[ylen]; //alignd index in y
+    do_rotation(xa, xt, xlen, t, u);
+    k=0;
+    for(int j=0; j<ylen; j++)
+    {
+        i=invmap0[j];
+        if(i>=0)//aligned
+        {
+            n_ali++;
+            d=sqrt(dist(&xt[i][0], &ya[j][0]));
+            if (d <= score_d8 || (i_opt == 3))
+            {
+                m1[k]=i;
+                m2[k]=j;
+
+                xtm[k][0]=xa[i][0];
+                xtm[k][1]=xa[i][1];
+                xtm[k][2]=xa[i][2];
+
+                ytm[k][0]=ya[j][0];
+                ytm[k][1]=ya[j][1];
+                ytm[k][2]=ya[j][2];
+
+                r1[k][0] = xt[i][0];
+                r1[k][1] = xt[i][1];
+                r1[k][2] = xt[i][2];
+                r2[k][0] = ya[j][0];
+                r2[k][1] = ya[j][1];
+                r2[k][2] = ya[j][2];
+
+                k++;
+            }
+        }
+    }
+    n_ali8=k;
+
+    Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
+    rmsd0 = sqrt(rmsd0 / n_ali8);
+
+
+    //****************************************//
+    //              Final TMscore             //
+    //    Please set parameters for output    //
+    //****************************************//
+    double rmsd;
+    simplify_step=1;
+    score_sum_method=0;
+    double Lnorm_0=ylen;
+
+
+    //normalized by length of structure A
+    parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0A=d0;
+    d0_0=d0A;
+    local_d0_search = d0_search;
+    TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+    TM_0 = TM1;
+
+    //normalized by length of structure B
+    parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+    d0B=d0;
+    local_d0_search = d0_search;
+    TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
+        score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
+
+    double Lnorm_d0;
+    if (a_opt>0)
+    {
+        //normalized by average length of structures A, B
+        Lnorm_0=(xlen+ylen)*0.5;
+        parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
+        d0a=d0;
+        d0_0=d0a;
+        local_d0_search = d0_search;
+
+        TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM3;
+    }
+    if (u_opt)
+    {
+        //normalized by user assigned length
+        parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
+            d0, d0_search, mol_type);
+        d0u=d0;
+        d0_0=d0u;
+        Lnorm_0=Lnorm_ass;
+        local_d0_search = d0_search;
+        TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM4;
+    }
+    if (d_opt)
+    {
+        //scaled by user assigned d0
+        parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search);
+        d0_out=d0_scale;
+        d0_0=d0_scale;
+        //Lnorm_0=ylen;
+        Lnorm_d0=Lnorm_0;
+        local_d0_search = d0_search;
+        TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
+            simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
+            score_d8, d0);
+        TM_0=TM5;
+    }
+
+    /* derive alignment from superposition */
+    int ali_len=xlen+ylen; //maximum length of alignment
+    seqxA.assign(ali_len,'-');
+    seqM.assign( ali_len,' ');
+    seqyA.assign(ali_len,'-');
+    
+    //do_rotation(xa, xt, xlen, t, u);
+    do_rotation(xa, xt, xlen, t0, u0);
+
+    int kk=0, i_old=0, j_old=0;
+    d=0;
+    for(int k=0; k<n_ali8; k++)
+    {
+        for(int i=i_old; i<m1[k]; i++)
+        {
+            //align x to gap
+            seqxA[kk]=seqx[i];
+            seqyA[kk]='-';
+            seqM[kk]=' ';                    
+            kk++;
+        }
+
+        for(int j=j_old; j<m2[k]; j++)
+        {
+            //align y to gap
+            seqxA[kk]='-';
+            seqyA[kk]=seqy[j];
+            seqM[kk]=' ';
+            kk++;
+        }
+
+        seqxA[kk]=seqx[m1[k]];
+        seqyA[kk]=seqy[m2[k]];
+        Liden+=(seqxA[kk]==seqyA[kk]);
+        d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0]));
+        if(d<d0_out) seqM[kk]=':';
+        else         seqM[kk]='.';
+        kk++;  
+        i_old=m1[k]+1;
+        j_old=m2[k]+1;
+    }
+
+    //tail
+    for(int i=i_old; i<xlen; i++)
+    {
+        //align x to gap
+        seqxA[kk]=seqx[i];
+        seqyA[kk]='-';
+        seqM[kk]=' ';
+        kk++;
+    }    
+    for(int j=j_old; j<ylen; j++)
+    {
+        //align y to gap
+        seqxA[kk]='-';
+        seqyA[kk]=seqy[j];
+        seqM[kk]=' ';
+        kk++;
+    }
+    seqxA=seqxA.substr(0,kk);
+    seqyA=seqyA.substr(0,kk);
+    seqM =seqM.substr(0,kk);
+
+    /* free memory */
+    clean_up_after_approx_TM(invmap0, invmap, score, path, val,
+        xtm, ytm, xt, r1, r2, xlen, minlen);
+    delete [] m1;
+    delete [] m2;
+    return 0; // zero for no exception
+}
+
+/* entry function for TM-align with circular permutation
+ * i_opt, a_opt, u_opt, d_opt, TMcut are not implemented yet */
+int CPalign_main(double **xa, double **ya,
+    const char *seqx, const char *seqy, const char *secx, const char *secy,
+    double t0[3], double u0[3][3],
+    double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
+    double &d0_0, double &TM_0,
+    double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
+    string &seqM, string &seqxA, string &seqyA,
+    double &rmsd0, int &L_ali, double &Liden,
+    double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
+    const int xlen, const int ylen,
+    const vector<string> sequence, const double Lnorm_ass,
+    const double d0_scale, const int i_opt, const int a_opt,
+    const bool u_opt, const bool d_opt, const bool fast_opt,
+    const int mol_type, const double TMcut=-1)
+{
+    char   *seqx_cp, *seqy_cp; // for the protein sequence 
+    char   *secx_cp, *secy_cp; // for the secondary structure 
+    double **xa_cp, **ya_cp;   // coordinates
+    string seqxA_cp,seqyA_cp;  // alignment
+    int    i,r;
+    int    cp_point=0;    // position of circular permutation
+    int    cp_aln_best=0; // amount of aligned residue in sliding window
+    int    cp_aln_current;// amount of aligned residue in sliding window
+
+    /* duplicate structure */
+    NewArray(&xa_cp, xlen*2, 3);
+    seqx_cp = new char[xlen*2 + 1];
+    secx_cp = new char[xlen*2 + 1];
+    for (r=0;r<xlen;r++)
+    {
+        xa_cp[r+xlen][0]=xa_cp[r][0]=xa[r][0];
+        xa_cp[r+xlen][1]=xa_cp[r][1]=xa[r][1];
+        xa_cp[r+xlen][2]=xa_cp[r][2]=xa[r][2];
+        seqx_cp[r+xlen]=seqx_cp[r]=seqx[r];
+        secx_cp[r+xlen]=secx_cp[r]=secx[r];
+    }
+    seqx_cp[2*xlen]=0;
+    secx_cp[2*xlen]=0;
+    
+    /* fTM-align alignment */
+    double TM1_cp,TM2_cp;
+    TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
+        t0, u0, TM1_cp, TM2_cp, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen*2, ylen, sequence, Lnorm_ass, d0_scale,
+        0, false, false, false, true, mol_type, -1);
+
+    /* delete gap in seqxA_cp */
+    r=0;
+    seqxA=seqxA_cp;
+    seqyA=seqyA_cp;
+    for (i=0;i<seqxA_cp.size();i++)
+    {
+        if (seqxA_cp[i]!='-')
+        {
+            seqxA[r]=seqxA_cp[i];
+            seqyA[r]=seqyA_cp[i];
+            r++;
+        }
+    }
+    seqxA=seqxA.substr(0,r);
+    seqyA=seqyA.substr(0,r);
+
+    /* count the number of aligned residues in each window
+     * r - residue index in the original unaligned sequence 
+     * i - position in the alignment */
+    for (r=0;r<xlen-1;r++)
+    {
+        cp_aln_current=0;
+        for (i=r;i<r+xlen;i++) cp_aln_current+=(seqyA[i]!='-');
+
+        if (cp_aln_current>cp_aln_best)
+        {
+            cp_aln_best=cp_aln_current;
+            cp_point=r;
+        }
+    }
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    seqxA_cp.clear();
+    seqyA_cp.clear();
+    rmsd0=Liden=n_ali=n_ali8=0;
+
+    /* fTM-align alignment */
+    TMalign_main(xa, ya, seqx, seqy, secx, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        0, false, false, false, true, mol_type, -1);
+
+    /* do not use cricular permutation of number of aligned residues is not
+     * larger than sequence-order dependent alignment */
+    if (n_ali8>cp_aln_best) cp_point=0;
+
+    /* prepare structure for final alignment */
+    seqM.clear();
+    seqxA.clear();
+    seqyA.clear();
+    rmsd0=Liden=n_ali=n_ali8=0;
+    if (cp_point!=0)
+    {
+        for (r=0;r<xlen;r++)
+        {
+            xa_cp[r][0]=xa_cp[r+cp_point][0];
+            xa_cp[r][1]=xa_cp[r+cp_point][1];
+            xa_cp[r][2]=xa_cp[r+cp_point][2];
+            seqx_cp[r]=seqx_cp[r+cp_point];
+            secx_cp[r]=secx_cp[r+cp_point];
+        }
+    }
+    seqx_cp[xlen]=0;
+    secx_cp[xlen]=0;
+
+    /* full TM-align */
+    TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
+        t0, u0, TM1, TM2, TM3, TM4, TM5,
+        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
+        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+        i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, TMcut);
+
+    /* correct alignment
+     * r - residue index in the original unaligned sequence 
+     * i - position in the alignment */
+    if (cp_point>0)
+    {
+        r=0;
+        for (i=0;i<seqxA_cp.size();i++)
+        {
+            r+=(seqxA_cp[i]!='-');
+            if (r>=(xlen-cp_point)) 
+            {
+                i++;
+                break;
+            }
+        }
+        seqxA=seqxA_cp.substr(0,i)+'*'+seqxA_cp.substr(i);
+        seqM =seqM.substr(0,i)    +' '+seqM.substr(i);
+        seqyA=seqyA_cp.substr(0,i)+'-'+seqyA_cp.substr(i);
+    }
+    else
+    {
+        seqxA=seqxA_cp;
+        seqyA=seqyA_cp;
+    }
+
+    /* clean up */
+    delete[]seqx_cp;
+    delete[]secx_cp;
+    DeleteArray(&xa_cp,xlen*2);
+    seqxA_cp.clear();
+    seqyA_cp.clear();
+    return cp_point;
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 2) print_help();
+
+
+    clock_t t1, t2;
+    t1 = clock();
+
+    /**********************/
+    /*    get argument    */
+    /**********************/
+    string xname       = "";
+    string yname       = "";
+    string fname_super = ""; // file name for superposed structure
+    string fname_lign  = ""; // file name for user alignment
+    string fname_matrix= ""; // file name for output matrix
+    vector<string> sequence; // get value from alignment file
+    double Lnorm_ass, d0_scale;
+
+    bool h_opt = false; // print full help message
+    bool v_opt = false; // print version
+    bool m_opt = false; // flag for -m, output rotation matrix
+    int  i_opt = 0;     // 1 for -i, 3 for -I
+    bool o_opt = false; // flag for -o, output superposed structure
+    int  a_opt = 0;     // flag for -a, do not normalized by average length
+    bool u_opt = false; // flag for -u, normalized by user specified length
+    bool d_opt = false; // flag for -d, user specified d0
+
+    double TMcut     =-1;
+    int    infmt1_opt=-1;    // PDB or PDBx/mmCIF format for chain_1
+    int    infmt2_opt=-1;    // PDB or PDBx/mmCIF format for chain_2
+    int    ter_opt   =3;     // TER, END, or different chainID
+    int    split_opt =0;     // do not split chain
+    int    outfmt_opt=0;     // set -outfmt to full output
+    bool   fast_opt  =false; // flags for -fast, fTM-align algorithm
+    int    cp_opt    =0;     // do not check circular permutation
+    int    mirror_opt=0;     // do not align mirror
+    int    het_opt=0;        // do not read HETATM residues
+    string atom_opt  ="auto";// use C alpha atom for protein and C3' for RNA
+    string mol_opt   ="auto";// auto-detect the molecule type as protein/RNA
+    string suffix_opt="";    // set -suffix to empty
+    string dir_opt   ="";    // set -dir to empty
+    string dir1_opt  ="";    // set -dir1 to empty
+    string dir2_opt  ="";    // set -dir2 to empty
+    int    byresi_opt=0;     // set -byresi to 0
+    vector<string> chain1_list; // only when -dir1 is set
+    vector<string> chain2_list; // only when -dir2 is set
+
+    for(int i = 1; i < argc; i++)
+    {
+        if ( !strcmp(argv[i],"-o") && i < (argc-1) )
+        {
+            fname_super = argv[i + 1];     o_opt = true; i++;
+        }
+        else if ( (!strcmp(argv[i],"-u") || 
+                   !strcmp(argv[i],"-L")) && i < (argc-1) )
+        {
+            Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-a") && i < (argc-1) )
+        {
+            if (!strcmp(argv[i + 1], "T"))      a_opt=true;
+            else if (!strcmp(argv[i + 1], "F")) a_opt=false;
+            else 
+            {
+                a_opt=atoi(argv[i + 1]);
+                if (a_opt!=-2 && a_opt!=-1 && a_opt!=1)
+                    PrintErrorAndQuit("-a must be -2, -1, 1, T or F");
+            }
+            i++;
+        }
+        else if ( !strcmp(argv[i],"-d") && i < (argc-1) )
+        {
+            d0_scale = atof(argv[i + 1]); d_opt = true; i++;
+        }
+        else if ( !strcmp(argv[i],"-v") )
+        {
+            v_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-h") )
+        {
+            h_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-i") && i < (argc-1) )
+        {
+            if (i_opt==3)
+                PrintErrorAndQuit("ERROR! -i and -I cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 1; i++;
+        }
+        else if (!strcmp(argv[i], "-I") && i < (argc-1) )
+        {
+            if (i_opt==1)
+                PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
+            fname_lign = argv[i + 1];      i_opt = 3; i++;
+        }
+        else if (!strcmp(argv[i], "-m") && i < (argc-1) )
+        {
+            fname_matrix = argv[i + 1];    m_opt = true; i++;
+        }// get filename for rotation matrix
+        else if (!strcmp(argv[i], "-fast"))
+        {
+            fast_opt = true;
+        }
+        else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) )
+        {
+            infmt1_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) )
+        {
+            infmt2_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-ter") && i < (argc-1) )
+        {
+            ter_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-split") && i < (argc-1) )
+        {
+            split_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-atom") && i < (argc-1) )
+        {
+            atom_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-mol") && i < (argc-1) )
+        {
+            mol_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir") && i < (argc-1) )
+        {
+            dir_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) )
+        {
+            dir1_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) )
+        {
+            dir2_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
+        {
+            suffix_opt=argv[i + 1]; i++;
+        }
+        else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) )
+        {
+            outfmt_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) )
+        {
+            TMcut=atof(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) )
+        {
+            byresi_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-cp") )
+        {
+            cp_opt=1;
+        }
+        else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) )
+        {
+            mirror_opt=atoi(argv[i + 1]); i++;
+        }
+        else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
+        {
+            het_opt=atoi(argv[i + 1]); i++;
+        }
+        else if (xname.size() == 0) xname=argv[i];
+        else if (yname.size() == 0) yname=argv[i];
+        else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
+    }
+
+    if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) || 
+                          (yname.size()    && dir_opt.size()))
+    {
+        if (h_opt) print_help(h_opt);
+        if (v_opt)
+        {
+            print_version();
+            exit(EXIT_FAILURE);
+        }
+        if (xname.size()==0)
+            PrintErrorAndQuit("Please provide input structures");
+        else if (yname.size()==0 && dir_opt.size()==0)
+            PrintErrorAndQuit("Please provide structure B");
+        else if (yname.size() && dir_opt.size())
+            PrintErrorAndQuit("Please provide only one file name if -dir is set");
+    }
+
+    if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0)
+        PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set");
+    if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size()))
+    {
+        if (m_opt || o_opt)
+            PrintErrorAndQuit("-m or -o cannot be set with -dir, -dir1 or -dir2");
+        else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
+            PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
+    }
+    if (atom_opt.size()!=4)
+        PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
+    if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
+        PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
+    else if (mol_opt=="protein" && atom_opt=="auto")
+        atom_opt=" CA ";
+    else if (mol_opt=="RNA" && atom_opt=="auto")
+        atom_opt=" C3'";
+
+    if (u_opt && Lnorm_ass<=0)
+        PrintErrorAndQuit("Wrong value for option -u!  It should be >0");
+    if (d_opt && d0_scale<=0)
+        PrintErrorAndQuit("Wrong value for option -d!  It should be >0");
+    if (outfmt_opt>=2 && (a_opt || u_opt || d_opt))
+        PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
+    if (byresi_opt!=0)
+    {
+        if (i_opt)
+            PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
+        if (byresi_opt<0 || byresi_opt>3)
+            PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
+        if (byresi_opt>=2 && ter_opt>=2)
+            PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
+    }
+    if (split_opt==1 && ter_opt!=0)
+        PrintErrorAndQuit("-split 1 should be used with -ter 0");
+    else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
+        PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
+    if (split_opt<0 || split_opt>2)
+        PrintErrorAndQuit("-split can only be 0, 1 or 2");
+    if (cp_opt!=0 && cp_opt!=1)
+        PrintErrorAndQuit("-cp can only be 0 or 1");
+    if (cp_opt && i_opt)
+        PrintErrorAndQuit("-cp cannot be used with -i or -I");
+
+    /* read initial alignment file from 'align.txt' */
+    if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
+
+    if (byresi_opt) i_opt=3;
+
+    if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
+        PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
+
+    /* parse file list */
+    if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname);
+    else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt);
+
+    if (dir_opt.size())
+        for (int i=0;i<chain1_list.size();i++)
+            chain2_list.push_back(chain1_list[i]);
+    else if (dir2_opt.size()==0) chain2_list.push_back(yname);
+    else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
+
+    if (outfmt_opt==2)
+        cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
+            <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
+
+    /* declare previously global variables */
+    vector<vector<string> >PDB_lines1; // text of chain1
+    vector<vector<string> >PDB_lines2; // text of chain2
+    vector<int> mol_vec1;              // molecule type of chain1, RNA if >0
+    vector<int> mol_vec2;              // molecule type of chain2, RNA if >0
+    vector<string> chainID_list1;      // list of chainID1
+    vector<string> chainID_list2;      // list of chainID2
+    int    i,j;                // file index
+    int    chain_i,chain_j;    // chain index
+    int    r;                  // residue index
+    int    xlen, ylen;         // chain length
+    int    xchainnum,ychainnum;// number of chains in a PDB file
+    char   *seqx, *seqy;       // for the protein sequence 
+    char   *secx, *secy;       // for the secondary structure 
+    double **xa, **ya;         // for input vectors xa[0...xlen-1][0..2] and
+                               // ya[0...ylen-1][0..2], in general,
+                               // ya is regarded as native structure 
+                               // --> superpose xa onto ya
+    vector<string> resi_vec1;  // residue index for chain1
+    vector<string> resi_vec2;  // residue index for chain2
+
+    /* loop over file names */
+    for (i=0;i<chain1_list.size();i++)
+    {
+        /* parse chain 1 */
+        xname=chain1_list[i];
+        xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
+            mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
+        if (!xchainnum)
+        {
+            cerr<<"Warning! Cannot parse file: "<<xname
+                <<". Chain number 0."<<endl;
+            continue;
+        }
+        for (chain_i=0;chain_i<xchainnum;chain_i++)
+        {
+            xlen=PDB_lines1[chain_i].size();
+            mol_vec1[chain_i]=-1;
+            if (!xlen)
+            {
+                cerr<<"Warning! Cannot parse file: "<<xname
+                    <<". Chain length 0."<<endl;
+                continue;
+            }
+            else if (xlen<3)
+            {
+                cerr<<"Sequence is too short <3!: "<<xname<<endl;
+                continue;
+            }
+            NewArray(&xa, xlen, 3);
+            seqx = new char[xlen + 1];
+            secx = new char[xlen + 1];
+            xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, 
+                resi_vec1, byresi_opt?byresi_opt:o_opt);
+            if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2];
+            make_sec(xa, xlen, secx); // secondary structure assignment
+
+            for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
+            {
+                /* parse chain 2 */
+                if (PDB_lines2.size()==0)
+                {
+                    yname=chain2_list[j];
+                    ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
+                        mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
+                        het_opt);
+                    if (!ychainnum)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain number 0."<<endl;
+                        continue;
+                    }
+                }
+                for (chain_j=0;chain_j<ychainnum;chain_j++)
+                {
+                    ylen=PDB_lines2[chain_j].size();
+                    mol_vec2[chain_j]=-1;
+                    if (!ylen)
+                    {
+                        cerr<<"Warning! Cannot parse file: "<<yname
+                            <<". Chain length 0."<<endl;
+                        continue;
+                    }
+                    else if (ylen<3)
+                    {
+                        cerr<<"Sequence is too short <3!: "<<yname<<endl;
+                        continue;
+                    }
+                    NewArray(&ya, ylen, 3);
+                    seqy = new char[ylen + 1];
+                    secy = new char[ylen + 1];
+                    ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
+                        resi_vec2, byresi_opt?byresi_opt:o_opt);
+                    make_sec(ya, ylen, secy);
+                    if (byresi_opt) extract_aln_from_resi(sequence,
+                        seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
+
+                    /* declare variable specific to this pair of TMalign */
+                    double t0[3], u0[3][3];
+                    double TM1, TM2;
+                    double TM3, TM4, TM5;     // for a_opt, u_opt, d_opt
+                    double d0_0, TM_0;
+                    double d0A, d0B, d0u, d0a;
+                    double d0_out=5.0;
+                    string seqM, seqxA, seqyA;// for output alignment
+                    double rmsd0 = 0.0;
+                    int L_ali;                // Aligned length in standard_TMscore
+                    double Liden=0;
+                    double TM_ali, rmsd_ali;  // TMscore and rmsd in standard_TMscore
+                    int n_ali=0;
+                    int n_ali8=0;
+
+                    /* entry function for structure alignment */
+                    if (cp_opt) CPalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
+                    else TMalign_main(
+                        xa, ya, seqx, seqy, secx, secy,
+                        t0, u0, TM1, TM2, TM3, TM4, TM5,
+                        d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
+                        seqM, seqxA, seqyA,
+                        rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
+                        xlen, ylen, sequence, Lnorm_ass, d0_scale,
+                        i_opt, a_opt, u_opt, d_opt, fast_opt,
+                        mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
+
+                    /* print result */
+                    if (outfmt_opt==0) print_version();
+                    output_results(
+                        xname.substr(dir1_opt.size()),
+                        yname.substr(dir2_opt.size()),
+                        chainID_list1[chain_i].c_str(),
+                        chainID_list2[chain_j].c_str(),
+                        xlen, ylen, t0, u0, TM1, TM2, 
+                        TM3, TM4, TM5, rmsd0, d0_out,
+                        seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
+                        n_ali8, L_ali, TM_ali, rmsd_ali,
+                        TM_0, d0_0, d0A, d0B,
+                        Lnorm_ass, d0_scale, d0a, d0u, 
+                        (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
+                        outfmt_opt, ter_opt, 
+                        (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
+                        i_opt, a_opt, u_opt, d_opt,mirror_opt,
+                        resi_vec1,resi_vec2);
+
+                    /* Done! Free memory */
+                    seqM.clear();
+                    seqxA.clear();
+                    seqyA.clear();
+                    DeleteArray(&ya, ylen);
+                    delete [] seqy;
+                    delete [] secy;
+                    resi_vec2.clear();
+                } // chain_j
+                if (chain2_list.size()>1)
+                {
+                    yname.clear();
+                    for (chain_j=0;chain_j<ychainnum;chain_j++)
+                        PDB_lines2[chain_j].clear();
+                    PDB_lines2.clear();
+                    chainID_list2.clear();
+                    mol_vec2.clear();
+                }
+            } // j
+            PDB_lines1[chain_i].clear();
+            DeleteArray(&xa, xlen);
+            delete [] seqx;
+            delete [] secx;
+            resi_vec1.clear();
+        } // chain_i
+        xname.clear();
+        PDB_lines1.clear();
+        chainID_list1.clear();
+        mol_vec1.clear();
+    } // i
+    if (chain2_list.size()==1)
+    {
+        yname.clear();
+        for (chain_j=0;chain_j<ychainnum;chain_j++)
+            PDB_lines2[chain_j].clear();
+        PDB_lines2.clear();
+        resi_vec2.clear();
+        chainID_list2.clear();
+        mol_vec2.clear();
+    }
+    chain1_list.clear();
+    chain2_list.clear();
+    sequence.clear();
+
+    t2 = clock();
+    float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
+    printf("Total CPU time is %5.2f seconds\n", diff);
+    return 0;
+}