Mercurial > repos > guerler > springsuite
comparison spring_package/tmalign/TMalign.cpp @ 17:c790d25086dc draft
"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
| author | guerler |
|---|---|
| date | Wed, 28 Oct 2020 05:11:56 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 16:16eb2acaaa20 | 17:c790d25086dc |
|---|---|
| 1 /* TM-align: sequence-independent structure alignment of monomer proteins by | |
| 2 * TM-score superposition. Please report issues to yangzhanglab@umich.edu | |
| 3 * | |
| 4 * References to cite: | |
| 5 * Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) | |
| 6 * | |
| 7 * DISCLAIMER: | |
| 8 * Permission to use, copy, modify, and distribute the Software for any | |
| 9 * purpose, with or without fee, is hereby granted, provided that the | |
| 10 * notices on the head, the reference information, and this copyright | |
| 11 * notice appear in all copies or substantial portions of the Software. | |
| 12 * It is provided "as is" without express or implied warranty. | |
| 13 * | |
| 14 * ========================== | |
| 15 * How to install the program | |
| 16 * ========================== | |
| 17 * The following command compiles the program in your Linux computer: | |
| 18 * | |
| 19 * g++ -static -O3 -ffast-math -lm -o TMalign TMalign.cpp | |
| 20 * | |
| 21 * The '-static' flag should be removed on Mac OS, which does not support | |
| 22 * building static executables. | |
| 23 * | |
| 24 * ====================== | |
| 25 * How to use the program | |
| 26 * ====================== | |
| 27 * You can run the program without argument to obtain the document. | |
| 28 * Briefly, you can compare two structures by: | |
| 29 * | |
| 30 * ./TMalign structure1.pdb structure2.pdb | |
| 31 * | |
| 32 * ============== | |
| 33 * Update history | |
| 34 * ============== | |
| 35 * 2012/01/24: A C/C++ code of TM-align was constructed by Jianyi Yang | |
| 36 * 2016/05/21: Several updates of this program were made by Jianji Wu: | |
| 37 * (1) fixed several compiling bugs | |
| 38 * (2) made I/O of C/C++ version consistent with the Fortran version | |
| 39 * (3) added outputs including full-atom and ligand structures | |
| 40 * (4) added options of '-i', '-I' and '-m' | |
| 41 * 2016/05/25: Fixed a bug on PDB file reading | |
| 42 * 2018/06/04: Several updates were made by Chengxin Zhang, including | |
| 43 * (1) Fixed bug in reading PDB files with negative residue index, | |
| 44 * (2) Implemented the fTM-align algorithm (by the '-fast' option) | |
| 45 * as described in R Dong, S Pan, Z Peng, Y Zhang, J Yang | |
| 46 * (2018) Nucleic acids research. gky430. | |
| 47 * (3) Included option to perform TM-align against a whole | |
| 48 * folder of PDB files. A full list of options not available | |
| 49 * in the Fortran version can be explored by TMalign -h | |
| 50 * 2018/07/27: Added the -byresi option for TM-score superposition without | |
| 51 * re-alignment as in TMscore and TMscore -c | |
| 52 * 2018/08/07: Added the -dir option | |
| 53 * 2018/08/14: Added the -split option | |
| 54 * 2018/08/16: Added the -infmt1, -infmt2 options. | |
| 55 * 2019/01/07: Added support for PDBx/mmCIF format. | |
| 56 * 2019/02/09: Fixed asymmetric alignment bug. | |
| 57 * 2019/03/17: Added the -cp option for circular permutation | |
| 58 * 2019/07/23: Supported RasMol output by '-o' option | |
| 59 * 2019/07/24: Fixed bug on PyMOL format output by '-o' option with mmCIF input | |
| 60 * 2019/08/18: Fixed bug on RasMol format output file *_atm. Removed excessive | |
| 61 * circular permutation alignment by -cp | |
| 62 * 2019/08/20: Clarified PyMOL syntax. | |
| 63 * 2019/08/22: Added four additional PyMOL scripts. | |
| 64 */ | |
| 65 #include <math.h> | |
| 66 #include <stdio.h> | |
| 67 #include <stdlib.h> | |
| 68 #include <time.h> | |
| 69 #include <string.h> | |
| 70 #include <malloc/malloc.h> | |
| 71 #include <sstream> | |
| 72 #include <iostream> | |
| 73 #include <iomanip> | |
| 74 #include <fstream> | |
| 75 #include <vector> | |
| 76 #include <iterator> | |
| 77 #include <algorithm> | |
| 78 #include <string> | |
| 79 #include <map> | |
| 80 | |
| 81 using namespace std; | |
| 82 | |
| 83 void print_version() | |
| 84 { | |
| 85 cout << | |
| 86 "\n" | |
| 87 " *********************************************************************\n" | |
| 88 " * TM-align (Version 20190822): protein structure alignment *\n" | |
| 89 " * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n" | |
| 90 " * Please email comments and suggestions to yangzhanglab@umich.edu *\n" | |
| 91 " *********************************************************************" | |
| 92 << endl; | |
| 93 } | |
| 94 | |
| 95 void print_extra_help() | |
| 96 { | |
| 97 cout << | |
| 98 "Additional options:\n" | |
| 99 " -dir Perform all-against-all alignment among the list of PDB\n" | |
| 100 " chains listed by 'chain_list' under 'chain_folder'. Note\n" | |
| 101 " that the slash is necessary.\n" | |
| 102 " $ TMalign -dir chain_folder/ chain_list\n" | |
| 103 "\n" | |
| 104 " -dir1 Use chain2 to search a list of PDB chains listed by 'chain1_list'\n" | |
| 105 " under 'chain1_folder'. Note that the slash is necessary.\n" | |
| 106 " $ TMalign -dir1 chain1_folder/ chain1_list chain2\n" | |
| 107 "\n" | |
| 108 " -dir2 Use chain1 to search a list of PDB chains listed by 'chain2_list'\n" | |
| 109 " under 'chain2_folder'\n" | |
| 110 " $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n" | |
| 111 "\n" | |
| 112 " -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n" | |
| 113 " add file name suffix to files listed by chain1_list or chain2_list\n" | |
| 114 "\n" | |
| 115 " -atom 4-character atom name used to represent a residue.\n" | |
| 116 " Default is \" CA \" for proteins\n" | |
| 117 " (note the spaces before and after CA).\n" | |
| 118 "\n" | |
| 119 " -ter Strings to mark the end of a chain\n" | |
| 120 " 3: (default) TER, ENDMDL, END or different chain ID\n" | |
| 121 " 2: ENDMDL, END, or different chain ID\n" | |
| 122 " 1: ENDMDL or END\n" | |
| 123 " 0: (default in the first C++ TMalign) end of file\n" | |
| 124 "\n" | |
| 125 " -split Whether to split PDB file into multiple chains\n" | |
| 126 " 0: (default) treat the whole structure as one single chain\n" | |
| 127 " 1: treat each MODEL as a separate chain (-ter should be 0)\n" | |
| 128 " 2: treat each chain as a seperate chain (-ter should be <=1)\n" | |
| 129 "\n" | |
| 130 " -outfmt Output format\n" | |
| 131 " 0: (default) full output\n" | |
| 132 " 1: fasta format compact output\n" | |
| 133 " 2: tabular format very compact output\n" | |
| 134 " -1: full output, but without version or citation information\n" | |
| 135 "\n" | |
| 136 " -byresi Whether to assume residue index correspondence between the\n" | |
| 137 " two structures.\n" | |
| 138 " 0: (default) sequence independent alignment\n" | |
| 139 " 1: (same as TMscore program) sequence-dependent superposition,\n" | |
| 140 " i.e. align by residue index\n" | |
| 141 " 2: (same as TMscore -c, should be used with -ter <=1)\n" | |
| 142 " align by residue index and chain ID\n" | |
| 143 " 3: (similar to TMscore -c, should be used with -ter <=1)\n" | |
| 144 " align by residue index and order of chain\n" | |
| 145 "\n" | |
| 146 " -TMcut -1: (default) do not consider TMcut\n" | |
| 147 " Values in [0.5,1): Do not proceed with TM-align for this\n" | |
| 148 " structure pair if TM-score is unlikely to reach TMcut.\n" | |
| 149 " TMcut is normalized is set by -a option:\n" | |
| 150 " -2: normalized by longer structure length\n" | |
| 151 " -1: normalized by shorter structure length\n" | |
| 152 " 0: (default, same as F) normalized by second structure\n" | |
| 153 " 1: same as T, normalized by average structure length\n" | |
| 154 "\n" | |
| 155 " -mirror Whether to align the mirror image of input structure\n" | |
| 156 " 0: (default) do not align mirrored structure\n" | |
| 157 " 1: align mirror of chain1 to origin chain2\n" | |
| 158 "\n" | |
| 159 " -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n" | |
| 160 " 0: (default) only align 'ATOM ' residues\n" | |
| 161 " 1: align both 'ATOM ' and 'HETATM' residues\n" | |
| 162 "\n" | |
| 163 " -infmt1 Input format for chain1\n" | |
| 164 " -infmt2 Input format for chain2\n" | |
| 165 " -1: (default) automatically detect PDB or PDBx/mmCIF format\n" | |
| 166 " 0: PDB format\n" | |
| 167 " 1: SPICKER format\n" | |
| 168 " 2: xyz format\n" | |
| 169 " 3: PDBx/mmCIF format\n" | |
| 170 <<endl; | |
| 171 } | |
| 172 | |
| 173 void print_help(bool h_opt=false) | |
| 174 { | |
| 175 print_version(); | |
| 176 cout << | |
| 177 "\n" | |
| 178 "Usage: TMalign PDB1.pdb PDB2.pdb [Options]\n" | |
| 179 "\n" | |
| 180 "Options:\n" | |
| 181 " -u TM-score normalized by user assigned length (the same as -L)\n" | |
| 182 " warning: it should be >= minimum length of the two structures\n" | |
| 183 " otherwise, TM-score may be >1\n" | |
| 184 "\n" | |
| 185 " -a TM-score normalized by the average length of two structures\n" | |
| 186 " T or F, (default F)\n" | |
| 187 "\n" | |
| 188 " -i Start with an alignment specified in fasta file 'align.txt'\n" | |
| 189 "\n" | |
| 190 " -I Stick to the alignment specified in 'align.txt'\n" | |
| 191 "\n" | |
| 192 " -m Output TM-align rotation matrix\n" | |
| 193 "\n" | |
| 194 " -d TM-score scaled by an assigned d0, e.g. 5 Angstroms\n" | |
| 195 "\n" | |
| 196 " -o Output the superposition to 'TM_sup*'\n" | |
| 197 " $ TMalign PDB1.pdb PDB2.pdb -o TM_sup\n" | |
| 198 " View superposed C-alpha traces of aligned regions by RasMol or PyMOL:\n" | |
| 199 " $ rasmol -script TM_sup\n" | |
| 200 " $ pymol -d @TM_sup.pml\n" | |
| 201 " View superposed C-alpha traces of all regions:\n" | |
| 202 " $ rasmol -script TM_sup_all\n" | |
| 203 " $ pymol -d @TM_sup_all.pml\n" | |
| 204 " View superposed full-atom structures of aligned regions:\n" | |
| 205 " $ rasmol -script TM_sup_atm\n" | |
| 206 " $ pymol -d @TM_sup_atm.pml\n" | |
| 207 " View superposed full-atom structures of all regions:\n" | |
| 208 " $ rasmol -script TM_sup_all_atm\n" | |
| 209 " $ pymol -d @TM_sup_all_atm.pml\n" | |
| 210 " View superposed full-atom structures and ligands of all regions\n" | |
| 211 " $ rasmol -script TM_sup_all_atm_lig\n" | |
| 212 " $ pymol -d @TM_sup_all_atm_lig.pml\n" | |
| 213 "\n" | |
| 214 " -fast Fast but slightly inaccurate alignment by fTM-align algorithm\n" | |
| 215 "\n" | |
| 216 " -cp Alignment with circular permutation\n" | |
| 217 "\n" | |
| 218 " -v Print the version of TM-align\n" | |
| 219 "\n" | |
| 220 " -h Print the full help message, including additional options\n" | |
| 221 "\n" | |
| 222 " (Options -u, -a, -d, -o will not change the final structure alignment)\n\n" | |
| 223 "Example usages:\n" | |
| 224 " TMalign PDB1.pdb PDB2.pdb\n" | |
| 225 " TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n" | |
| 226 " TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n" | |
| 227 " TMalign PDB1.pdb PDB2.pdb -i align.txt\n" | |
| 228 " TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n" | |
| 229 " TMalign PDB1.pdb PDB2.pdb -fast\n" | |
| 230 " TMalign PDB1.pdb PDB2.pdb -cp\n" | |
| 231 <<endl; | |
| 232 | |
| 233 if (h_opt) print_extra_help(); | |
| 234 | |
| 235 exit(EXIT_SUCCESS); | |
| 236 } | |
| 237 | |
| 238 | |
| 239 /* Functions for the core TMalign algorithm, including the entry function | |
| 240 * TMalign_main */ | |
| 241 | |
| 242 void PrintErrorAndQuit(const string sErrorString) | |
| 243 { | |
| 244 cout << sErrorString << endl; | |
| 245 exit(1); | |
| 246 } | |
| 247 | |
| 248 template <typename T> inline T getmin(const T &a, const T &b) | |
| 249 { | |
| 250 return b<a?b:a; | |
| 251 } | |
| 252 | |
| 253 template <class A> void NewArray(A *** array, int Narray1, int Narray2) | |
| 254 { | |
| 255 *array=new A* [Narray1]; | |
| 256 for(int i=0; i<Narray1; i++) *(*array+i)=new A [Narray2]; | |
| 257 } | |
| 258 | |
| 259 template <class A> void DeleteArray(A *** array, int Narray) | |
| 260 { | |
| 261 for(int i=0; i<Narray; i++) | |
| 262 if(*(*array+i)) delete [] *(*array+i); | |
| 263 if(Narray) delete [] (*array); | |
| 264 (*array)=NULL; | |
| 265 } | |
| 266 | |
| 267 string AAmap(char A) | |
| 268 { | |
| 269 if (A=='A') return "ALA"; | |
| 270 if (A=='B') return "ASX"; | |
| 271 if (A=='C') return "CYS"; | |
| 272 if (A=='D') return "ASP"; | |
| 273 if (A=='E') return "GLU"; | |
| 274 if (A=='F') return "PHE"; | |
| 275 if (A=='G') return "GLY"; | |
| 276 if (A=='H') return "HIS"; | |
| 277 if (A=='I') return "ILE"; | |
| 278 if (A=='K') return "LYS"; | |
| 279 if (A=='L') return "LEU"; | |
| 280 if (A=='M') return "MET"; | |
| 281 if (A=='N') return "ASN"; | |
| 282 if (A=='O') return "PYL"; | |
| 283 if (A=='P') return "PRO"; | |
| 284 if (A=='Q') return "GLN"; | |
| 285 if (A=='R') return "ARG"; | |
| 286 if (A=='S') return "SER"; | |
| 287 if (A=='T') return "THR"; | |
| 288 if (A=='U') return "SEC"; | |
| 289 if (A=='V') return "VAL"; | |
| 290 if (A=='W') return "TRP"; | |
| 291 if (A=='Y') return "TYR"; | |
| 292 if (A=='Z') return "GLX"; | |
| 293 return "UNK"; | |
| 294 } | |
| 295 | |
| 296 char AAmap(const string &AA) | |
| 297 { | |
| 298 if (AA.compare("ALA")==0 || AA.compare("DAL")==0) return 'A'; | |
| 299 if (AA.compare("ASX")==0) return 'B'; | |
| 300 if (AA.compare("CYS")==0 || AA.compare("DCY")==0) return 'C'; | |
| 301 if (AA.compare("ASP")==0 || AA.compare("DAS")==0) return 'D'; | |
| 302 if (AA.compare("GLU")==0 || AA.compare("DGL")==0) return 'E'; | |
| 303 if (AA.compare("PHE")==0 || AA.compare("DPN")==0) return 'F'; | |
| 304 if (AA.compare("GLY")==0) return 'G'; | |
| 305 if (AA.compare("HIS")==0 || AA.compare("DHI")==0) return 'H'; | |
| 306 if (AA.compare("ILE")==0 || AA.compare("DIL")==0) return 'I'; | |
| 307 if (AA.compare("LYS")==0 || AA.compare("DLY")==0) return 'K'; | |
| 308 if (AA.compare("LEU")==0 || AA.compare("DLE")==0) return 'L'; | |
| 309 if (AA.compare("MET")==0 || AA.compare("MED")==0 || | |
| 310 AA.compare("MSE")==0) return 'M'; | |
| 311 if (AA.compare("ASN")==0 || AA.compare("DSG")==0) return 'N'; | |
| 312 if (AA.compare("PYL")==0) return 'O'; | |
| 313 if (AA.compare("PRO")==0 || AA.compare("DPR")==0) return 'P'; | |
| 314 if (AA.compare("GLN")==0 || AA.compare("DGN")==0) return 'Q'; | |
| 315 if (AA.compare("ARG")==0 || AA.compare("DAR")==0) return 'R'; | |
| 316 if (AA.compare("SER")==0 || AA.compare("DSN")==0) return 'S'; | |
| 317 if (AA.compare("THR")==0 || AA.compare("DTH")==0) return 'T'; | |
| 318 if (AA.compare("SEC")==0) return 'U'; | |
| 319 if (AA.compare("VAL")==0 || AA.compare("DVA")==0) return 'V'; | |
| 320 if (AA.compare("TRP")==0 || AA.compare("DTR")==0) return 'W'; | |
| 321 if (AA.compare("TYR")==0 || AA.compare("DTY")==0) return 'Y'; | |
| 322 if (AA.compare("GLX")==0) return 'Z'; | |
| 323 return 'X'; | |
| 324 } | |
| 325 | |
| 326 /* split a long string into vectors by whitespace | |
| 327 * line - input string | |
| 328 * line_vec - output vector | |
| 329 * delimiter - delimiter */ | |
| 330 void split(const string &line, vector<string> &line_vec, | |
| 331 const char delimiter=' ') | |
| 332 { | |
| 333 bool within_word = false; | |
| 334 for (int pos=0;pos<line.size();pos++) | |
| 335 { | |
| 336 if (line[pos]==delimiter) | |
| 337 { | |
| 338 within_word = false; | |
| 339 continue; | |
| 340 } | |
| 341 if (!within_word) | |
| 342 { | |
| 343 within_word = true; | |
| 344 line_vec.push_back(""); | |
| 345 } | |
| 346 line_vec.back()+=line[pos]; | |
| 347 } | |
| 348 } | |
| 349 | |
| 350 /* split a long string into vectors by whitespace, return both whitespaces | |
| 351 * and non-whitespaces | |
| 352 * line - input string | |
| 353 * line_vec - output vector | |
| 354 * space_vec - output vector | |
| 355 * delimiter - delimiter */ | |
| 356 void split_white(const string &line, vector<string> &line_vec, | |
| 357 vector<string>&white_vec, const char delimiter=' ') | |
| 358 { | |
| 359 bool within_word = false; | |
| 360 for (int pos=0;pos<line.size();pos++) | |
| 361 { | |
| 362 if (line[pos]==delimiter) | |
| 363 { | |
| 364 if (within_word==true) | |
| 365 { | |
| 366 white_vec.push_back(""); | |
| 367 within_word = false; | |
| 368 } | |
| 369 white_vec.back()+=delimiter; | |
| 370 } | |
| 371 else | |
| 372 { | |
| 373 if (within_word==false) | |
| 374 { | |
| 375 line_vec.push_back(""); | |
| 376 within_word = true; | |
| 377 } | |
| 378 line_vec.back()+=line[pos]; | |
| 379 } | |
| 380 } | |
| 381 } | |
| 382 | |
| 383 size_t get_PDB_lines(const string filename, | |
| 384 vector<vector<string> >&PDB_lines, vector<string> &chainID_list, | |
| 385 vector<int> &mol_vec, const int ter_opt, const int infmt_opt, | |
| 386 const string atom_opt, const int split_opt, const int het_opt) | |
| 387 { | |
| 388 size_t i=0; // resi i.e. atom index | |
| 389 string line; | |
| 390 char chainID=0; | |
| 391 string resi=""; | |
| 392 bool select_atom=false; | |
| 393 size_t model_idx=0; | |
| 394 vector<string> tmp_str_vec; | |
| 395 | |
| 396 ifstream fin; | |
| 397 fin.open(filename.c_str()); | |
| 398 | |
| 399 if (infmt_opt==0||infmt_opt==-1) // PDB format | |
| 400 { | |
| 401 while (fin.good()) | |
| 402 { | |
| 403 getline(fin, line); | |
| 404 if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF | |
| 405 return get_PDB_lines(filename,PDB_lines,chainID_list, | |
| 406 mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt); | |
| 407 if (i > 0) | |
| 408 { | |
| 409 if (ter_opt>=1 && line.compare(0,3,"END")==0) break; | |
| 410 else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break; | |
| 411 } | |
| 412 if (split_opt && line.compare(0,3,"END")==0) chainID=0; | |
| 413 if ((line.compare(0, 6, "ATOM ")==0 || | |
| 414 (line.compare(0, 6, "HETATM")==0 && het_opt)) | |
| 415 && line.size()>=54 && (line[16]==' ' || line[16]=='A')) | |
| 416 { | |
| 417 if (atom_opt=="auto") | |
| 418 select_atom=(line.compare(12,4," CA ")==0); | |
| 419 else select_atom=(line.compare(12,4,atom_opt)==0); | |
| 420 if (select_atom) | |
| 421 { | |
| 422 if (!chainID) | |
| 423 { | |
| 424 chainID=line[21]; | |
| 425 model_idx++; | |
| 426 stringstream i8_stream; | |
| 427 i=0; | |
| 428 if (split_opt==2) // split by chain | |
| 429 { | |
| 430 if (chainID==' ') | |
| 431 { | |
| 432 if (ter_opt>=1) i8_stream << ":_"; | |
| 433 else i8_stream<<':'<<model_idx<<":_"; | |
| 434 } | |
| 435 else | |
| 436 { | |
| 437 if (ter_opt>=1) i8_stream << ':' << chainID; | |
| 438 else i8_stream<<':'<<model_idx<<':'<<chainID; | |
| 439 } | |
| 440 chainID_list.push_back(i8_stream.str()); | |
| 441 } | |
| 442 else if (split_opt==1) // split by model | |
| 443 { | |
| 444 i8_stream << ':' << model_idx; | |
| 445 chainID_list.push_back(i8_stream.str()); | |
| 446 } | |
| 447 PDB_lines.push_back(tmp_str_vec); | |
| 448 mol_vec.push_back(0); | |
| 449 } | |
| 450 else if (ter_opt>=2 && chainID!=line[21]) break; | |
| 451 if (split_opt==2 && chainID!=line[21]) | |
| 452 { | |
| 453 chainID=line[21]; | |
| 454 i=0; | |
| 455 stringstream i8_stream; | |
| 456 if (chainID==' ') | |
| 457 { | |
| 458 if (ter_opt>=1) i8_stream << ":_"; | |
| 459 else i8_stream<<':'<<model_idx<<":_"; | |
| 460 } | |
| 461 else | |
| 462 { | |
| 463 if (ter_opt>=1) i8_stream << ':' << chainID; | |
| 464 else i8_stream<<':'<<model_idx<<':'<<chainID; | |
| 465 } | |
| 466 chainID_list.push_back(i8_stream.str()); | |
| 467 PDB_lines.push_back(tmp_str_vec); | |
| 468 mol_vec.push_back(0); | |
| 469 } | |
| 470 | |
| 471 if (resi==line.substr(22,5)) | |
| 472 cerr<<"Warning! Duplicated residue "<<resi<<endl; | |
| 473 resi=line.substr(22,5); // including insertion code | |
| 474 | |
| 475 PDB_lines.back().push_back(line); | |
| 476 if (line[17]==' ' && (line[18]=='D'||line[18]==' ')) mol_vec.back()++; | |
| 477 else mol_vec.back()--; | |
| 478 i++; | |
| 479 } | |
| 480 } | |
| 481 } | |
| 482 } | |
| 483 else if (infmt_opt==1) // SPICKER format | |
| 484 { | |
| 485 int L=0; | |
| 486 float x,y,z; | |
| 487 stringstream i8_stream; | |
| 488 while (fin.good()) | |
| 489 { | |
| 490 fin >>L>>x>>y>>z; | |
| 491 getline(fin, line); | |
| 492 if (!fin.good()) break; | |
| 493 model_idx++; | |
| 494 stringstream i8_stream; | |
| 495 i8_stream << ':' << model_idx; | |
| 496 chainID_list.push_back(i8_stream.str()); | |
| 497 PDB_lines.push_back(tmp_str_vec); | |
| 498 mol_vec.push_back(0); | |
| 499 for (i=0;i<L;i++) | |
| 500 { | |
| 501 fin >>x>>y>>z; | |
| 502 i8_stream<<"ATOM "<<setw(4)<<i+1<<" CA UNK "<<setw(4) | |
| 503 <<i+1<<" "<<setiosflags(ios::fixed)<<setprecision(3) | |
| 504 <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z; | |
| 505 line=i8_stream.str(); | |
| 506 i8_stream.str(string()); | |
| 507 PDB_lines.back().push_back(line); | |
| 508 } | |
| 509 getline(fin, line); | |
| 510 } | |
| 511 } | |
| 512 else if (infmt_opt==2) // xyz format | |
| 513 { | |
| 514 int L=0; | |
| 515 char A; | |
| 516 stringstream i8_stream; | |
| 517 while (fin.good()) | |
| 518 { | |
| 519 getline(fin, line); | |
| 520 L=atoi(line.c_str()); | |
| 521 getline(fin, line); | |
| 522 for (i=0;i<line.size();i++) | |
| 523 if (line[i]==' '||line[i]=='\t') break; | |
| 524 if (!fin.good()) break; | |
| 525 chainID_list.push_back(':'+line.substr(0,i)); | |
| 526 PDB_lines.push_back(tmp_str_vec); | |
| 527 mol_vec.push_back(0); | |
| 528 for (i=0;i<L;i++) | |
| 529 { | |
| 530 getline(fin, line); | |
| 531 i8_stream<<"ATOM "<<setw(4)<<i+1<<" CA " | |
| 532 <<AAmap(line[0])<<" "<<setw(4)<<i+1<<" " | |
| 533 <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8); | |
| 534 line=i8_stream.str(); | |
| 535 i8_stream.str(string()); | |
| 536 PDB_lines.back().push_back(line); | |
| 537 if (line[0]>='a' && line[0]<='z') mol_vec.back()++; // RNA | |
| 538 else mol_vec.back()--; | |
| 539 } | |
| 540 } | |
| 541 } | |
| 542 else if (infmt_opt==3) // PDBx/mmCIF format | |
| 543 { | |
| 544 bool loop_ = false; // not reading following content | |
| 545 map<string,int> _atom_site; | |
| 546 int atom_site_pos; | |
| 547 vector<string> line_vec; | |
| 548 string alt_id="."; // alternative location indicator | |
| 549 string asym_id="."; // this is similar to chainID, except that | |
| 550 // chainID is char while asym_id is a string | |
| 551 // with possibly multiple char | |
| 552 string prev_asym_id=""; | |
| 553 string AA=""; // residue name | |
| 554 string atom=""; | |
| 555 string prev_resi=""; | |
| 556 string model_index=""; // the same as model_idx but type is string | |
| 557 stringstream i8_stream; | |
| 558 while (fin.good()) | |
| 559 { | |
| 560 getline(fin, line); | |
| 561 if (line.size()==0) continue; | |
| 562 if (loop_) loop_ = line.compare(0,2,"# "); | |
| 563 if (!loop_) | |
| 564 { | |
| 565 if (line.compare(0,5,"loop_")) continue; | |
| 566 while(1) | |
| 567 { | |
| 568 if (fin.good()) getline(fin, line); | |
| 569 else PrintErrorAndQuit("ERROR! Unexpected end of "+filename); | |
| 570 if (line.size()) break; | |
| 571 } | |
| 572 if (line.compare(0,11,"_atom_site.")) continue; | |
| 573 | |
| 574 loop_=true; | |
| 575 _atom_site.clear(); | |
| 576 atom_site_pos=0; | |
| 577 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos; | |
| 578 | |
| 579 while(1) | |
| 580 { | |
| 581 if (fin.good()) getline(fin, line); | |
| 582 else PrintErrorAndQuit("ERROR! Unexpected end of "+filename); | |
| 583 if (line.size()==0) continue; | |
| 584 if (line.compare(0,11,"_atom_site.")) break; | |
| 585 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos; | |
| 586 } | |
| 587 | |
| 588 | |
| 589 if (_atom_site.count("group_PDB")* | |
| 590 _atom_site.count("label_atom_id")* | |
| 591 _atom_site.count("label_comp_id")* | |
| 592 (_atom_site.count("auth_asym_id")+ | |
| 593 _atom_site.count("label_asym_id"))* | |
| 594 (_atom_site.count("auth_seq_id")+ | |
| 595 _atom_site.count("label_seq_id"))* | |
| 596 _atom_site.count("Cartn_x")* | |
| 597 _atom_site.count("Cartn_y")* | |
| 598 _atom_site.count("Cartn_z")==0) | |
| 599 { | |
| 600 loop_ = false; | |
| 601 cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_atom_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl; | |
| 602 continue; | |
| 603 } | |
| 604 } | |
| 605 | |
| 606 line_vec.clear(); | |
| 607 split(line,line_vec); | |
| 608 if (line_vec[_atom_site["group_PDB"]]!="ATOM" && (het_opt==0 || | |
| 609 line_vec[_atom_site["group_PDB"]]!="HETATM")) continue; | |
| 610 | |
| 611 alt_id="."; | |
| 612 if (_atom_site.count("label_alt_id")) // in 39.4 % of entries | |
| 613 alt_id=line_vec[_atom_site["label_alt_id"]]; | |
| 614 if (alt_id!="." && alt_id!="A") continue; | |
| 615 | |
| 616 atom=line_vec[_atom_site["label_atom_id"]]; | |
| 617 if (atom[0]=='"') atom=atom.substr(1); | |
| 618 if (atom.size() && atom[atom.size()-1]=='"') | |
| 619 atom=atom.substr(0,atom.size()-1); | |
| 620 if (atom.size()==0) continue; | |
| 621 if (atom.size()==1) atom=" "+atom+" "; | |
| 622 else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H | |
| 623 else if (atom.size()==3) atom=" "+atom; | |
| 624 else if (atom.size()>=5) continue; | |
| 625 | |
| 626 AA=line_vec[_atom_site["label_comp_id"]]; // residue name | |
| 627 if (AA.size()==1) AA=" "+AA; | |
| 628 else if (AA.size()==2) AA=" " +AA; | |
| 629 else if (AA.size()>=4) continue; | |
| 630 | |
| 631 if (atom_opt=="auto") | |
| 632 select_atom=(atom==" CA "); | |
| 633 else select_atom=(atom==atom_opt); | |
| 634 | |
| 635 if (!select_atom) continue; | |
| 636 | |
| 637 if (_atom_site.count("auth_asym_id")) | |
| 638 asym_id=line_vec[_atom_site["auth_asym_id"]]; | |
| 639 else asym_id=line_vec[_atom_site["label_asym_id"]]; | |
| 640 if (asym_id==".") asym_id=" "; | |
| 641 | |
| 642 if (_atom_site.count("pdbx_PDB_model_num") && | |
| 643 model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]]) | |
| 644 { | |
| 645 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]]; | |
| 646 if (PDB_lines.size() && ter_opt>=1) break; | |
| 647 if (PDB_lines.size()==0 || split_opt>=1) | |
| 648 { | |
| 649 PDB_lines.push_back(tmp_str_vec); | |
| 650 mol_vec.push_back(0); | |
| 651 prev_asym_id=asym_id; | |
| 652 | |
| 653 if (split_opt==1 && ter_opt==0) chainID_list.push_back( | |
| 654 ':'+model_index); | |
| 655 else if (split_opt==2 && ter_opt==0) | |
| 656 chainID_list.push_back(':'+model_index+':'+asym_id); | |
| 657 else if (split_opt==2 && ter_opt==1) | |
| 658 chainID_list.push_back(':'+asym_id); | |
| 659 } | |
| 660 } | |
| 661 | |
| 662 if (prev_asym_id!=asym_id) | |
| 663 { | |
| 664 if (prev_asym_id!="" && ter_opt>=2) break; | |
| 665 if (split_opt>=2) | |
| 666 { | |
| 667 PDB_lines.push_back(tmp_str_vec); | |
| 668 mol_vec.push_back(0); | |
| 669 | |
| 670 if (split_opt==1 && ter_opt==0) chainID_list.push_back( | |
| 671 ':'+model_index); | |
| 672 else if (split_opt==2 && ter_opt==0) | |
| 673 chainID_list.push_back(':'+model_index+':'+asym_id); | |
| 674 else if (split_opt==2 && ter_opt==1) | |
| 675 chainID_list.push_back(':'+asym_id); | |
| 676 } | |
| 677 } | |
| 678 if (prev_asym_id!=asym_id) prev_asym_id=asym_id; | |
| 679 | |
| 680 if (AA[0]==' ' && (AA[1]=='D'||AA[1]==' ')) mol_vec.back()++; | |
| 681 else mol_vec.back()--; | |
| 682 | |
| 683 if (_atom_site.count("auth_seq_id")) | |
| 684 resi=line_vec[_atom_site["auth_seq_id"]]; | |
| 685 else resi=line_vec[_atom_site["label_seq_id"]]; | |
| 686 if (_atom_site.count("pdbx_PDB_ins_code") && | |
| 687 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") | |
| 688 resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; | |
| 689 else resi+=" "; | |
| 690 | |
| 691 if (prev_resi==resi) | |
| 692 cerr<<"Warning! Duplicated residue "<<resi<<endl; | |
| 693 prev_resi=resi; | |
| 694 | |
| 695 i++; | |
| 696 i8_stream<<"ATOM " | |
| 697 <<setw(5)<<i<<" "<<atom<<" "<<AA<<" "<<asym_id[0] | |
| 698 <<setw(5)<<resi.substr(0,5)<<" " | |
| 699 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 700 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 701 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]; | |
| 702 PDB_lines.back().push_back(i8_stream.str()); | |
| 703 i8_stream.str(string()); | |
| 704 } | |
| 705 _atom_site.clear(); | |
| 706 line_vec.clear(); | |
| 707 alt_id.clear(); | |
| 708 asym_id.clear(); | |
| 709 AA.clear(); | |
| 710 } | |
| 711 | |
| 712 fin.close(); | |
| 713 line.clear(); | |
| 714 if (!split_opt) chainID_list.push_back(""); | |
| 715 return PDB_lines.size(); | |
| 716 } | |
| 717 | |
| 718 /* read fasta file from filename. sequence is stored into FASTA_lines | |
| 719 * while sequence name is stored into chainID_list. | |
| 720 * if ter_opt >=1, only read the first sequence. | |
| 721 * if ter_opt ==0, read all sequences. | |
| 722 * if split_opt >=1 and ter_opt ==0, each sequence is a separate entry. | |
| 723 * if split_opt ==0 and ter_opt ==0, all sequences are combined into one */ | |
| 724 size_t get_FASTA_lines(const string filename, | |
| 725 vector<vector<string> >&FASTA_lines, vector<string> &chainID_list, | |
| 726 vector<int> &mol_vec, const int ter_opt=3, const int split_opt=0) | |
| 727 { | |
| 728 string line; | |
| 729 vector<string> tmp_str_vec; | |
| 730 int l; | |
| 731 | |
| 732 ifstream fin; | |
| 733 fin.open(filename.c_str()); | |
| 734 | |
| 735 while (fin.good()) | |
| 736 { | |
| 737 getline(fin, line); | |
| 738 if (line.size()==0 || line[0]=='#') continue; | |
| 739 | |
| 740 if (line[0]=='>') | |
| 741 { | |
| 742 if (FASTA_lines.size()) | |
| 743 { | |
| 744 if (ter_opt) break; | |
| 745 if (split_opt==0) continue; | |
| 746 } | |
| 747 FASTA_lines.push_back(tmp_str_vec); | |
| 748 FASTA_lines.back().push_back(""); | |
| 749 mol_vec.push_back(0); | |
| 750 if (ter_opt==0 && split_opt) | |
| 751 { | |
| 752 line[0]=':'; | |
| 753 chainID_list.push_back(line); | |
| 754 } | |
| 755 else chainID_list.push_back(""); | |
| 756 } | |
| 757 else | |
| 758 { | |
| 759 FASTA_lines.back()[0]+=line; | |
| 760 for (l=0;l<line.size();l++) mol_vec.back()+= | |
| 761 ('a'<=line[l] && line[l]<='z')-('A'<=line[l] && line[l]<='Z'); | |
| 762 } | |
| 763 } | |
| 764 | |
| 765 line.clear(); | |
| 766 fin.close(); | |
| 767 return FASTA_lines.size(); | |
| 768 } | |
| 769 | |
| 770 | |
| 771 /* extract pairwise sequence alignment from residue index vectors, | |
| 772 * assuming that "sequence" contains two empty strings. | |
| 773 * return length of alignment, including gap. */ | |
| 774 int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy, | |
| 775 const vector<string> resi_vec1, const vector<string> resi_vec2, | |
| 776 const int byresi_opt) | |
| 777 { | |
| 778 sequence.clear(); | |
| 779 sequence.push_back(""); | |
| 780 sequence.push_back(""); | |
| 781 | |
| 782 int i1=0; // positions in resi_vec1 | |
| 783 int i2=0; // positions in resi_vec2 | |
| 784 int xlen=resi_vec1.size(); | |
| 785 int ylen=resi_vec2.size(); | |
| 786 map<char,int> chainID_map1; | |
| 787 map<char,int> chainID_map2; | |
| 788 if (byresi_opt==3) | |
| 789 { | |
| 790 vector<char> chainID_vec; | |
| 791 char chainID; | |
| 792 int i; | |
| 793 for (i=0;i<xlen;i++) | |
| 794 { | |
| 795 chainID=resi_vec1[i][5]; | |
| 796 if (!chainID_vec.size()|| chainID_vec.back()!=chainID) | |
| 797 { | |
| 798 chainID_vec.push_back(chainID); | |
| 799 chainID_map1[chainID]=chainID_vec.size(); | |
| 800 } | |
| 801 } | |
| 802 chainID_vec.clear(); | |
| 803 for (i=0;i<ylen;i++) | |
| 804 { | |
| 805 chainID=resi_vec2[i][5]; | |
| 806 if (!chainID_vec.size()|| chainID_vec.back()!=chainID) | |
| 807 { | |
| 808 chainID_vec.push_back(chainID); | |
| 809 chainID_map2[chainID]=chainID_vec.size(); | |
| 810 } | |
| 811 } | |
| 812 chainID_vec.clear(); | |
| 813 } | |
| 814 while(i1<xlen && i2<ylen) | |
| 815 { | |
| 816 if ((byresi_opt<=2 && resi_vec1[i1]==resi_vec2[i2]) || (byresi_opt==3 | |
| 817 && resi_vec1[i1].substr(0,5)==resi_vec2[i2].substr(0,5) | |
| 818 && chainID_map1[resi_vec1[i1][5]]==chainID_map2[resi_vec2[i2][5]])) | |
| 819 { | |
| 820 sequence[0]+=seqx[i1++]; | |
| 821 sequence[1]+=seqy[i2++]; | |
| 822 } | |
| 823 else if (atoi(resi_vec1[i1].substr(0,4).c_str())<= | |
| 824 atoi(resi_vec2[i2].substr(0,4).c_str())) | |
| 825 { | |
| 826 sequence[0]+=seqx[i1++]; | |
| 827 sequence[1]+='-'; | |
| 828 } | |
| 829 else | |
| 830 { | |
| 831 sequence[0]+='-'; | |
| 832 sequence[1]+=seqy[i2++]; | |
| 833 } | |
| 834 } | |
| 835 chainID_map1.clear(); | |
| 836 chainID_map2.clear(); | |
| 837 return sequence[0].size(); | |
| 838 } | |
| 839 | |
| 840 int read_PDB(const vector<string> &PDB_lines, double **a, char *seq, | |
| 841 vector<string> &resi_vec, const int byresi_opt) | |
| 842 { | |
| 843 int i; | |
| 844 for (i=0;i<PDB_lines.size();i++) | |
| 845 { | |
| 846 a[i][0] = atof(PDB_lines[i].substr(30, 8).c_str()); | |
| 847 a[i][1] = atof(PDB_lines[i].substr(38, 8).c_str()); | |
| 848 a[i][2] = atof(PDB_lines[i].substr(46, 8).c_str()); | |
| 849 seq[i] = AAmap(PDB_lines[i].substr(17, 3)); | |
| 850 | |
| 851 if (byresi_opt>=2) resi_vec.push_back(PDB_lines[i].substr(22,5)+ | |
| 852 PDB_lines[i][21]); | |
| 853 if (byresi_opt==1) resi_vec.push_back(PDB_lines[i].substr(22,5)); | |
| 854 } | |
| 855 seq[i]='\0'; | |
| 856 return i; | |
| 857 } | |
| 858 | |
| 859 double dist(double x[3], double y[3]) | |
| 860 { | |
| 861 double d1=x[0]-y[0]; | |
| 862 double d2=x[1]-y[1]; | |
| 863 double d3=x[2]-y[2]; | |
| 864 | |
| 865 return (d1*d1 + d2*d2 + d3*d3); | |
| 866 } | |
| 867 | |
| 868 double dot(double *a, double *b) | |
| 869 { | |
| 870 return (a[0] * b[0] + a[1] * b[1] + a[2] * b[2]); | |
| 871 } | |
| 872 | |
| 873 void transform(double t[3], double u[3][3], double *x, double *x1) | |
| 874 { | |
| 875 x1[0]=t[0]+dot(&u[0][0], x); | |
| 876 x1[1]=t[1]+dot(&u[1][0], x); | |
| 877 x1[2]=t[2]+dot(&u[2][0], x); | |
| 878 } | |
| 879 | |
| 880 void do_rotation(double **x, double **x1, int len, double t[3], double u[3][3]) | |
| 881 { | |
| 882 for(int i=0; i<len; i++) | |
| 883 { | |
| 884 transform(t, u, &x[i][0], &x1[i][0]); | |
| 885 } | |
| 886 } | |
| 887 | |
| 888 /* strip white space at the begining or end of string */ | |
| 889 string Trim(const string &inputString) | |
| 890 { | |
| 891 string result = inputString; | |
| 892 int idxBegin = inputString.find_first_not_of(" \n\r\t"); | |
| 893 int idxEnd = inputString.find_last_not_of(" \n\r\t"); | |
| 894 if (idxBegin >= 0 && idxEnd >= 0) | |
| 895 result = inputString.substr(idxBegin, idxEnd + 1 - idxBegin); | |
| 896 return result; | |
| 897 } | |
| 898 | |
| 899 /* read user specified pairwise alignment from 'fname_lign' to 'sequence'. | |
| 900 * This function should only be called by main function, as it will | |
| 901 * terminate a program if wrong alignment is given */ | |
| 902 void read_user_alignment(vector<string>&sequence, const string &fname_lign, | |
| 903 const int i_opt) | |
| 904 { | |
| 905 if (fname_lign == "") | |
| 906 PrintErrorAndQuit("Please provide a file name for option -i!"); | |
| 907 // open alignment file | |
| 908 int n_p = 0;// number of structures in alignment file | |
| 909 string line; | |
| 910 | |
| 911 ifstream fileIn(fname_lign.c_str()); | |
| 912 if (fileIn.is_open()) | |
| 913 { | |
| 914 while (fileIn.good()) | |
| 915 { | |
| 916 getline(fileIn, line); | |
| 917 if (line.compare(0, 1, ">") == 0)// Flag for a new structure | |
| 918 { | |
| 919 if (n_p >= 2) break; | |
| 920 sequence.push_back(""); | |
| 921 n_p++; | |
| 922 } | |
| 923 else if (n_p > 0 && line!="") sequence.back()+=line; | |
| 924 } | |
| 925 fileIn.close(); | |
| 926 } | |
| 927 else PrintErrorAndQuit("ERROR! Alignment file does not exist."); | |
| 928 | |
| 929 if (n_p < 2) | |
| 930 PrintErrorAndQuit("ERROR: Fasta format is wrong, two proteins should be included."); | |
| 931 if (sequence[0].size() != sequence[1].size()) | |
| 932 PrintErrorAndQuit("ERROR! FASTA file is wrong. The length in alignment should be equal for the two aligned proteins."); | |
| 933 if (i_opt==3) | |
| 934 { | |
| 935 int aligned_resNum=0; | |
| 936 for (int i=0;i<sequence[0].size();i++) | |
| 937 aligned_resNum+=(sequence[0][i]!='-' && sequence[1][i]!='-'); | |
| 938 if (aligned_resNum<3) | |
| 939 PrintErrorAndQuit("ERROR! Superposition is undefined for <3 aligned residues."); | |
| 940 } | |
| 941 line.clear(); | |
| 942 return; | |
| 943 } | |
| 944 | |
| 945 /* read list of entries from 'name' to 'chain_list'. | |
| 946 * dir_opt is the folder name (prefix). | |
| 947 * suffix_opt is the file name extension (suffix_opt). | |
| 948 * This function should only be called by main function, as it will | |
| 949 * terminate a program if wrong alignment is given */ | |
| 950 void file2chainlist(vector<string>&chain_list, const string &name, | |
| 951 const string &dir_opt, const string &suffix_opt) | |
| 952 { | |
| 953 ifstream fp(name.c_str()); | |
| 954 if (! fp.is_open()) | |
| 955 PrintErrorAndQuit(("Can not open file: "+name+'\n').c_str()); | |
| 956 string line; | |
| 957 while (fp.good()) | |
| 958 { | |
| 959 getline(fp, line); | |
| 960 if (! line.size()) continue; | |
| 961 chain_list.push_back(dir_opt+Trim(line)+suffix_opt); | |
| 962 } | |
| 963 fp.close(); | |
| 964 line.clear(); | |
| 965 } | |
| 966 | |
| 967 /************************************************************************** | |
| 968 Implemetation of Kabsch algoritm for finding the best rotation matrix | |
| 969 --------------------------------------------------------------------------- | |
| 970 x - x(i,m) are coordinates of atom m in set x (input) | |
| 971 y - y(i,m) are coordinates of atom m in set y (input) | |
| 972 n - n is number of atom pairs (input) | |
| 973 mode - 0:calculate rms only (input) | |
| 974 1:calculate u,t only (takes medium) | |
| 975 2:calculate rms,u,t (takes longer) | |
| 976 rms - sum of w*(ux+t-y)**2 over all atom pairs (output) | |
| 977 u - u(i,j) is rotation matrix for best superposition (output) | |
| 978 t - t(i) is translation vector for best superposition (output) | |
| 979 **************************************************************************/ | |
| 980 bool Kabsch(double **x, double **y, int n, int mode, double *rms, | |
| 981 double t[3], double u[3][3]) | |
| 982 { | |
| 983 int i, j, m, m1, l, k; | |
| 984 double e0, rms1, d, h, g; | |
| 985 double cth, sth, sqrth, p, det, sigma; | |
| 986 double xc[3], yc[3]; | |
| 987 double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6]; | |
| 988 double sqrt3 = 1.73205080756888, tol = 0.01; | |
| 989 int ip[] = { 0, 1, 3, 1, 2, 4, 3, 4, 5 }; | |
| 990 int ip2312[] = { 1, 2, 0, 1 }; | |
| 991 | |
| 992 int a_failed = 0, b_failed = 0; | |
| 993 double epsilon = 0.00000001; | |
| 994 | |
| 995 //initializtation | |
| 996 *rms = 0; | |
| 997 rms1 = 0; | |
| 998 e0 = 0; | |
| 999 double c1[3], c2[3]; | |
| 1000 double s1[3], s2[3]; | |
| 1001 double sx[3], sy[3], sz[3]; | |
| 1002 for (i = 0; i < 3; i++) | |
| 1003 { | |
| 1004 s1[i] = 0.0; | |
| 1005 s2[i] = 0.0; | |
| 1006 | |
| 1007 sx[i] = 0.0; | |
| 1008 sy[i] = 0.0; | |
| 1009 sz[i] = 0.0; | |
| 1010 } | |
| 1011 | |
| 1012 for (i = 0; i<3; i++) | |
| 1013 { | |
| 1014 xc[i] = 0.0; | |
| 1015 yc[i] = 0.0; | |
| 1016 t[i] = 0.0; | |
| 1017 for (j = 0; j<3; j++) | |
| 1018 { | |
| 1019 u[i][j] = 0.0; | |
| 1020 r[i][j] = 0.0; | |
| 1021 a[i][j] = 0.0; | |
| 1022 if (i == j) | |
| 1023 { | |
| 1024 u[i][j] = 1.0; | |
| 1025 a[i][j] = 1.0; | |
| 1026 } | |
| 1027 } | |
| 1028 } | |
| 1029 | |
| 1030 if (n<1) return false; | |
| 1031 | |
| 1032 //compute centers for vector sets x, y | |
| 1033 for (i = 0; i<n; i++) | |
| 1034 { | |
| 1035 for (j = 0; j < 3; j++) | |
| 1036 { | |
| 1037 c1[j] = x[i][j]; | |
| 1038 c2[j] = y[i][j]; | |
| 1039 | |
| 1040 s1[j] += c1[j]; | |
| 1041 s2[j] += c2[j]; | |
| 1042 } | |
| 1043 | |
| 1044 for (j = 0; j < 3; j++) | |
| 1045 { | |
| 1046 sx[j] += c1[0] * c2[j]; | |
| 1047 sy[j] += c1[1] * c2[j]; | |
| 1048 sz[j] += c1[2] * c2[j]; | |
| 1049 } | |
| 1050 } | |
| 1051 for (i = 0; i < 3; i++) | |
| 1052 { | |
| 1053 xc[i] = s1[i] / n; | |
| 1054 yc[i] = s2[i] / n; | |
| 1055 } | |
| 1056 if (mode == 2 || mode == 0) | |
| 1057 for (int mm = 0; mm < n; mm++) | |
| 1058 for (int nn = 0; nn < 3; nn++) | |
| 1059 e0 += (x[mm][nn] - xc[nn]) * (x[mm][nn] - xc[nn]) + | |
| 1060 (y[mm][nn] - yc[nn]) * (y[mm][nn] - yc[nn]); | |
| 1061 for (j = 0; j < 3; j++) | |
| 1062 { | |
| 1063 r[j][0] = sx[j] - s1[0] * s2[j] / n; | |
| 1064 r[j][1] = sy[j] - s1[1] * s2[j] / n; | |
| 1065 r[j][2] = sz[j] - s1[2] * s2[j] / n; | |
| 1066 } | |
| 1067 | |
| 1068 //compute determinat of matrix r | |
| 1069 det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1])\ | |
| 1070 - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0])\ | |
| 1071 + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]); | |
| 1072 sigma = det; | |
| 1073 | |
| 1074 //compute tras(r)*r | |
| 1075 m = 0; | |
| 1076 for (j = 0; j<3; j++) | |
| 1077 { | |
| 1078 for (i = 0; i <= j; i++) | |
| 1079 { | |
| 1080 rr[m] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j]; | |
| 1081 m++; | |
| 1082 } | |
| 1083 } | |
| 1084 | |
| 1085 double spur = (rr[0] + rr[2] + rr[5]) / 3.0; | |
| 1086 double cof = (((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5])\ | |
| 1087 - rr[3] * rr[3]) + rr[0] * rr[2]) - rr[1] * rr[1]) / 3.0; | |
| 1088 det = det*det; | |
| 1089 | |
| 1090 for (i = 0; i<3; i++) e[i] = spur; | |
| 1091 | |
| 1092 if (spur>0) | |
| 1093 { | |
| 1094 d = spur*spur; | |
| 1095 h = d - cof; | |
| 1096 g = (spur*cof - det) / 2.0 - spur*h; | |
| 1097 | |
| 1098 if (h>0) | |
| 1099 { | |
| 1100 sqrth = sqrt(h); | |
| 1101 d = h*h*h - g*g; | |
| 1102 if (d<0.0) d = 0.0; | |
| 1103 d = atan2(sqrt(d), -g) / 3.0; | |
| 1104 cth = sqrth * cos(d); | |
| 1105 sth = sqrth*sqrt3*sin(d); | |
| 1106 e[0] = (spur + cth) + cth; | |
| 1107 e[1] = (spur - cth) + sth; | |
| 1108 e[2] = (spur - cth) - sth; | |
| 1109 | |
| 1110 if (mode != 0) | |
| 1111 {//compute a | |
| 1112 for (l = 0; l<3; l = l + 2) | |
| 1113 { | |
| 1114 d = e[l]; | |
| 1115 ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4]; | |
| 1116 ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4]; | |
| 1117 ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3]; | |
| 1118 ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4]; | |
| 1119 ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3]; | |
| 1120 ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1]; | |
| 1121 | |
| 1122 if (fabs(ss[0]) <= epsilon) ss[0] = 0.0; | |
| 1123 if (fabs(ss[1]) <= epsilon) ss[1] = 0.0; | |
| 1124 if (fabs(ss[2]) <= epsilon) ss[2] = 0.0; | |
| 1125 if (fabs(ss[3]) <= epsilon) ss[3] = 0.0; | |
| 1126 if (fabs(ss[4]) <= epsilon) ss[4] = 0.0; | |
| 1127 if (fabs(ss[5]) <= epsilon) ss[5] = 0.0; | |
| 1128 | |
| 1129 if (fabs(ss[0]) >= fabs(ss[2])) | |
| 1130 { | |
| 1131 j = 0; | |
| 1132 if (fabs(ss[0]) < fabs(ss[5])) j = 2; | |
| 1133 } | |
| 1134 else if (fabs(ss[2]) >= fabs(ss[5])) j = 1; | |
| 1135 else j = 2; | |
| 1136 | |
| 1137 d = 0.0; | |
| 1138 j = 3 * j; | |
| 1139 for (i = 0; i<3; i++) | |
| 1140 { | |
| 1141 k = ip[i + j]; | |
| 1142 a[i][l] = ss[k]; | |
| 1143 d = d + ss[k] * ss[k]; | |
| 1144 } | |
| 1145 | |
| 1146 | |
| 1147 //if( d > 0.0 ) d = 1.0 / sqrt(d); | |
| 1148 if (d > epsilon) d = 1.0 / sqrt(d); | |
| 1149 else d = 0.0; | |
| 1150 for (i = 0; i<3; i++) a[i][l] = a[i][l] * d; | |
| 1151 }//for l | |
| 1152 | |
| 1153 d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2]; | |
| 1154 if ((e[0] - e[1]) >(e[1] - e[2])) | |
| 1155 { | |
| 1156 m1 = 2; | |
| 1157 m = 0; | |
| 1158 } | |
| 1159 else | |
| 1160 { | |
| 1161 m1 = 0; | |
| 1162 m = 2; | |
| 1163 } | |
| 1164 p = 0; | |
| 1165 for (i = 0; i<3; i++) | |
| 1166 { | |
| 1167 a[i][m1] = a[i][m1] - d*a[i][m]; | |
| 1168 p = p + a[i][m1] * a[i][m1]; | |
| 1169 } | |
| 1170 if (p <= tol) | |
| 1171 { | |
| 1172 p = 1.0; | |
| 1173 for (i = 0; i<3; i++) | |
| 1174 { | |
| 1175 if (p < fabs(a[i][m])) continue; | |
| 1176 p = fabs(a[i][m]); | |
| 1177 j = i; | |
| 1178 } | |
| 1179 k = ip2312[j]; | |
| 1180 l = ip2312[j + 1]; | |
| 1181 p = sqrt(a[k][m] * a[k][m] + a[l][m] * a[l][m]); | |
| 1182 if (p > tol) | |
| 1183 { | |
| 1184 a[j][m1] = 0.0; | |
| 1185 a[k][m1] = -a[l][m] / p; | |
| 1186 a[l][m1] = a[k][m] / p; | |
| 1187 } | |
| 1188 else a_failed = 1; | |
| 1189 }//if p<=tol | |
| 1190 else | |
| 1191 { | |
| 1192 p = 1.0 / sqrt(p); | |
| 1193 for (i = 0; i<3; i++) a[i][m1] = a[i][m1] * p; | |
| 1194 }//else p<=tol | |
| 1195 if (a_failed != 1) | |
| 1196 { | |
| 1197 a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2]; | |
| 1198 a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2]; | |
| 1199 a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2]; | |
| 1200 } | |
| 1201 }//if(mode!=0) | |
| 1202 }//h>0 | |
| 1203 | |
| 1204 //compute b anyway | |
| 1205 if (mode != 0 && a_failed != 1)//a is computed correctly | |
| 1206 { | |
| 1207 //compute b | |
| 1208 for (l = 0; l<2; l++) | |
| 1209 { | |
| 1210 d = 0.0; | |
| 1211 for (i = 0; i<3; i++) | |
| 1212 { | |
| 1213 b[i][l] = r[i][0] * a[0][l] + | |
| 1214 r[i][1] * a[1][l] + r[i][2] * a[2][l]; | |
| 1215 d = d + b[i][l] * b[i][l]; | |
| 1216 } | |
| 1217 //if( d > 0 ) d = 1.0 / sqrt(d); | |
| 1218 if (d > epsilon) d = 1.0 / sqrt(d); | |
| 1219 else d = 0.0; | |
| 1220 for (i = 0; i<3; i++) b[i][l] = b[i][l] * d; | |
| 1221 } | |
| 1222 d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1]; | |
| 1223 p = 0.0; | |
| 1224 | |
| 1225 for (i = 0; i<3; i++) | |
| 1226 { | |
| 1227 b[i][1] = b[i][1] - d*b[i][0]; | |
| 1228 p += b[i][1] * b[i][1]; | |
| 1229 } | |
| 1230 | |
| 1231 if (p <= tol) | |
| 1232 { | |
| 1233 p = 1.0; | |
| 1234 for (i = 0; i<3; i++) | |
| 1235 { | |
| 1236 if (p<fabs(b[i][0])) continue; | |
| 1237 p = fabs(b[i][0]); | |
| 1238 j = i; | |
| 1239 } | |
| 1240 k = ip2312[j]; | |
| 1241 l = ip2312[j + 1]; | |
| 1242 p = sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]); | |
| 1243 if (p > tol) | |
| 1244 { | |
| 1245 b[j][1] = 0.0; | |
| 1246 b[k][1] = -b[l][0] / p; | |
| 1247 b[l][1] = b[k][0] / p; | |
| 1248 } | |
| 1249 else b_failed = 1; | |
| 1250 }//if( p <= tol ) | |
| 1251 else | |
| 1252 { | |
| 1253 p = 1.0 / sqrt(p); | |
| 1254 for (i = 0; i<3; i++) b[i][1] = b[i][1] * p; | |
| 1255 } | |
| 1256 if (b_failed != 1) | |
| 1257 { | |
| 1258 b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0]; | |
| 1259 b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0]; | |
| 1260 b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0]; | |
| 1261 //compute u | |
| 1262 for (i = 0; i<3; i++) | |
| 1263 for (j = 0; j<3; j++) | |
| 1264 u[i][j] = b[i][0] * a[j][0] + | |
| 1265 b[i][1] * a[j][1] + b[i][2] * a[j][2]; | |
| 1266 } | |
| 1267 | |
| 1268 //compute t | |
| 1269 for (i = 0; i<3; i++) | |
| 1270 t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - | |
| 1271 u[i][2] * xc[2]; | |
| 1272 }//if(mode!=0 && a_failed!=1) | |
| 1273 }//spur>0 | |
| 1274 else //just compute t and errors | |
| 1275 { | |
| 1276 //compute t | |
| 1277 for (i = 0; i<3; i++) | |
| 1278 t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) - | |
| 1279 u[i][2] * xc[2]; | |
| 1280 }//else spur>0 | |
| 1281 | |
| 1282 //compute rms | |
| 1283 for (i = 0; i<3; i++) | |
| 1284 { | |
| 1285 if (e[i] < 0) e[i] = 0; | |
| 1286 e[i] = sqrt(e[i]); | |
| 1287 } | |
| 1288 d = e[2]; | |
| 1289 if (sigma < 0.0) d = -d; | |
| 1290 d = (d + e[1]) + e[0]; | |
| 1291 | |
| 1292 if (mode == 2 || mode == 0) | |
| 1293 { | |
| 1294 rms1 = (e0 - d) - d; | |
| 1295 if (rms1 < 0.0) rms1 = 0.0; | |
| 1296 } | |
| 1297 | |
| 1298 *rms = rms1; | |
| 1299 return true; | |
| 1300 } | |
| 1301 | |
| 1302 /* Partial implementation of Needleman-Wunsch (NW) dymanamic programming for | |
| 1303 * global alignment. The three NWDP_TM functions below are not complete | |
| 1304 * implementation of NW algorithm because gap jumping in the standard Gotoh | |
| 1305 * algorithm is not considered. Since the gap opening and gap extension is | |
| 1306 * the same, this is not a problem. This code was exploited in TM-align | |
| 1307 * because it is about 1.5 times faster than a complete NW implementation. | |
| 1308 * Nevertheless, if gap openning != gap extension shall be implemented in | |
| 1309 * the future, the Gotoh algorithm must be implemented. In rare scenarios, | |
| 1310 * it is also possible to have asymmetric alignment (i.e. | |
| 1311 * TMalign A.pdb B.pdb and TMalign B.pdb A.pdb have different TM_A and TM_B | |
| 1312 * values) caused by the NWPD_TM implement. | |
| 1313 */ | |
| 1314 | |
| 1315 /* Input: score[1:len1, 1:len2], and gap_open | |
| 1316 * Output: j2i[1:len2] \in {1:len1} U {-1} | |
| 1317 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */ | |
| 1318 void NWDP_TM(double **score, bool **path, double **val, | |
| 1319 int len1, int len2, double gap_open, int j2i[]) | |
| 1320 { | |
| 1321 | |
| 1322 int i, j; | |
| 1323 double h, v, d; | |
| 1324 | |
| 1325 //initialization | |
| 1326 for(i=0; i<=len1; i++) | |
| 1327 { | |
| 1328 val[i][0]=0; | |
| 1329 //val[i][0]=i*gap_open; | |
| 1330 path[i][0]=false; //not from diagonal | |
| 1331 } | |
| 1332 | |
| 1333 for(j=0; j<=len2; j++) | |
| 1334 { | |
| 1335 val[0][j]=0; | |
| 1336 //val[0][j]=j*gap_open; | |
| 1337 path[0][j]=false; //not from diagonal | |
| 1338 j2i[j]=-1; //all are not aligned, only use j2i[1:len2] | |
| 1339 } | |
| 1340 | |
| 1341 | |
| 1342 //decide matrix and path | |
| 1343 for(i=1; i<=len1; i++) | |
| 1344 { | |
| 1345 for(j=1; j<=len2; j++) | |
| 1346 { | |
| 1347 d=val[i-1][j-1]+score[i][j]; //diagonal | |
| 1348 | |
| 1349 //symbol insertion in horizontal (= a gap in vertical) | |
| 1350 h=val[i-1][j]; | |
| 1351 if(path[i-1][j]) h += gap_open; //aligned in last position | |
| 1352 | |
| 1353 //symbol insertion in vertical | |
| 1354 v=val[i][j-1]; | |
| 1355 if(path[i][j-1]) v += gap_open; //aligned in last position | |
| 1356 | |
| 1357 | |
| 1358 if(d>=h && d>=v) | |
| 1359 { | |
| 1360 path[i][j]=true; //from diagonal | |
| 1361 val[i][j]=d; | |
| 1362 } | |
| 1363 else | |
| 1364 { | |
| 1365 path[i][j]=false; //from horizontal | |
| 1366 if(v>=h) val[i][j]=v; | |
| 1367 else val[i][j]=h; | |
| 1368 } | |
| 1369 } //for i | |
| 1370 } //for j | |
| 1371 | |
| 1372 //trace back to extract the alignment | |
| 1373 i=len1; | |
| 1374 j=len2; | |
| 1375 while(i>0 && j>0) | |
| 1376 { | |
| 1377 if(path[i][j]) //from diagonal | |
| 1378 { | |
| 1379 j2i[j-1]=i-1; | |
| 1380 i--; | |
| 1381 j--; | |
| 1382 } | |
| 1383 else | |
| 1384 { | |
| 1385 h=val[i-1][j]; | |
| 1386 if(path[i-1][j]) h +=gap_open; | |
| 1387 | |
| 1388 v=val[i][j-1]; | |
| 1389 if(path[i][j-1]) v +=gap_open; | |
| 1390 | |
| 1391 if(v>=h) j--; | |
| 1392 else i--; | |
| 1393 } | |
| 1394 } | |
| 1395 } | |
| 1396 | |
| 1397 /* Input: vectors x, y, rotation matrix t, u, scale factor d02, and gap_open | |
| 1398 * Output: j2i[1:len2] \in {1:len1} U {-1} | |
| 1399 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */ | |
| 1400 void NWDP_TM(bool **path, double **val, double **x, double **y, | |
| 1401 int len1, int len2, double t[3], double u[3][3], | |
| 1402 double d02, double gap_open, int j2i[]) | |
| 1403 { | |
| 1404 int i, j; | |
| 1405 double h, v, d; | |
| 1406 | |
| 1407 //initialization. use old val[i][0] and val[0][j] initialization | |
| 1408 //to minimize difference from TMalign fortran version | |
| 1409 for(i=0; i<=len1; i++) | |
| 1410 { | |
| 1411 val[i][0]=0; | |
| 1412 //val[i][0]=i*gap_open; | |
| 1413 path[i][0]=false; //not from diagonal | |
| 1414 } | |
| 1415 | |
| 1416 for(j=0; j<=len2; j++) | |
| 1417 { | |
| 1418 val[0][j]=0; | |
| 1419 //val[0][j]=j*gap_open; | |
| 1420 path[0][j]=false; //not from diagonal | |
| 1421 j2i[j]=-1; //all are not aligned, only use j2i[1:len2] | |
| 1422 } | |
| 1423 double xx[3], dij; | |
| 1424 | |
| 1425 | |
| 1426 //decide matrix and path | |
| 1427 for(i=1; i<=len1; i++) | |
| 1428 { | |
| 1429 transform(t, u, &x[i-1][0], xx); | |
| 1430 for(j=1; j<=len2; j++) | |
| 1431 { | |
| 1432 dij=dist(xx, &y[j-1][0]); | |
| 1433 d=val[i-1][j-1] + 1.0/(1+dij/d02); | |
| 1434 | |
| 1435 //symbol insertion in horizontal (= a gap in vertical) | |
| 1436 h=val[i-1][j]; | |
| 1437 if(path[i-1][j]) h += gap_open; //aligned in last position | |
| 1438 | |
| 1439 //symbol insertion in vertical | |
| 1440 v=val[i][j-1]; | |
| 1441 if(path[i][j-1]) v += gap_open; //aligned in last position | |
| 1442 | |
| 1443 | |
| 1444 if(d>=h && d>=v) | |
| 1445 { | |
| 1446 path[i][j]=true; //from diagonal | |
| 1447 val[i][j]=d; | |
| 1448 } | |
| 1449 else | |
| 1450 { | |
| 1451 path[i][j]=false; //from horizontal | |
| 1452 if(v>=h) val[i][j]=v; | |
| 1453 else val[i][j]=h; | |
| 1454 } | |
| 1455 } //for i | |
| 1456 } //for j | |
| 1457 | |
| 1458 //trace back to extract the alignment | |
| 1459 i=len1; | |
| 1460 j=len2; | |
| 1461 while(i>0 && j>0) | |
| 1462 { | |
| 1463 if(path[i][j]) //from diagonal | |
| 1464 { | |
| 1465 j2i[j-1]=i-1; | |
| 1466 i--; | |
| 1467 j--; | |
| 1468 } | |
| 1469 else | |
| 1470 { | |
| 1471 h=val[i-1][j]; | |
| 1472 if(path[i-1][j]) h +=gap_open; | |
| 1473 | |
| 1474 v=val[i][j-1]; | |
| 1475 if(path[i][j-1]) v +=gap_open; | |
| 1476 | |
| 1477 if(v>=h) j--; | |
| 1478 else i--; | |
| 1479 } | |
| 1480 } | |
| 1481 } | |
| 1482 | |
| 1483 /* This is the same as the previous NWDP_TM, except for the lack of rotation | |
| 1484 * Input: vectors x, y, scale factor d02, and gap_open | |
| 1485 * Output: j2i[1:len2] \in {1:len1} U {-1} | |
| 1486 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */ | |
| 1487 void NWDP_SE(bool **path, double **val, double **x, double **y, | |
| 1488 int len1, int len2, double d02, double gap_open, int j2i[]) | |
| 1489 { | |
| 1490 int i, j; | |
| 1491 double h, v, d; | |
| 1492 | |
| 1493 for(i=0; i<=len1; i++) | |
| 1494 { | |
| 1495 val[i][0]=0; | |
| 1496 path[i][0]=false; //not from diagonal | |
| 1497 } | |
| 1498 | |
| 1499 for(j=0; j<=len2; j++) | |
| 1500 { | |
| 1501 val[0][j]=0; | |
| 1502 path[0][j]=false; //not from diagonal | |
| 1503 j2i[j]=-1; //all are not aligned, only use j2i[1:len2] | |
| 1504 } | |
| 1505 double dij; | |
| 1506 | |
| 1507 //decide matrix and path | |
| 1508 for(i=1; i<=len1; i++) | |
| 1509 { | |
| 1510 for(j=1; j<=len2; j++) | |
| 1511 { | |
| 1512 dij=dist(&x[i-1][0], &y[j-1][0]); | |
| 1513 d=val[i-1][j-1] + 1.0/(1+dij/d02); | |
| 1514 | |
| 1515 //symbol insertion in horizontal (= a gap in vertical) | |
| 1516 h=val[i-1][j]; | |
| 1517 if(path[i-1][j]) h += gap_open; //aligned in last position | |
| 1518 | |
| 1519 //symbol insertion in vertical | |
| 1520 v=val[i][j-1]; | |
| 1521 if(path[i][j-1]) v += gap_open; //aligned in last position | |
| 1522 | |
| 1523 | |
| 1524 if(d>=h && d>=v) | |
| 1525 { | |
| 1526 path[i][j]=true; //from diagonal | |
| 1527 val[i][j]=d; | |
| 1528 } | |
| 1529 else | |
| 1530 { | |
| 1531 path[i][j]=false; //from horizontal | |
| 1532 if(v>=h) val[i][j]=v; | |
| 1533 else val[i][j]=h; | |
| 1534 } | |
| 1535 } //for i | |
| 1536 } //for j | |
| 1537 | |
| 1538 //trace back to extract the alignment | |
| 1539 i=len1; | |
| 1540 j=len2; | |
| 1541 while(i>0 && j>0) | |
| 1542 { | |
| 1543 if(path[i][j]) //from diagonal | |
| 1544 { | |
| 1545 j2i[j-1]=i-1; | |
| 1546 i--; | |
| 1547 j--; | |
| 1548 } | |
| 1549 else | |
| 1550 { | |
| 1551 h=val[i-1][j]; | |
| 1552 if(path[i-1][j]) h +=gap_open; | |
| 1553 | |
| 1554 v=val[i][j-1]; | |
| 1555 if(path[i][j-1]) v +=gap_open; | |
| 1556 | |
| 1557 if(v>=h) j--; | |
| 1558 else i--; | |
| 1559 } | |
| 1560 } | |
| 1561 } | |
| 1562 | |
| 1563 /* +ss | |
| 1564 * Input: secondary structure secx, secy, and gap_open | |
| 1565 * Output: j2i[1:len2] \in {1:len1} U {-1} | |
| 1566 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */ | |
| 1567 void NWDP_TM(bool **path, double **val, const char *secx, const char *secy, | |
| 1568 const int len1, const int len2, const double gap_open, int j2i[]) | |
| 1569 { | |
| 1570 | |
| 1571 int i, j; | |
| 1572 double h, v, d; | |
| 1573 | |
| 1574 //initialization | |
| 1575 for(i=0; i<=len1; i++) | |
| 1576 { | |
| 1577 val[i][0]=0; | |
| 1578 //val[i][0]=i*gap_open; | |
| 1579 path[i][0]=false; //not from diagonal | |
| 1580 } | |
| 1581 | |
| 1582 for(j=0; j<=len2; j++) | |
| 1583 { | |
| 1584 val[0][j]=0; | |
| 1585 //val[0][j]=j*gap_open; | |
| 1586 path[0][j]=false; //not from diagonal | |
| 1587 j2i[j]=-1; //all are not aligned, only use j2i[1:len2] | |
| 1588 } | |
| 1589 | |
| 1590 //decide matrix and path | |
| 1591 for(i=1; i<=len1; i++) | |
| 1592 { | |
| 1593 for(j=1; j<=len2; j++) | |
| 1594 { | |
| 1595 d=val[i-1][j-1] + 1.0*(secx[i-1]==secy[j-1]); | |
| 1596 | |
| 1597 //symbol insertion in horizontal (= a gap in vertical) | |
| 1598 h=val[i-1][j]; | |
| 1599 if(path[i-1][j]) h += gap_open; //aligned in last position | |
| 1600 | |
| 1601 //symbol insertion in vertical | |
| 1602 v=val[i][j-1]; | |
| 1603 if(path[i][j-1]) v += gap_open; //aligned in last position | |
| 1604 | |
| 1605 if(d>=h && d>=v) | |
| 1606 { | |
| 1607 path[i][j]=true; //from diagonal | |
| 1608 val[i][j]=d; | |
| 1609 } | |
| 1610 else | |
| 1611 { | |
| 1612 path[i][j]=false; //from horizontal | |
| 1613 if(v>=h) val[i][j]=v; | |
| 1614 else val[i][j]=h; | |
| 1615 } | |
| 1616 } //for i | |
| 1617 } //for j | |
| 1618 | |
| 1619 //trace back to extract the alignment | |
| 1620 i=len1; | |
| 1621 j=len2; | |
| 1622 while(i>0 && j>0) | |
| 1623 { | |
| 1624 if(path[i][j]) //from diagonal | |
| 1625 { | |
| 1626 j2i[j-1]=i-1; | |
| 1627 i--; | |
| 1628 j--; | |
| 1629 } | |
| 1630 else | |
| 1631 { | |
| 1632 h=val[i-1][j]; | |
| 1633 if(path[i-1][j]) h +=gap_open; | |
| 1634 | |
| 1635 v=val[i][j-1]; | |
| 1636 if(path[i][j-1]) v +=gap_open; | |
| 1637 | |
| 1638 if(v>=h) j--; | |
| 1639 else i--; | |
| 1640 } | |
| 1641 } | |
| 1642 } | |
| 1643 | |
| 1644 void parameter_set4search(const int xlen, const int ylen, | |
| 1645 double &D0_MIN, double &Lnorm, | |
| 1646 double &score_d8, double &d0, double &d0_search, double &dcu0) | |
| 1647 { | |
| 1648 //parameter initilization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8 | |
| 1649 D0_MIN=0.5; | |
| 1650 dcu0=4.25; //update 3.85-->4.25 | |
| 1651 | |
| 1652 Lnorm=getmin(xlen, ylen); //normaliz TMscore by this in searching | |
| 1653 if (Lnorm<=19) //update 15-->19 | |
| 1654 d0=0.168; //update 0.5-->0.168 | |
| 1655 else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); | |
| 1656 D0_MIN=d0+0.8; //this should be moved to above | |
| 1657 d0=D0_MIN; //update: best for search | |
| 1658 | |
| 1659 d0_search=d0; | |
| 1660 if (d0_search>8) d0_search=8; | |
| 1661 if (d0_search<4.5) d0_search=4.5; | |
| 1662 | |
| 1663 score_d8=1.5*pow(Lnorm*1.0, 0.3)+3.5; //remove pairs with dis>d8 during search & final | |
| 1664 } | |
| 1665 | |
| 1666 void parameter_set4final_C3prime(const double len, double &D0_MIN, | |
| 1667 double &Lnorm, double &d0, double &d0_search) | |
| 1668 { | |
| 1669 D0_MIN=0.3; | |
| 1670 | |
| 1671 Lnorm=len; //normaliz TMscore by this in searching | |
| 1672 if(Lnorm<=11) d0=0.3; | |
| 1673 else if(Lnorm>11&&Lnorm<=15) d0=0.4; | |
| 1674 else if(Lnorm>15&&Lnorm<=19) d0=0.5; | |
| 1675 else if(Lnorm>19&&Lnorm<=23) d0=0.6; | |
| 1676 else if(Lnorm>23&&Lnorm<30) d0=0.7; | |
| 1677 else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); | |
| 1678 | |
| 1679 d0_search=d0; | |
| 1680 if (d0_search>8) d0_search=8; | |
| 1681 if (d0_search<4.5) d0_search=4.5; | |
| 1682 } | |
| 1683 | |
| 1684 void parameter_set4final(const double len, double &D0_MIN, double &Lnorm, | |
| 1685 double &d0, double &d0_search, const int mol_type) | |
| 1686 { | |
| 1687 if (mol_type>0) // RNA | |
| 1688 { | |
| 1689 parameter_set4final_C3prime(len, D0_MIN, Lnorm, | |
| 1690 d0, d0_search); | |
| 1691 return; | |
| 1692 } | |
| 1693 D0_MIN=0.5; | |
| 1694 | |
| 1695 Lnorm=len; //normaliz TMscore by this in searching | |
| 1696 if (Lnorm<=21) d0=0.5; | |
| 1697 else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8); | |
| 1698 if (d0<D0_MIN) d0=D0_MIN; | |
| 1699 d0_search=d0; | |
| 1700 if (d0_search>8) d0_search=8; | |
| 1701 if (d0_search<4.5) d0_search=4.5; | |
| 1702 } | |
| 1703 | |
| 1704 void parameter_set4scale(const int len, const double d_s, double &Lnorm, | |
| 1705 double &d0, double &d0_search) | |
| 1706 { | |
| 1707 d0=d_s; | |
| 1708 Lnorm=len; //normaliz TMscore by this in searching | |
| 1709 d0_search=d0; | |
| 1710 if (d0_search>8) d0_search=8; | |
| 1711 if (d0_search<4.5) d0_search=4.5; | |
| 1712 } | |
| 1713 | |
| 1714 // 1, collect those residues with dis<d; | |
| 1715 // 2, calculate TMscore | |
| 1716 int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[], | |
| 1717 double *score1, int score_sum_method, const double Lnorm, | |
| 1718 const double score_d8, const double d0) | |
| 1719 { | |
| 1720 double score_sum=0, di; | |
| 1721 double d_tmp=d*d; | |
| 1722 double d02=d0*d0; | |
| 1723 double score_d8_cut = score_d8*score_d8; | |
| 1724 | |
| 1725 int i, n_cut, inc=0; | |
| 1726 | |
| 1727 while(1) | |
| 1728 { | |
| 1729 n_cut=0; | |
| 1730 score_sum=0; | |
| 1731 for(i=0; i<n_ali; i++) | |
| 1732 { | |
| 1733 di = dist(xa[i], ya[i]); | |
| 1734 if(di<d_tmp) | |
| 1735 { | |
| 1736 i_ali[n_cut]=i; | |
| 1737 n_cut++; | |
| 1738 } | |
| 1739 if(score_sum_method==8) | |
| 1740 { | |
| 1741 if(di<=score_d8_cut) score_sum += 1/(1+di/d02); | |
| 1742 } | |
| 1743 else score_sum += 1/(1+di/d02); | |
| 1744 } | |
| 1745 //there are not enough feasible pairs, reliefe the threshold | |
| 1746 if(n_cut<3 && n_ali>3) | |
| 1747 { | |
| 1748 inc++; | |
| 1749 double dinc=(d+inc*0.5); | |
| 1750 d_tmp = dinc * dinc; | |
| 1751 } | |
| 1752 else break; | |
| 1753 } | |
| 1754 | |
| 1755 *score1=score_sum/Lnorm; | |
| 1756 return n_cut; | |
| 1757 } | |
| 1758 | |
| 1759 int score_fun8_standard(double **xa, double **ya, int n_ali, double d, | |
| 1760 int i_ali[], double *score1, int score_sum_method, | |
| 1761 double score_d8, double d0) | |
| 1762 { | |
| 1763 double score_sum = 0, di; | |
| 1764 double d_tmp = d*d; | |
| 1765 double d02 = d0*d0; | |
| 1766 double score_d8_cut = score_d8*score_d8; | |
| 1767 | |
| 1768 int i, n_cut, inc = 0; | |
| 1769 while (1) | |
| 1770 { | |
| 1771 n_cut = 0; | |
| 1772 score_sum = 0; | |
| 1773 for (i = 0; i<n_ali; i++) | |
| 1774 { | |
| 1775 di = dist(xa[i], ya[i]); | |
| 1776 if (di<d_tmp) | |
| 1777 { | |
| 1778 i_ali[n_cut] = i; | |
| 1779 n_cut++; | |
| 1780 } | |
| 1781 if (score_sum_method == 8) | |
| 1782 { | |
| 1783 if (di <= score_d8_cut) score_sum += 1 / (1 + di / d02); | |
| 1784 } | |
| 1785 else | |
| 1786 { | |
| 1787 score_sum += 1 / (1 + di / d02); | |
| 1788 } | |
| 1789 } | |
| 1790 //there are not enough feasible pairs, reliefe the threshold | |
| 1791 if (n_cut<3 && n_ali>3) | |
| 1792 { | |
| 1793 inc++; | |
| 1794 double dinc = (d + inc*0.5); | |
| 1795 d_tmp = dinc * dinc; | |
| 1796 } | |
| 1797 else break; | |
| 1798 } | |
| 1799 | |
| 1800 *score1 = score_sum / n_ali; | |
| 1801 return n_cut; | |
| 1802 } | |
| 1803 | |
| 1804 double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm, | |
| 1805 double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step, | |
| 1806 int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm, | |
| 1807 double score_d8, double d0) | |
| 1808 { | |
| 1809 int i, m; | |
| 1810 double score_max, score, rmsd; | |
| 1811 const int kmax=Lali; | |
| 1812 int k_ali[kmax], ka, k; | |
| 1813 double t[3]; | |
| 1814 double u[3][3]; | |
| 1815 double d; | |
| 1816 | |
| 1817 | |
| 1818 //iterative parameters | |
| 1819 int n_it=20; //maximum number of iterations | |
| 1820 int n_init_max=6; //maximum number of different fragment length | |
| 1821 int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 | |
| 1822 int L_ini_min=4; | |
| 1823 if(Lali<L_ini_min) L_ini_min=Lali; | |
| 1824 | |
| 1825 int n_init=0, i_init; | |
| 1826 for(i=0; i<n_init_max-1; i++) | |
| 1827 { | |
| 1828 n_init++; | |
| 1829 L_ini[i]=(int) (Lali/pow(2.0, (double) i)); | |
| 1830 if(L_ini[i]<=L_ini_min) | |
| 1831 { | |
| 1832 L_ini[i]=L_ini_min; | |
| 1833 break; | |
| 1834 } | |
| 1835 } | |
| 1836 if(i==n_init_max-1) | |
| 1837 { | |
| 1838 n_init++; | |
| 1839 L_ini[i]=L_ini_min; | |
| 1840 } | |
| 1841 | |
| 1842 score_max=-1; | |
| 1843 //find the maximum score starting from local structures superposition | |
| 1844 int i_ali[kmax], n_cut; | |
| 1845 int L_frag; //fragment length | |
| 1846 int iL_max; //maximum starting postion for the fragment | |
| 1847 | |
| 1848 for(i_init=0; i_init<n_init; i_init++) | |
| 1849 { | |
| 1850 L_frag=L_ini[i_init]; | |
| 1851 iL_max=Lali-L_frag; | |
| 1852 | |
| 1853 i=0; | |
| 1854 while(1) | |
| 1855 { | |
| 1856 //extract the fragment starting from position i | |
| 1857 ka=0; | |
| 1858 for(k=0; k<L_frag; k++) | |
| 1859 { | |
| 1860 int kk=k+i; | |
| 1861 r1[k][0]=xtm[kk][0]; | |
| 1862 r1[k][1]=xtm[kk][1]; | |
| 1863 r1[k][2]=xtm[kk][2]; | |
| 1864 | |
| 1865 r2[k][0]=ytm[kk][0]; | |
| 1866 r2[k][1]=ytm[kk][1]; | |
| 1867 r2[k][2]=ytm[kk][2]; | |
| 1868 | |
| 1869 k_ali[ka]=kk; | |
| 1870 ka++; | |
| 1871 } | |
| 1872 | |
| 1873 //extract rotation matrix based on the fragment | |
| 1874 Kabsch(r1, r2, L_frag, 1, &rmsd, t, u); | |
| 1875 if (simplify_step != 1) | |
| 1876 *Rcomm = 0; | |
| 1877 do_rotation(xtm, xt, Lali, t, u); | |
| 1878 | |
| 1879 //get subsegment of this fragment | |
| 1880 d = local_d0_search - 1; | |
| 1881 n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score, | |
| 1882 score_sum_method, Lnorm, score_d8, d0); | |
| 1883 if(score>score_max) | |
| 1884 { | |
| 1885 score_max=score; | |
| 1886 | |
| 1887 //save the rotation matrix | |
| 1888 for(k=0; k<3; k++) | |
| 1889 { | |
| 1890 t0[k]=t[k]; | |
| 1891 u0[k][0]=u[k][0]; | |
| 1892 u0[k][1]=u[k][1]; | |
| 1893 u0[k][2]=u[k][2]; | |
| 1894 } | |
| 1895 } | |
| 1896 | |
| 1897 //try to extend the alignment iteratively | |
| 1898 d = local_d0_search + 1; | |
| 1899 for(int it=0; it<n_it; it++) | |
| 1900 { | |
| 1901 ka=0; | |
| 1902 for(k=0; k<n_cut; k++) | |
| 1903 { | |
| 1904 m=i_ali[k]; | |
| 1905 r1[k][0]=xtm[m][0]; | |
| 1906 r1[k][1]=xtm[m][1]; | |
| 1907 r1[k][2]=xtm[m][2]; | |
| 1908 | |
| 1909 r2[k][0]=ytm[m][0]; | |
| 1910 r2[k][1]=ytm[m][1]; | |
| 1911 r2[k][2]=ytm[m][2]; | |
| 1912 | |
| 1913 k_ali[ka]=m; | |
| 1914 ka++; | |
| 1915 } | |
| 1916 //extract rotation matrix based on the fragment | |
| 1917 Kabsch(r1, r2, n_cut, 1, &rmsd, t, u); | |
| 1918 do_rotation(xtm, xt, Lali, t, u); | |
| 1919 n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score, | |
| 1920 score_sum_method, Lnorm, score_d8, d0); | |
| 1921 if(score>score_max) | |
| 1922 { | |
| 1923 score_max=score; | |
| 1924 | |
| 1925 //save the rotation matrix | |
| 1926 for(k=0; k<3; k++) | |
| 1927 { | |
| 1928 t0[k]=t[k]; | |
| 1929 u0[k][0]=u[k][0]; | |
| 1930 u0[k][1]=u[k][1]; | |
| 1931 u0[k][2]=u[k][2]; | |
| 1932 } | |
| 1933 } | |
| 1934 | |
| 1935 //check if it converges | |
| 1936 if(n_cut==ka) | |
| 1937 { | |
| 1938 for(k=0; k<n_cut; k++) | |
| 1939 { | |
| 1940 if(i_ali[k]!=k_ali[k]) break; | |
| 1941 } | |
| 1942 if(k==n_cut) break; | |
| 1943 } | |
| 1944 } //for iteration | |
| 1945 | |
| 1946 if(i<iL_max) | |
| 1947 { | |
| 1948 i=i+simplify_step; //shift the fragment | |
| 1949 if(i>iL_max) i=iL_max; //do this to use the last missed fragment | |
| 1950 } | |
| 1951 else if(i>=iL_max) break; | |
| 1952 }//while(1) | |
| 1953 //end of one fragment | |
| 1954 }//for(i_init | |
| 1955 return score_max; | |
| 1956 } | |
| 1957 | |
| 1958 | |
| 1959 double TMscore8_search_standard( double **r1, double **r2, | |
| 1960 double **xtm, double **ytm, double **xt, int Lali, | |
| 1961 double t0[3], double u0[3][3], int simplify_step, int score_sum_method, | |
| 1962 double *Rcomm, double local_d0_search, double score_d8, double d0) | |
| 1963 { | |
| 1964 int i, m; | |
| 1965 double score_max, score, rmsd; | |
| 1966 const int kmax = Lali; | |
| 1967 int k_ali[kmax], ka, k; | |
| 1968 double t[3]; | |
| 1969 double u[3][3]; | |
| 1970 double d; | |
| 1971 | |
| 1972 //iterative parameters | |
| 1973 int n_it = 20; //maximum number of iterations | |
| 1974 int n_init_max = 6; //maximum number of different fragment length | |
| 1975 int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4 | |
| 1976 int L_ini_min = 4; | |
| 1977 if (Lali<L_ini_min) L_ini_min = Lali; | |
| 1978 | |
| 1979 int n_init = 0, i_init; | |
| 1980 for (i = 0; i<n_init_max - 1; i++) | |
| 1981 { | |
| 1982 n_init++; | |
| 1983 L_ini[i] = (int)(Lali / pow(2.0, (double)i)); | |
| 1984 if (L_ini[i] <= L_ini_min) | |
| 1985 { | |
| 1986 L_ini[i] = L_ini_min; | |
| 1987 break; | |
| 1988 } | |
| 1989 } | |
| 1990 if (i == n_init_max - 1) | |
| 1991 { | |
| 1992 n_init++; | |
| 1993 L_ini[i] = L_ini_min; | |
| 1994 } | |
| 1995 | |
| 1996 score_max = -1; | |
| 1997 //find the maximum score starting from local structures superposition | |
| 1998 int i_ali[kmax], n_cut; | |
| 1999 int L_frag; //fragment length | |
| 2000 int iL_max; //maximum starting postion for the fragment | |
| 2001 | |
| 2002 for (i_init = 0; i_init<n_init; i_init++) | |
| 2003 { | |
| 2004 L_frag = L_ini[i_init]; | |
| 2005 iL_max = Lali - L_frag; | |
| 2006 | |
| 2007 i = 0; | |
| 2008 while (1) | |
| 2009 { | |
| 2010 //extract the fragment starting from position i | |
| 2011 ka = 0; | |
| 2012 for (k = 0; k<L_frag; k++) | |
| 2013 { | |
| 2014 int kk = k + i; | |
| 2015 r1[k][0] = xtm[kk][0]; | |
| 2016 r1[k][1] = xtm[kk][1]; | |
| 2017 r1[k][2] = xtm[kk][2]; | |
| 2018 | |
| 2019 r2[k][0] = ytm[kk][0]; | |
| 2020 r2[k][1] = ytm[kk][1]; | |
| 2021 r2[k][2] = ytm[kk][2]; | |
| 2022 | |
| 2023 k_ali[ka] = kk; | |
| 2024 ka++; | |
| 2025 } | |
| 2026 //extract rotation matrix based on the fragment | |
| 2027 Kabsch(r1, r2, L_frag, 1, &rmsd, t, u); | |
| 2028 if (simplify_step != 1) | |
| 2029 *Rcomm = 0; | |
| 2030 do_rotation(xtm, xt, Lali, t, u); | |
| 2031 | |
| 2032 //get subsegment of this fragment | |
| 2033 d = local_d0_search - 1; | |
| 2034 n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score, | |
| 2035 score_sum_method, score_d8, d0); | |
| 2036 | |
| 2037 if (score>score_max) | |
| 2038 { | |
| 2039 score_max = score; | |
| 2040 | |
| 2041 //save the rotation matrix | |
| 2042 for (k = 0; k<3; k++) | |
| 2043 { | |
| 2044 t0[k] = t[k]; | |
| 2045 u0[k][0] = u[k][0]; | |
| 2046 u0[k][1] = u[k][1]; | |
| 2047 u0[k][2] = u[k][2]; | |
| 2048 } | |
| 2049 } | |
| 2050 | |
| 2051 //try to extend the alignment iteratively | |
| 2052 d = local_d0_search + 1; | |
| 2053 for (int it = 0; it<n_it; it++) | |
| 2054 { | |
| 2055 ka = 0; | |
| 2056 for (k = 0; k<n_cut; k++) | |
| 2057 { | |
| 2058 m = i_ali[k]; | |
| 2059 r1[k][0] = xtm[m][0]; | |
| 2060 r1[k][1] = xtm[m][1]; | |
| 2061 r1[k][2] = xtm[m][2]; | |
| 2062 | |
| 2063 r2[k][0] = ytm[m][0]; | |
| 2064 r2[k][1] = ytm[m][1]; | |
| 2065 r2[k][2] = ytm[m][2]; | |
| 2066 | |
| 2067 k_ali[ka] = m; | |
| 2068 ka++; | |
| 2069 } | |
| 2070 //extract rotation matrix based on the fragment | |
| 2071 Kabsch(r1, r2, n_cut, 1, &rmsd, t, u); | |
| 2072 do_rotation(xtm, xt, Lali, t, u); | |
| 2073 n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score, | |
| 2074 score_sum_method, score_d8, d0); | |
| 2075 if (score>score_max) | |
| 2076 { | |
| 2077 score_max = score; | |
| 2078 | |
| 2079 //save the rotation matrix | |
| 2080 for (k = 0; k<3; k++) | |
| 2081 { | |
| 2082 t0[k] = t[k]; | |
| 2083 u0[k][0] = u[k][0]; | |
| 2084 u0[k][1] = u[k][1]; | |
| 2085 u0[k][2] = u[k][2]; | |
| 2086 } | |
| 2087 } | |
| 2088 | |
| 2089 //check if it converges | |
| 2090 if (n_cut == ka) | |
| 2091 { | |
| 2092 for (k = 0; k<n_cut; k++) | |
| 2093 { | |
| 2094 if (i_ali[k] != k_ali[k]) break; | |
| 2095 } | |
| 2096 if (k == n_cut) break; | |
| 2097 } | |
| 2098 } //for iteration | |
| 2099 | |
| 2100 if (i<iL_max) | |
| 2101 { | |
| 2102 i = i + simplify_step; //shift the fragment | |
| 2103 if (i>iL_max) i = iL_max; //do this to use the last missed fragment | |
| 2104 } | |
| 2105 else if (i >= iL_max) break; | |
| 2106 }//while(1) | |
| 2107 //end of one fragment | |
| 2108 }//for(i_init | |
| 2109 return score_max; | |
| 2110 } | |
| 2111 | |
| 2112 //Comprehensive TMscore search engine | |
| 2113 // input: two vector sets: x, y | |
| 2114 // an alignment invmap0[] between x and y | |
| 2115 // simplify_step: 1 or 40 or other integers | |
| 2116 // score_sum_method: 0 for score over all pairs | |
| 2117 // 8 for socre over the pairs with dist<score_d8 | |
| 2118 // output: the best rotaion matrix t, u that results in highest TMscore | |
| 2119 double detailed_search(double **r1, double **r2, double **xtm, double **ytm, | |
| 2120 double **xt, double **x, double **y, int xlen, int ylen, | |
| 2121 int invmap0[], double t[3], double u[3][3], int simplify_step, | |
| 2122 int score_sum_method, double local_d0_search, double Lnorm, | |
| 2123 double score_d8, double d0) | |
| 2124 { | |
| 2125 //x is model, y is template, try to superpose onto y | |
| 2126 int i, j, k; | |
| 2127 double tmscore; | |
| 2128 double rmsd; | |
| 2129 | |
| 2130 k=0; | |
| 2131 for(i=0; i<ylen; i++) | |
| 2132 { | |
| 2133 j=invmap0[i]; | |
| 2134 if(j>=0) //aligned | |
| 2135 { | |
| 2136 xtm[k][0]=x[j][0]; | |
| 2137 xtm[k][1]=x[j][1]; | |
| 2138 xtm[k][2]=x[j][2]; | |
| 2139 | |
| 2140 ytm[k][0]=y[i][0]; | |
| 2141 ytm[k][1]=y[i][1]; | |
| 2142 ytm[k][2]=y[i][2]; | |
| 2143 k++; | |
| 2144 } | |
| 2145 } | |
| 2146 | |
| 2147 //detailed search 40-->1 | |
| 2148 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, simplify_step, | |
| 2149 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); | |
| 2150 return tmscore; | |
| 2151 } | |
| 2152 | |
| 2153 double detailed_search_standard( double **r1, double **r2, | |
| 2154 double **xtm, double **ytm, double **xt, double **x, double **y, | |
| 2155 int xlen, int ylen, int invmap0[], double t[3], double u[3][3], | |
| 2156 int simplify_step, int score_sum_method, double local_d0_search, | |
| 2157 const bool& bNormalize, double Lnorm, double score_d8, double d0) | |
| 2158 { | |
| 2159 //x is model, y is template, try to superpose onto y | |
| 2160 int i, j, k; | |
| 2161 double tmscore; | |
| 2162 double rmsd; | |
| 2163 | |
| 2164 k=0; | |
| 2165 for(i=0; i<ylen; i++) | |
| 2166 { | |
| 2167 j=invmap0[i]; | |
| 2168 if(j>=0) //aligned | |
| 2169 { | |
| 2170 xtm[k][0]=x[j][0]; | |
| 2171 xtm[k][1]=x[j][1]; | |
| 2172 xtm[k][2]=x[j][2]; | |
| 2173 | |
| 2174 ytm[k][0]=y[i][0]; | |
| 2175 ytm[k][1]=y[i][1]; | |
| 2176 ytm[k][2]=y[i][2]; | |
| 2177 k++; | |
| 2178 } | |
| 2179 } | |
| 2180 | |
| 2181 //detailed search 40-->1 | |
| 2182 tmscore = TMscore8_search_standard( r1, r2, xtm, ytm, xt, k, t, u, | |
| 2183 simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0); | |
| 2184 if (bNormalize)// "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false; | |
| 2185 tmscore = tmscore * k / Lnorm; | |
| 2186 | |
| 2187 return tmscore; | |
| 2188 } | |
| 2189 | |
| 2190 //compute the score quickly in three iterations | |
| 2191 double get_score_fast( double **r1, double **r2, double **xtm, double **ytm, | |
| 2192 double **x, double **y, int xlen, int ylen, int invmap[], | |
| 2193 double d0, double d0_search, double t[3], double u[3][3]) | |
| 2194 { | |
| 2195 double rms, tmscore, tmscore1, tmscore2; | |
| 2196 int i, j, k; | |
| 2197 | |
| 2198 k=0; | |
| 2199 for(j=0; j<ylen; j++) | |
| 2200 { | |
| 2201 i=invmap[j]; | |
| 2202 if(i>=0) | |
| 2203 { | |
| 2204 r1[k][0]=x[i][0]; | |
| 2205 r1[k][1]=x[i][1]; | |
| 2206 r1[k][2]=x[i][2]; | |
| 2207 | |
| 2208 r2[k][0]=y[j][0]; | |
| 2209 r2[k][1]=y[j][1]; | |
| 2210 r2[k][2]=y[j][2]; | |
| 2211 | |
| 2212 xtm[k][0]=x[i][0]; | |
| 2213 xtm[k][1]=x[i][1]; | |
| 2214 xtm[k][2]=x[i][2]; | |
| 2215 | |
| 2216 ytm[k][0]=y[j][0]; | |
| 2217 ytm[k][1]=y[j][1]; | |
| 2218 ytm[k][2]=y[j][2]; | |
| 2219 | |
| 2220 k++; | |
| 2221 } | |
| 2222 else if(i!=-1) PrintErrorAndQuit("Wrong map!\n"); | |
| 2223 } | |
| 2224 Kabsch(r1, r2, k, 1, &rms, t, u); | |
| 2225 | |
| 2226 //evaluate score | |
| 2227 double di; | |
| 2228 const int len=k; | |
| 2229 double dis[len]; | |
| 2230 double d00=d0_search; | |
| 2231 double d002=d00*d00; | |
| 2232 double d02=d0*d0; | |
| 2233 | |
| 2234 int n_ali=k; | |
| 2235 double xrot[3]; | |
| 2236 tmscore=0; | |
| 2237 for(k=0; k<n_ali; k++) | |
| 2238 { | |
| 2239 transform(t, u, &xtm[k][0], xrot); | |
| 2240 di=dist(xrot, &ytm[k][0]); | |
| 2241 dis[k]=di; | |
| 2242 tmscore += 1/(1+di/d02); | |
| 2243 } | |
| 2244 | |
| 2245 | |
| 2246 | |
| 2247 //second iteration | |
| 2248 double d002t=d002; | |
| 2249 while(1) | |
| 2250 { | |
| 2251 j=0; | |
| 2252 for(k=0; k<n_ali; k++) | |
| 2253 { | |
| 2254 if(dis[k]<=d002t) | |
| 2255 { | |
| 2256 r1[j][0]=xtm[k][0]; | |
| 2257 r1[j][1]=xtm[k][1]; | |
| 2258 r1[j][2]=xtm[k][2]; | |
| 2259 | |
| 2260 r2[j][0]=ytm[k][0]; | |
| 2261 r2[j][1]=ytm[k][1]; | |
| 2262 r2[j][2]=ytm[k][2]; | |
| 2263 | |
| 2264 j++; | |
| 2265 } | |
| 2266 } | |
| 2267 //there are not enough feasible pairs, relieve the threshold | |
| 2268 if(j<3 && n_ali>3) d002t += 0.5; | |
| 2269 else break; | |
| 2270 } | |
| 2271 | |
| 2272 if(n_ali!=j) | |
| 2273 { | |
| 2274 Kabsch(r1, r2, j, 1, &rms, t, u); | |
| 2275 tmscore1=0; | |
| 2276 for(k=0; k<n_ali; k++) | |
| 2277 { | |
| 2278 transform(t, u, &xtm[k][0], xrot); | |
| 2279 di=dist(xrot, &ytm[k][0]); | |
| 2280 dis[k]=di; | |
| 2281 tmscore1 += 1/(1+di/d02); | |
| 2282 } | |
| 2283 | |
| 2284 //third iteration | |
| 2285 d002t=d002+1; | |
| 2286 | |
| 2287 while(1) | |
| 2288 { | |
| 2289 j=0; | |
| 2290 for(k=0; k<n_ali; k++) | |
| 2291 { | |
| 2292 if(dis[k]<=d002t) | |
| 2293 { | |
| 2294 r1[j][0]=xtm[k][0]; | |
| 2295 r1[j][1]=xtm[k][1]; | |
| 2296 r1[j][2]=xtm[k][2]; | |
| 2297 | |
| 2298 r2[j][0]=ytm[k][0]; | |
| 2299 r2[j][1]=ytm[k][1]; | |
| 2300 r2[j][2]=ytm[k][2]; | |
| 2301 | |
| 2302 j++; | |
| 2303 } | |
| 2304 } | |
| 2305 //there are not enough feasible pairs, relieve the threshold | |
| 2306 if(j<3 && n_ali>3) d002t += 0.5; | |
| 2307 else break; | |
| 2308 } | |
| 2309 | |
| 2310 //evaluate the score | |
| 2311 Kabsch(r1, r2, j, 1, &rms, t, u); | |
| 2312 tmscore2=0; | |
| 2313 for(k=0; k<n_ali; k++) | |
| 2314 { | |
| 2315 transform(t, u, &xtm[k][0], xrot); | |
| 2316 di=dist(xrot, &ytm[k][0]); | |
| 2317 tmscore2 += 1/(1+di/d02); | |
| 2318 } | |
| 2319 } | |
| 2320 else | |
| 2321 { | |
| 2322 tmscore1=tmscore; | |
| 2323 tmscore2=tmscore; | |
| 2324 } | |
| 2325 | |
| 2326 if(tmscore1>=tmscore) tmscore=tmscore1; | |
| 2327 if(tmscore2>=tmscore) tmscore=tmscore2; | |
| 2328 return tmscore; // no need to normalize this score because it will not be used for latter scoring | |
| 2329 } | |
| 2330 | |
| 2331 | |
| 2332 //perform gapless threading to find the best initial alignment | |
| 2333 //input: x, y, xlen, ylen | |
| 2334 //output: y2x0 stores the best alignment: e.g., | |
| 2335 //y2x0[j]=i means: | |
| 2336 //the jth element in y is aligned to the ith element in x if i>=0 | |
| 2337 //the jth element in y is aligned to a gap in x if i==-1 | |
| 2338 double get_initial(double **r1, double **r2, double **xtm, double **ytm, | |
| 2339 double **x, double **y, int xlen, int ylen, int *y2x, | |
| 2340 double d0, double d0_search, const bool fast_opt, | |
| 2341 double t[3], double u[3][3]) | |
| 2342 { | |
| 2343 int min_len=getmin(xlen, ylen); | |
| 2344 if(min_len<3) PrintErrorAndQuit("Sequence is too short <3!\n"); | |
| 2345 | |
| 2346 int min_ali= min_len/2; //minimum size of considered fragment | |
| 2347 if(min_ali<=5) min_ali=5; | |
| 2348 int n1, n2; | |
| 2349 n1 = -ylen+min_ali; | |
| 2350 n2 = xlen-min_ali; | |
| 2351 | |
| 2352 int i, j, k, k_best; | |
| 2353 double tmscore, tmscore_max=-1; | |
| 2354 | |
| 2355 k_best=n1; | |
| 2356 for(k=n1; k<=n2; k+=(fast_opt)?5:1) | |
| 2357 { | |
| 2358 //get the map | |
| 2359 for(j=0; j<ylen; j++) | |
| 2360 { | |
| 2361 i=j+k; | |
| 2362 if(i>=0 && i<xlen) y2x[j]=i; | |
| 2363 else y2x[j]=-1; | |
| 2364 } | |
| 2365 | |
| 2366 //evaluate the map quickly in three iterations | |
| 2367 //this is not real tmscore, it is used to evaluate the goodness of the initial alignment | |
| 2368 tmscore=get_score_fast(r1, r2, xtm, ytm, | |
| 2369 x, y, xlen, ylen, y2x, d0,d0_search, t, u); | |
| 2370 if(tmscore>=tmscore_max) | |
| 2371 { | |
| 2372 tmscore_max=tmscore; | |
| 2373 k_best=k; | |
| 2374 } | |
| 2375 } | |
| 2376 | |
| 2377 //extract the best map | |
| 2378 k=k_best; | |
| 2379 for(j=0; j<ylen; j++) | |
| 2380 { | |
| 2381 i=j+k; | |
| 2382 if(i>=0 && i<xlen) y2x[j]=i; | |
| 2383 else y2x[j]=-1; | |
| 2384 } | |
| 2385 | |
| 2386 return tmscore_max; | |
| 2387 } | |
| 2388 | |
| 2389 void smooth(int *sec, int len) | |
| 2390 { | |
| 2391 int i, j; | |
| 2392 //smooth single --x-- => ----- | |
| 2393 for (i=2; i<len-2; i++) | |
| 2394 { | |
| 2395 if(sec[i]==2 || sec[i]==4) | |
| 2396 { | |
| 2397 j=sec[i]; | |
| 2398 if (sec[i-2]!=j && sec[i-1]!=j && sec[i+1]!=j && sec[i+2]!=j) | |
| 2399 sec[i]=1; | |
| 2400 } | |
| 2401 } | |
| 2402 | |
| 2403 // smooth double | |
| 2404 // --xx-- => ------ | |
| 2405 for (i=0; i<len-5; i++) | |
| 2406 { | |
| 2407 //helix | |
| 2408 if (sec[i]!=2 && sec[i+1]!=2 && sec[i+2]==2 && sec[i+3]==2 && | |
| 2409 sec[i+4]!=2 && sec[i+5]!= 2) | |
| 2410 { | |
| 2411 sec[i+2]=1; | |
| 2412 sec[i+3]=1; | |
| 2413 } | |
| 2414 | |
| 2415 //beta | |
| 2416 if (sec[i]!=4 && sec[i+1]!=4 && sec[i+2]==4 && sec[i+3]==4 && | |
| 2417 sec[i+4]!=4 && sec[i+5]!= 4) | |
| 2418 { | |
| 2419 sec[i+2]=1; | |
| 2420 sec[i+3]=1; | |
| 2421 } | |
| 2422 } | |
| 2423 | |
| 2424 //smooth connect | |
| 2425 for (i=0; i<len-2; i++) | |
| 2426 { | |
| 2427 if (sec[i]==2 && sec[i+1]!=2 && sec[i+2]==2) sec[i+1]=2; | |
| 2428 else if(sec[i]==4 && sec[i+1]!=4 && sec[i+2]==4) sec[i+1]=4; | |
| 2429 } | |
| 2430 | |
| 2431 } | |
| 2432 | |
| 2433 char sec_str(double dis13, double dis14, double dis15, | |
| 2434 double dis24, double dis25, double dis35) | |
| 2435 { | |
| 2436 char s='C'; | |
| 2437 | |
| 2438 double delta=2.1; | |
| 2439 if (fabs(dis15-6.37)<delta && fabs(dis14-5.18)<delta && | |
| 2440 fabs(dis25-5.18)<delta && fabs(dis13-5.45)<delta && | |
| 2441 fabs(dis24-5.45)<delta && fabs(dis35-5.45)<delta) | |
| 2442 { | |
| 2443 s='H'; //helix | |
| 2444 return s; | |
| 2445 } | |
| 2446 | |
| 2447 delta=1.42; | |
| 2448 if (fabs(dis15-13 )<delta && fabs(dis14-10.4)<delta && | |
| 2449 fabs(dis25-10.4)<delta && fabs(dis13-6.1 )<delta && | |
| 2450 fabs(dis24-6.1 )<delta && fabs(dis35-6.1 )<delta) | |
| 2451 { | |
| 2452 s='E'; //strand | |
| 2453 return s; | |
| 2454 } | |
| 2455 | |
| 2456 if (dis15 < 8) s='T'; //turn | |
| 2457 return s; | |
| 2458 } | |
| 2459 | |
| 2460 | |
| 2461 /* secondary stucture assignment for protein: | |
| 2462 * 1->coil, 2->helix, 3->turn, 4->strand */ | |
| 2463 void make_sec(double **x, int len, char *sec) | |
| 2464 { | |
| 2465 int j1, j2, j3, j4, j5; | |
| 2466 double d13, d14, d15, d24, d25, d35; | |
| 2467 for(int i=0; i<len; i++) | |
| 2468 { | |
| 2469 sec[i]='C'; | |
| 2470 j1=i-2; | |
| 2471 j2=i-1; | |
| 2472 j3=i; | |
| 2473 j4=i+1; | |
| 2474 j5=i+2; | |
| 2475 | |
| 2476 if(j1>=0 && j5<len) | |
| 2477 { | |
| 2478 d13=sqrt(dist(x[j1], x[j3])); | |
| 2479 d14=sqrt(dist(x[j1], x[j4])); | |
| 2480 d15=sqrt(dist(x[j1], x[j5])); | |
| 2481 d24=sqrt(dist(x[j2], x[j4])); | |
| 2482 d25=sqrt(dist(x[j2], x[j5])); | |
| 2483 d35=sqrt(dist(x[j3], x[j5])); | |
| 2484 sec[i]=sec_str(d13, d14, d15, d24, d25, d35); | |
| 2485 } | |
| 2486 } | |
| 2487 sec[len]=0; | |
| 2488 } | |
| 2489 | |
| 2490 //get initial alignment from secondary structure alignment | |
| 2491 //input: x, y, xlen, ylen | |
| 2492 //output: y2x stores the best alignment: e.g., | |
| 2493 //y2x[j]=i means: | |
| 2494 //the jth element in y is aligned to the ith element in x if i>=0 | |
| 2495 //the jth element in y is aligned to a gap in x if i==-1 | |
| 2496 void get_initial_ss(bool **path, double **val, | |
| 2497 const char *secx, const char *secy, int xlen, int ylen, int *y2x) | |
| 2498 { | |
| 2499 double gap_open=-1.0; | |
| 2500 NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x); | |
| 2501 } | |
| 2502 | |
| 2503 | |
| 2504 // get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji | |
| 2505 //get initial alignment of local structure superposition | |
| 2506 //input: x, y, xlen, ylen | |
| 2507 //output: y2x stores the best alignment: e.g., | |
| 2508 //y2x[j]=i means: | |
| 2509 //the jth element in y is aligned to the ith element in x if i>=0 | |
| 2510 //the jth element in y is aligned to a gap in x if i==-1 | |
| 2511 bool get_initial5( double **r1, double **r2, double **xtm, double **ytm, | |
| 2512 bool **path, double **val, | |
| 2513 double **x, double **y, int xlen, int ylen, int *y2x, | |
| 2514 double d0, double d0_search, const bool fast_opt, const double D0_MIN) | |
| 2515 { | |
| 2516 double GL, rmsd; | |
| 2517 double t[3]; | |
| 2518 double u[3][3]; | |
| 2519 | |
| 2520 double d01 = d0 + 1.5; | |
| 2521 if (d01 < D0_MIN) d01 = D0_MIN; | |
| 2522 double d02 = d01*d01; | |
| 2523 | |
| 2524 double GLmax = 0; | |
| 2525 int aL = getmin(xlen, ylen); | |
| 2526 int *invmap = new int[ylen + 1]; | |
| 2527 | |
| 2528 // jump on sequence1--------------> | |
| 2529 int n_jump1 = 0; | |
| 2530 if (xlen > 250) | |
| 2531 n_jump1 = 45; | |
| 2532 else if (xlen > 200) | |
| 2533 n_jump1 = 35; | |
| 2534 else if (xlen > 150) | |
| 2535 n_jump1 = 25; | |
| 2536 else | |
| 2537 n_jump1 = 15; | |
| 2538 if (n_jump1 > (xlen / 3)) | |
| 2539 n_jump1 = xlen / 3; | |
| 2540 | |
| 2541 // jump on sequence2--------------> | |
| 2542 int n_jump2 = 0; | |
| 2543 if (ylen > 250) | |
| 2544 n_jump2 = 45; | |
| 2545 else if (ylen > 200) | |
| 2546 n_jump2 = 35; | |
| 2547 else if (ylen > 150) | |
| 2548 n_jump2 = 25; | |
| 2549 else | |
| 2550 n_jump2 = 15; | |
| 2551 if (n_jump2 > (ylen / 3)) | |
| 2552 n_jump2 = ylen / 3; | |
| 2553 | |
| 2554 // fragment to superimpose--------------> | |
| 2555 int n_frag[2] = { 20, 100 }; | |
| 2556 if (n_frag[0] > (aL / 3)) | |
| 2557 n_frag[0] = aL / 3; | |
| 2558 if (n_frag[1] > (aL / 2)) | |
| 2559 n_frag[1] = aL / 2; | |
| 2560 | |
| 2561 // start superimpose search--------------> | |
| 2562 if (fast_opt) | |
| 2563 { | |
| 2564 n_jump1*=5; | |
| 2565 n_jump2*=5; | |
| 2566 } | |
| 2567 bool flag = false; | |
| 2568 for (int i_frag = 0; i_frag < 2; i_frag++) | |
| 2569 { | |
| 2570 int m1 = xlen - n_frag[i_frag] + 1; | |
| 2571 int m2 = ylen - n_frag[i_frag] + 1; | |
| 2572 | |
| 2573 for (int i = 0; i<m1; i = i + n_jump1) //index starts from 0, different from FORTRAN | |
| 2574 { | |
| 2575 for (int j = 0; j<m2; j = j + n_jump2) | |
| 2576 { | |
| 2577 for (int k = 0; k<n_frag[i_frag]; k++) //fragment in y | |
| 2578 { | |
| 2579 r1[k][0] = x[k + i][0]; | |
| 2580 r1[k][1] = x[k + i][1]; | |
| 2581 r1[k][2] = x[k + i][2]; | |
| 2582 | |
| 2583 r2[k][0] = y[k + j][0]; | |
| 2584 r2[k][1] = y[k + j][1]; | |
| 2585 r2[k][2] = y[k + j][2]; | |
| 2586 } | |
| 2587 | |
| 2588 // superpose the two structures and rotate it | |
| 2589 Kabsch(r1, r2, n_frag[i_frag], 1, &rmsd, t, u); | |
| 2590 | |
| 2591 double gap_open = 0.0; | |
| 2592 NWDP_TM(path, val, x, y, xlen, ylen, | |
| 2593 t, u, d02, gap_open, invmap); | |
| 2594 GL = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, | |
| 2595 invmap, d0, d0_search, t, u); | |
| 2596 if (GL>GLmax) | |
| 2597 { | |
| 2598 GLmax = GL; | |
| 2599 for (int ii = 0; ii<ylen; ii++) y2x[ii] = invmap[ii]; | |
| 2600 flag = true; | |
| 2601 } | |
| 2602 } | |
| 2603 } | |
| 2604 } | |
| 2605 | |
| 2606 delete[] invmap; | |
| 2607 return flag; | |
| 2608 } | |
| 2609 | |
| 2610 void score_matrix_rmsd_sec( double **r1, double **r2, double **score, | |
| 2611 const char *secx, const char *secy, double **x, double **y, | |
| 2612 int xlen, int ylen, int *y2x, const double D0_MIN, double d0) | |
| 2613 { | |
| 2614 double t[3], u[3][3]; | |
| 2615 double rmsd, dij; | |
| 2616 double d01=d0+1.5; | |
| 2617 if(d01 < D0_MIN) d01=D0_MIN; | |
| 2618 double d02=d01*d01; | |
| 2619 | |
| 2620 double xx[3]; | |
| 2621 int i, k=0; | |
| 2622 for(int j=0; j<ylen; j++) | |
| 2623 { | |
| 2624 i=y2x[j]; | |
| 2625 if(i>=0) | |
| 2626 { | |
| 2627 r1[k][0]=x[i][0]; | |
| 2628 r1[k][1]=x[i][1]; | |
| 2629 r1[k][2]=x[i][2]; | |
| 2630 | |
| 2631 r2[k][0]=y[j][0]; | |
| 2632 r2[k][1]=y[j][1]; | |
| 2633 r2[k][2]=y[j][2]; | |
| 2634 | |
| 2635 k++; | |
| 2636 } | |
| 2637 } | |
| 2638 Kabsch(r1, r2, k, 1, &rmsd, t, u); | |
| 2639 | |
| 2640 | |
| 2641 for(int ii=0; ii<xlen; ii++) | |
| 2642 { | |
| 2643 transform(t, u, &x[ii][0], xx); | |
| 2644 for(int jj=0; jj<ylen; jj++) | |
| 2645 { | |
| 2646 dij=dist(xx, &y[jj][0]); | |
| 2647 if (secx[ii]==secy[jj]) | |
| 2648 score[ii+1][jj+1] = 1.0/(1+dij/d02) + 0.5; | |
| 2649 else | |
| 2650 score[ii+1][jj+1] = 1.0/(1+dij/d02); | |
| 2651 } | |
| 2652 } | |
| 2653 } | |
| 2654 | |
| 2655 | |
| 2656 //get initial alignment from secondary structure and previous alignments | |
| 2657 //input: x, y, xlen, ylen | |
| 2658 //output: y2x stores the best alignment: e.g., | |
| 2659 //y2x[j]=i means: | |
| 2660 //the jth element in y is aligned to the ith element in x if i>=0 | |
| 2661 //the jth element in y is aligned to a gap in x if i==-1 | |
| 2662 void get_initial_ssplus(double **r1, double **r2, double **score, bool **path, | |
| 2663 double **val, const char *secx, const char *secy, double **x, double **y, | |
| 2664 int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0) | |
| 2665 { | |
| 2666 //create score matrix for DP | |
| 2667 score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen, | |
| 2668 y2x0, D0_MIN,d0); | |
| 2669 | |
| 2670 double gap_open=-1.0; | |
| 2671 NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x); | |
| 2672 } | |
| 2673 | |
| 2674 | |
| 2675 void find_max_frag(double **x, int len, int *start_max, | |
| 2676 int *end_max, double dcu0, const bool fast_opt) | |
| 2677 { | |
| 2678 int r_min, fra_min=4; //minimum fragment for search | |
| 2679 if (fast_opt) fra_min=8; | |
| 2680 int start; | |
| 2681 int Lfr_max=0; | |
| 2682 | |
| 2683 r_min= (int) (len*1.0/3.0); //minimum fragment, in case too small protein | |
| 2684 if(r_min > fra_min) r_min=fra_min; | |
| 2685 | |
| 2686 int inc=0; | |
| 2687 double dcu0_cut=dcu0*dcu0;; | |
| 2688 double dcu_cut=dcu0_cut; | |
| 2689 | |
| 2690 while(Lfr_max < r_min) | |
| 2691 { | |
| 2692 Lfr_max=0; | |
| 2693 int j=1; //number of residues at nf-fragment | |
| 2694 start=0; | |
| 2695 for(int i=1; i<len; i++) | |
| 2696 { | |
| 2697 if(dist(x[i-1], x[i]) < dcu_cut) | |
| 2698 { | |
| 2699 j++; | |
| 2700 | |
| 2701 if(i==(len-1)) | |
| 2702 { | |
| 2703 if(j > Lfr_max) | |
| 2704 { | |
| 2705 Lfr_max=j; | |
| 2706 *start_max=start; | |
| 2707 *end_max=i; | |
| 2708 } | |
| 2709 j=1; | |
| 2710 } | |
| 2711 } | |
| 2712 else | |
| 2713 { | |
| 2714 if(j>Lfr_max) | |
| 2715 { | |
| 2716 Lfr_max=j; | |
| 2717 *start_max=start; | |
| 2718 *end_max=i-1; | |
| 2719 } | |
| 2720 | |
| 2721 j=1; | |
| 2722 start=i; | |
| 2723 } | |
| 2724 }// for i; | |
| 2725 | |
| 2726 if(Lfr_max < r_min) | |
| 2727 { | |
| 2728 inc++; | |
| 2729 double dinc=pow(1.1, (double) inc) * dcu0; | |
| 2730 dcu_cut= dinc*dinc; | |
| 2731 } | |
| 2732 }//while <; | |
| 2733 } | |
| 2734 | |
| 2735 //perform fragment gapless threading to find the best initial alignment | |
| 2736 //input: x, y, xlen, ylen | |
| 2737 //output: y2x0 stores the best alignment: e.g., | |
| 2738 //y2x0[j]=i means: | |
| 2739 //the jth element in y is aligned to the ith element in x if i>=0 | |
| 2740 //the jth element in y is aligned to a gap in x if i==-1 | |
| 2741 double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm, | |
| 2742 double **x, double **y, int xlen, int ylen, | |
| 2743 int *y2x, double d0, double d0_search, | |
| 2744 double dcu0, const bool fast_opt, double t[3], double u[3][3]) | |
| 2745 { | |
| 2746 int fra_min=4; //minimum fragment for search | |
| 2747 if (fast_opt) fra_min=8; | |
| 2748 int fra_min1=fra_min-1; //cutoff for shift, save time | |
| 2749 | |
| 2750 int xstart=0, ystart=0, xend=0, yend=0; | |
| 2751 | |
| 2752 find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt); | |
| 2753 find_max_frag(y, ylen, &ystart, ¥d, dcu0, fast_opt); | |
| 2754 | |
| 2755 | |
| 2756 int Lx = xend-xstart+1; | |
| 2757 int Ly = yend-ystart+1; | |
| 2758 int *ifr, *y2x_; | |
| 2759 int L_fr=getmin(Lx, Ly); | |
| 2760 ifr= new int[L_fr]; | |
| 2761 y2x_= new int[ylen+1]; | |
| 2762 | |
| 2763 //select what piece will be used. The original implement may cause | |
| 2764 //asymetry, but only when xlen==ylen and Lx==Ly | |
| 2765 //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1 | |
| 2766 | |
| 2767 if(Lx<Ly || (Lx==Ly && xlen<ylen)) | |
| 2768 { | |
| 2769 for(int i=0; i<L_fr; i++) ifr[i]=xstart+i; | |
| 2770 } | |
| 2771 else if(Lx>Ly || (Lx==Ly && xlen>ylen)) | |
| 2772 { | |
| 2773 for(int i=0; i<L_fr; i++) ifr[i]=ystart+i; | |
| 2774 } | |
| 2775 else // solve asymetric for 1x5gA vs 2q7nA5 | |
| 2776 { | |
| 2777 /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */ | |
| 2778 int L0=xlen; | |
| 2779 double tmscore, tmscore_max=-1; | |
| 2780 int i, j, k; | |
| 2781 int n1, n2; | |
| 2782 int min_len; | |
| 2783 int min_ali; | |
| 2784 | |
| 2785 /* part 1, normalized by xlen */ | |
| 2786 for(i=0; i<L_fr; i++) ifr[i]=xstart+i; | |
| 2787 | |
| 2788 if(L_fr==L0) | |
| 2789 { | |
| 2790 n1= (int)(L0*0.1); //my index starts from 0 | |
| 2791 n2= (int)(L0*0.89); | |
| 2792 j=0; | |
| 2793 for(i=n1; i<= n2; i++) | |
| 2794 { | |
| 2795 ifr[j]=ifr[i]; | |
| 2796 j++; | |
| 2797 } | |
| 2798 L_fr=j; | |
| 2799 } | |
| 2800 | |
| 2801 int L1=L_fr; | |
| 2802 min_len=getmin(L1, ylen); | |
| 2803 min_ali= (int) (min_len/2.5); //minimum size of considered fragment | |
| 2804 if(min_ali<=fra_min1) min_ali=fra_min1; | |
| 2805 n1 = -ylen+min_ali; | |
| 2806 n2 = L1-min_ali; | |
| 2807 | |
| 2808 for(k=n1; k<=n2; k+=(fast_opt)?3:1) | |
| 2809 { | |
| 2810 //get the map | |
| 2811 for(j=0; j<ylen; j++) | |
| 2812 { | |
| 2813 i=j+k; | |
| 2814 if(i>=0 && i<L1) y2x_[j]=ifr[i]; | |
| 2815 else y2x_[j]=-1; | |
| 2816 } | |
| 2817 | |
| 2818 //evaluate the map quickly in three iterations | |
| 2819 tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, | |
| 2820 d0, d0_search, t, u); | |
| 2821 | |
| 2822 if(tmscore>=tmscore_max) | |
| 2823 { | |
| 2824 tmscore_max=tmscore; | |
| 2825 for(j=0; j<ylen; j++) y2x[j]=y2x_[j]; | |
| 2826 } | |
| 2827 } | |
| 2828 | |
| 2829 /* part 2, normalized by ylen */ | |
| 2830 L_fr=Ly; | |
| 2831 for(i=0; i<L_fr; i++) ifr[i]=ystart+i; | |
| 2832 | |
| 2833 if (L_fr==L0) | |
| 2834 { | |
| 2835 n1= (int)(L0*0.1); //my index starts from 0 | |
| 2836 n2= (int)(L0*0.89); | |
| 2837 | |
| 2838 j=0; | |
| 2839 for(i=n1; i<= n2; i++) | |
| 2840 { | |
| 2841 ifr[j]=ifr[i]; | |
| 2842 j++; | |
| 2843 } | |
| 2844 L_fr=j; | |
| 2845 } | |
| 2846 | |
| 2847 int L2=L_fr; | |
| 2848 min_len=getmin(xlen, L2); | |
| 2849 min_ali= (int) (min_len/2.5); //minimum size of considered fragment | |
| 2850 if(min_ali<=fra_min1) min_ali=fra_min1; | |
| 2851 n1 = -L2+min_ali; | |
| 2852 n2 = xlen-min_ali; | |
| 2853 | |
| 2854 for(k=n1; k<=n2; k++) | |
| 2855 { | |
| 2856 //get the map | |
| 2857 for(j=0; j<ylen; j++) y2x_[j]=-1; | |
| 2858 | |
| 2859 for(j=0; j<L2; j++) | |
| 2860 { | |
| 2861 i=j+k; | |
| 2862 if(i>=0 && i<xlen) y2x_[ifr[j]]=i; | |
| 2863 } | |
| 2864 | |
| 2865 //evaluate the map quickly in three iterations | |
| 2866 tmscore=get_score_fast(r1, r2, xtm, ytm, | |
| 2867 x, y, xlen, ylen, y2x_, d0,d0_search, t, u); | |
| 2868 if(tmscore>=tmscore_max) | |
| 2869 { | |
| 2870 tmscore_max=tmscore; | |
| 2871 for(j=0; j<ylen; j++) y2x[j]=y2x_[j]; | |
| 2872 } | |
| 2873 } | |
| 2874 | |
| 2875 delete [] ifr; | |
| 2876 delete [] y2x_; | |
| 2877 return tmscore_max; | |
| 2878 } | |
| 2879 | |
| 2880 | |
| 2881 int L0=getmin(xlen, ylen); //non-redundant to get_initial1 | |
| 2882 if(L_fr==L0) | |
| 2883 { | |
| 2884 int n1= (int)(L0*0.1); //my index starts from 0 | |
| 2885 int n2= (int)(L0*0.89); | |
| 2886 | |
| 2887 int j=0; | |
| 2888 for(int i=n1; i<= n2; i++) | |
| 2889 { | |
| 2890 ifr[j]=ifr[i]; | |
| 2891 j++; | |
| 2892 } | |
| 2893 L_fr=j; | |
| 2894 } | |
| 2895 | |
| 2896 | |
| 2897 //gapless threading for the extracted fragment | |
| 2898 double tmscore, tmscore_max=-1; | |
| 2899 | |
| 2900 if(Lx<Ly || (Lx==Ly && xlen<=ylen)) | |
| 2901 { | |
| 2902 int L1=L_fr; | |
| 2903 int min_len=getmin(L1, ylen); | |
| 2904 int min_ali= (int) (min_len/2.5); //minimum size of considered fragment | |
| 2905 if(min_ali<=fra_min1) min_ali=fra_min1; | |
| 2906 int n1, n2; | |
| 2907 n1 = -ylen+min_ali; | |
| 2908 n2 = L1-min_ali; | |
| 2909 | |
| 2910 int i, j, k; | |
| 2911 for(k=n1; k<=n2; k+=(fast_opt)?3:1) | |
| 2912 { | |
| 2913 //get the map | |
| 2914 for(j=0; j<ylen; j++) | |
| 2915 { | |
| 2916 i=j+k; | |
| 2917 if(i>=0 && i<L1) y2x_[j]=ifr[i]; | |
| 2918 else y2x_[j]=-1; | |
| 2919 } | |
| 2920 | |
| 2921 //evaluate the map quickly in three iterations | |
| 2922 tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_, | |
| 2923 d0, d0_search, t, u); | |
| 2924 | |
| 2925 if(tmscore>=tmscore_max) | |
| 2926 { | |
| 2927 tmscore_max=tmscore; | |
| 2928 for(j=0; j<ylen; j++) y2x[j]=y2x_[j]; | |
| 2929 } | |
| 2930 } | |
| 2931 } | |
| 2932 else | |
| 2933 { | |
| 2934 int L2=L_fr; | |
| 2935 int min_len=getmin(xlen, L2); | |
| 2936 int min_ali= (int) (min_len/2.5); //minimum size of considered fragment | |
| 2937 if(min_ali<=fra_min1) min_ali=fra_min1; | |
| 2938 int n1, n2; | |
| 2939 n1 = -L2+min_ali; | |
| 2940 n2 = xlen-min_ali; | |
| 2941 | |
| 2942 int i, j, k; | |
| 2943 | |
| 2944 for(k=n1; k<=n2; k++) | |
| 2945 { | |
| 2946 //get the map | |
| 2947 for(j=0; j<ylen; j++) y2x_[j]=-1; | |
| 2948 | |
| 2949 for(j=0; j<L2; j++) | |
| 2950 { | |
| 2951 i=j+k; | |
| 2952 if(i>=0 && i<xlen) y2x_[ifr[j]]=i; | |
| 2953 } | |
| 2954 | |
| 2955 //evaluate the map quickly in three iterations | |
| 2956 tmscore=get_score_fast(r1, r2, xtm, ytm, | |
| 2957 x, y, xlen, ylen, y2x_, d0,d0_search, t, u); | |
| 2958 if(tmscore>=tmscore_max) | |
| 2959 { | |
| 2960 tmscore_max=tmscore; | |
| 2961 for(j=0; j<ylen; j++) y2x[j]=y2x_[j]; | |
| 2962 } | |
| 2963 } | |
| 2964 } | |
| 2965 | |
| 2966 | |
| 2967 delete [] ifr; | |
| 2968 delete [] y2x_; | |
| 2969 return tmscore_max; | |
| 2970 } | |
| 2971 | |
| 2972 //heuristic run of dynamic programing iteratively to find the best alignment | |
| 2973 //input: initial rotation matrix t, u | |
| 2974 // vectors x and y, d0 | |
| 2975 //output: best alignment that maximizes the TMscore, will be stored in invmap | |
| 2976 double DP_iter(double **r1, double **r2, double **xtm, double **ytm, | |
| 2977 double **xt, bool **path, double **val, double **x, double **y, | |
| 2978 int xlen, int ylen, double t[3], double u[3][3], int invmap0[], | |
| 2979 int g1, int g2, int iteration_max, double local_d0_search, | |
| 2980 double D0_MIN, double Lnorm, double d0, double score_d8) | |
| 2981 { | |
| 2982 double gap_open[2]={-0.6, 0}; | |
| 2983 double rmsd; | |
| 2984 int *invmap=new int[ylen+1]; | |
| 2985 | |
| 2986 int iteration, i, j, k; | |
| 2987 double tmscore, tmscore_max, tmscore_old=0; | |
| 2988 int score_sum_method=8, simplify_step=40; | |
| 2989 tmscore_max=-1; | |
| 2990 | |
| 2991 //double d01=d0+1.5; | |
| 2992 double d02=d0*d0; | |
| 2993 for(int g=g1; g<g2; g++) | |
| 2994 { | |
| 2995 for(iteration=0; iteration<iteration_max; iteration++) | |
| 2996 { | |
| 2997 NWDP_TM(path, val, x, y, xlen, ylen, | |
| 2998 t, u, d02, gap_open[g], invmap); | |
| 2999 | |
| 3000 k=0; | |
| 3001 for(j=0; j<ylen; j++) | |
| 3002 { | |
| 3003 i=invmap[j]; | |
| 3004 | |
| 3005 if(i>=0) //aligned | |
| 3006 { | |
| 3007 xtm[k][0]=x[i][0]; | |
| 3008 xtm[k][1]=x[i][1]; | |
| 3009 xtm[k][2]=x[i][2]; | |
| 3010 | |
| 3011 ytm[k][0]=y[j][0]; | |
| 3012 ytm[k][1]=y[j][1]; | |
| 3013 ytm[k][2]=y[j][2]; | |
| 3014 k++; | |
| 3015 } | |
| 3016 } | |
| 3017 | |
| 3018 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, | |
| 3019 simplify_step, score_sum_method, &rmsd, local_d0_search, | |
| 3020 Lnorm, score_d8, d0); | |
| 3021 | |
| 3022 | |
| 3023 if(tmscore>tmscore_max) | |
| 3024 { | |
| 3025 tmscore_max=tmscore; | |
| 3026 for(i=0; i<ylen; i++) invmap0[i]=invmap[i]; | |
| 3027 } | |
| 3028 | |
| 3029 if(iteration>0) | |
| 3030 { | |
| 3031 if(fabs(tmscore_old-tmscore)<0.000001) break; | |
| 3032 } | |
| 3033 tmscore_old=tmscore; | |
| 3034 }// for iteration | |
| 3035 | |
| 3036 }//for gapopen | |
| 3037 | |
| 3038 | |
| 3039 delete []invmap; | |
| 3040 return tmscore_max; | |
| 3041 } | |
| 3042 | |
| 3043 | |
| 3044 void output_superpose(const string xname, const string yname, | |
| 3045 const string fname_super, | |
| 3046 double t[3], double u[3][3], const int ter_opt, const int mirror_opt, | |
| 3047 const char *seqM, const char *seqxA, const char *seqyA, | |
| 3048 const vector<string>&resi_vec1, const vector<string>&resi_vec2, | |
| 3049 const char *chainID1, const char *chainID2, | |
| 3050 const int xlen, const int ylen, const double d0A, const int n_ali8, | |
| 3051 const double rmsd, const double TM1, const double Liden) | |
| 3052 { | |
| 3053 stringstream buf; | |
| 3054 stringstream buf_all; | |
| 3055 stringstream buf_atm; | |
| 3056 stringstream buf_all_atm; | |
| 3057 stringstream buf_all_atm_lig; | |
| 3058 stringstream buf_pdb; | |
| 3059 stringstream buf_pymol; | |
| 3060 stringstream buf_tm; | |
| 3061 string line; | |
| 3062 double x[3]; // before transform | |
| 3063 double x1[3]; // after transform | |
| 3064 bool after_ter; // true if passed the "TER" line in PDB | |
| 3065 string asym_id; // chain ID | |
| 3066 | |
| 3067 buf_tm<<"REMARK TM-align" | |
| 3068 <<"\nREMARK Chain 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen | |
| 3069 <<"\nREMARK Chain 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen | |
| 3070 <<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0=" | |
| 3071 <<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")" | |
| 3072 <<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD=" | |
| 3073 <<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd | |
| 3074 <<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1 | |
| 3075 <<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3) | |
| 3076 <<((n_ali8>0)?Liden/n_ali8:0)<<endl; | |
| 3077 string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n"; | |
| 3078 string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str(); | |
| 3079 buf<<rasmol_CA_header; | |
| 3080 buf_all<<rasmol_CA_header; | |
| 3081 buf_atm<<rasmol_cartoon_header; | |
| 3082 buf_all_atm<<rasmol_cartoon_header; | |
| 3083 buf_all_atm_lig<<rasmol_cartoon_header; | |
| 3084 | |
| 3085 /* for PDBx/mmCIF only */ | |
| 3086 map<string,int> _atom_site; | |
| 3087 int atom_site_pos; | |
| 3088 vector<string> line_vec; | |
| 3089 string atom; // 4-character atom name | |
| 3090 string AA; // 3-character residue name | |
| 3091 string resi; // 4-character residue sequence number | |
| 3092 string inscode; // 1-character insertion code | |
| 3093 string model_index; // model index | |
| 3094 bool is_mmcif=false; | |
| 3095 int chain_num=0; | |
| 3096 | |
| 3097 /* used for CONECT record of chain1 */ | |
| 3098 int ca_idx1=0; // all CA atoms | |
| 3099 int lig_idx1=0; // all atoms | |
| 3100 vector <int> idx_vec; | |
| 3101 | |
| 3102 /* used for CONECT record of chain2 */ | |
| 3103 int ca_idx2=0; // all CA atoms | |
| 3104 int lig_idx2=0; // all atoms | |
| 3105 | |
| 3106 /* extract aligned region */ | |
| 3107 vector<string> resi_aln1; | |
| 3108 vector<string> resi_aln2; | |
| 3109 int i1=-1; | |
| 3110 int i2=-1; | |
| 3111 int i; | |
| 3112 for (i=0;i<strlen(seqM);i++) | |
| 3113 { | |
| 3114 i1+=(seqxA[i]!='-'); | |
| 3115 i2+=(seqyA[i]!='-'); | |
| 3116 if (seqM[i]==' ') continue; | |
| 3117 resi_aln1.push_back(resi_vec1[i1].substr(0,4)); | |
| 3118 resi_aln2.push_back(resi_vec2[i2].substr(0,4)); | |
| 3119 if (seqM[i]!=':') continue; | |
| 3120 buf <<"select "<<resi_aln1.back()<<":A," | |
| 3121 <<resi_aln2.back()<<":B\ncolor red\n"; | |
| 3122 buf_all<<"select "<<resi_aln1.back()<<":A," | |
| 3123 <<resi_aln2.back()<<":B\ncolor red\n"; | |
| 3124 } | |
| 3125 buf<<"select all\nexit\n"<<buf_tm.str(); | |
| 3126 buf_all<<"select all\nexit\n"<<buf_tm.str(); | |
| 3127 | |
| 3128 ifstream fin; | |
| 3129 /* read first file */ | |
| 3130 after_ter=false; | |
| 3131 asym_id=""; | |
| 3132 fin.open(xname.c_str()); | |
| 3133 while (fin.good()) | |
| 3134 { | |
| 3135 getline(fin, line); | |
| 3136 if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true; | |
| 3137 if (is_mmcif==false && line.size()>=54 && | |
| 3138 (line.compare(0, 6, "ATOM ")==0 || | |
| 3139 line.compare(0, 6, "HETATM")==0)) // PDB format | |
| 3140 { | |
| 3141 x[0]=atof(line.substr(30,8).c_str()); | |
| 3142 x[1]=atof(line.substr(38,8).c_str()); | |
| 3143 x[2]=atof(line.substr(46,8).c_str()); | |
| 3144 if (mirror_opt) x[2]=-x[2]; | |
| 3145 transform(t, u, x, x1); | |
| 3146 buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed) | |
| 3147 <<setprecision(3) | |
| 3148 <<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2] | |
| 3149 <<line.substr(54)<<'\n'; | |
| 3150 | |
| 3151 if (line[16]!='A' && line[16]!=' ') continue; | |
| 3152 if (after_ter && line.compare(0,6,"ATOM ")==0) continue; | |
| 3153 lig_idx1++; | |
| 3154 buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1 | |
| 3155 <<line.substr(11,9)<<" A"<<line.substr(22,8) | |
| 3156 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3157 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n'; | |
| 3158 if (after_ter || line.compare(0,6,"ATOM ")) continue; | |
| 3159 if (ter_opt>=2) | |
| 3160 { | |
| 3161 if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1)) | |
| 3162 { | |
| 3163 after_ter=true; | |
| 3164 continue; | |
| 3165 } | |
| 3166 asym_id=line[21]; | |
| 3167 } | |
| 3168 buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1 | |
| 3169 <<line.substr(11,9)<<" A"<<line.substr(22,8) | |
| 3170 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3171 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n'; | |
| 3172 if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4) | |
| 3173 )!=resi_aln1.end()) | |
| 3174 { | |
| 3175 buf_atm<<"ATOM "<<setw(5)<<lig_idx1 | |
| 3176 <<line.substr(11,9)<<" A"<<line.substr(22,8) | |
| 3177 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3178 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n'; | |
| 3179 } | |
| 3180 if (line.substr(12,4)!=" CA ") continue; | |
| 3181 ca_idx1++; | |
| 3182 buf_all<<"ATOM "<<setw(5)<<ca_idx1 | |
| 3183 <<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8) | |
| 3184 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3185 <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n'; | |
| 3186 if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4) | |
| 3187 )==resi_aln1.end()) continue; | |
| 3188 buf<<"ATOM "<<setw(5)<<ca_idx1 | |
| 3189 <<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8) | |
| 3190 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3191 <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n'; | |
| 3192 idx_vec.push_back(ca_idx1); | |
| 3193 } | |
| 3194 else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF | |
| 3195 { | |
| 3196 while(1) | |
| 3197 { | |
| 3198 if (fin.good()) getline(fin, line); | |
| 3199 else PrintErrorAndQuit("ERROR! Unexpected end of "+xname); | |
| 3200 if (line.size()) break; | |
| 3201 } | |
| 3202 if (line.compare(0,11,"_atom_site.")) continue; | |
| 3203 _atom_site.clear(); | |
| 3204 atom_site_pos=0; | |
| 3205 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos; | |
| 3206 while(1) | |
| 3207 { | |
| 3208 if (fin.good()) getline(fin, line); | |
| 3209 else PrintErrorAndQuit("ERROR! Unexpected end of "+xname); | |
| 3210 if (line.size()==0) continue; | |
| 3211 if (line.compare(0,11,"_atom_site.")) break; | |
| 3212 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos; | |
| 3213 } | |
| 3214 | |
| 3215 if (is_mmcif==false) | |
| 3216 { | |
| 3217 buf_pdb.str(string()); | |
| 3218 is_mmcif=true; | |
| 3219 } | |
| 3220 | |
| 3221 while(1) | |
| 3222 { | |
| 3223 line_vec.clear(); | |
| 3224 split(line,line_vec); | |
| 3225 if (line_vec[_atom_site["group_PDB"]]!="ATOM" && | |
| 3226 line_vec[_atom_site["group_PDB"]]!="HETATM") break; | |
| 3227 if (_atom_site.count("pdbx_PDB_model_num")) | |
| 3228 { | |
| 3229 if (model_index.size() && model_index!= | |
| 3230 line_vec[_atom_site["pdbx_PDB_model_num"]]) | |
| 3231 break; | |
| 3232 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]]; | |
| 3233 } | |
| 3234 | |
| 3235 x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str()); | |
| 3236 x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str()); | |
| 3237 x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str()); | |
| 3238 if (mirror_opt) x[2]=-x[2]; | |
| 3239 transform(t, u, x, x1); | |
| 3240 | |
| 3241 if (_atom_site.count("label_alt_id")==0 || | |
| 3242 line_vec[_atom_site["label_alt_id"]]=="." || | |
| 3243 line_vec[_atom_site["label_alt_id"]]=="A") | |
| 3244 { | |
| 3245 atom=line_vec[_atom_site["label_atom_id"]]; | |
| 3246 if (atom[0]=='"') atom=atom.substr(1); | |
| 3247 if (atom.size() && atom[atom.size()-1]=='"') | |
| 3248 atom=atom.substr(0,atom.size()-1); | |
| 3249 if (atom.size()==0) atom=" "; | |
| 3250 else if (atom.size()==1) atom=" "+atom+" "; | |
| 3251 else if (atom.size()==2) atom=" "+atom+" "; | |
| 3252 else if (atom.size()==3) atom=" "+atom; | |
| 3253 else if (atom.size()>=5) atom=atom.substr(0,4); | |
| 3254 | |
| 3255 AA=line_vec[_atom_site["label_comp_id"]]; // residue name | |
| 3256 if (AA.size()==1) AA=" "+AA; | |
| 3257 else if (AA.size()==2) AA=" " +AA; | |
| 3258 else if (AA.size()>=4) AA=AA.substr(0,3); | |
| 3259 | |
| 3260 if (_atom_site.count("auth_seq_id")) | |
| 3261 resi=line_vec[_atom_site["auth_seq_id"]]; | |
| 3262 else resi=line_vec[_atom_site["label_seq_id"]]; | |
| 3263 while (resi.size()<4) resi=' '+resi; | |
| 3264 if (resi.size()>4) resi=resi.substr(0,4); | |
| 3265 | |
| 3266 inscode=' '; | |
| 3267 if (_atom_site.count("pdbx_PDB_ins_code") && | |
| 3268 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") | |
| 3269 inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; | |
| 3270 | |
| 3271 if (_atom_site.count("auth_asym_id")) | |
| 3272 { | |
| 3273 if (ter_opt>=2 && ca_idx1 && asym_id.size() && | |
| 3274 asym_id!=line_vec[_atom_site["auth_asym_id"]]) | |
| 3275 after_ter=true; | |
| 3276 asym_id=line_vec[_atom_site["auth_asym_id"]]; | |
| 3277 } | |
| 3278 else if (_atom_site.count("label_asym_id")) | |
| 3279 { | |
| 3280 if (ter_opt>=2 && ca_idx1 && asym_id.size() && | |
| 3281 asym_id!=line_vec[_atom_site["label_asym_id"]]) | |
| 3282 after_ter=true; | |
| 3283 asym_id=line_vec[_atom_site["label_asym_id"]]; | |
| 3284 } | |
| 3285 buf_pdb<<left<<setw(6) | |
| 3286 <<line_vec[_atom_site["group_PDB"]]<<right | |
| 3287 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' ' | |
| 3288 <<AA<<" "<<asym_id[asym_id.size()-1] | |
| 3289 <<resi<<inscode<<" " | |
| 3290 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3291 <<setw(8)<<x1[0] | |
| 3292 <<setw(8)<<x1[1] | |
| 3293 <<setw(8)<<x1[2]<<'\n'; | |
| 3294 | |
| 3295 if (after_ter==false || | |
| 3296 line_vec[_atom_site["group_pdb"]]=="HETATM") | |
| 3297 { | |
| 3298 lig_idx1++; | |
| 3299 buf_all_atm_lig<<left<<setw(6) | |
| 3300 <<line_vec[_atom_site["group_PDB"]]<<right | |
| 3301 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' ' | |
| 3302 <<AA<<" A"<<resi<<inscode<<" " | |
| 3303 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3304 <<setw(8)<<x1[0] | |
| 3305 <<setw(8)<<x1[1] | |
| 3306 <<setw(8)<<x1[2]<<'\n'; | |
| 3307 if (after_ter==false && | |
| 3308 line_vec[_atom_site["group_PDB"]]=="ATOM") | |
| 3309 { | |
| 3310 buf_all_atm<<"ATOM "<<setw(6) | |
| 3311 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' ' | |
| 3312 <<AA<<" A"<<resi<<inscode<<" " | |
| 3313 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3314 <<setw(8)<<x1[0] | |
| 3315 <<setw(8)<<x1[1] | |
| 3316 <<setw(8)<<x1[2]<<'\n'; | |
| 3317 if (find(resi_aln1.begin(),resi_aln1.end(),resi | |
| 3318 )!=resi_aln1.end()) | |
| 3319 { | |
| 3320 buf_atm<<"ATOM "<<setw(6) | |
| 3321 <<setw(5)<<lig_idx1%100000<<' ' | |
| 3322 <<atom<<' '<<AA<<" A"<<resi<<inscode<<" " | |
| 3323 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3324 <<setw(8)<<x1[0] | |
| 3325 <<setw(8)<<x1[1] | |
| 3326 <<setw(8)<<x1[2]<<'\n'; | |
| 3327 } | |
| 3328 if (atom==" CA ") | |
| 3329 { | |
| 3330 ca_idx1++; | |
| 3331 buf_all<<"ATOM "<<setw(6) | |
| 3332 <<setw(5)<<ca_idx1%100000<<" CA " | |
| 3333 <<AA<<" A"<<resi<<inscode<<" " | |
| 3334 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3335 <<setw(8)<<x1[0] | |
| 3336 <<setw(8)<<x1[1] | |
| 3337 <<setw(8)<<x1[2]<<'\n'; | |
| 3338 if (find(resi_aln1.begin(),resi_aln1.end(),resi | |
| 3339 )!=resi_aln1.end()) | |
| 3340 { | |
| 3341 buf<<"ATOM "<<setw(6) | |
| 3342 <<setw(5)<<ca_idx1%100000<<" CA " | |
| 3343 <<AA<<" A"<<resi<<inscode<<" " | |
| 3344 <<setiosflags(ios::fixed)<<setprecision(3) | |
| 3345 <<setw(8)<<x1[0] | |
| 3346 <<setw(8)<<x1[1] | |
| 3347 <<setw(8)<<x1[2]<<'\n'; | |
| 3348 idx_vec.push_back(ca_idx1); | |
| 3349 } | |
| 3350 } | |
| 3351 } | |
| 3352 } | |
| 3353 } | |
| 3354 | |
| 3355 while(1) | |
| 3356 { | |
| 3357 if (fin.good()) getline(fin, line); | |
| 3358 else break; | |
| 3359 if (line.size()) break; | |
| 3360 } | |
| 3361 } | |
| 3362 } | |
| 3363 else if (line.size() && is_mmcif==false) | |
| 3364 { | |
| 3365 buf_pdb<<line<<'\n'; | |
| 3366 if (ter_opt>=1 && line.compare(0,3,"END")==0) break; | |
| 3367 } | |
| 3368 } | |
| 3369 fin.close(); | |
| 3370 buf<<"TER\n"; | |
| 3371 buf_all<<"TER\n"; | |
| 3372 buf_atm<<"TER\n"; | |
| 3373 buf_all_atm<<"TER\n"; | |
| 3374 buf_all_atm_lig<<"TER\n"; | |
| 3375 for (i=1;i<ca_idx1;i++) buf_all<<"CONECT" | |
| 3376 <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n'; | |
| 3377 for (i=1;i<idx_vec.size();i++) buf<<"CONECT" | |
| 3378 <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n'; | |
| 3379 idx_vec.clear(); | |
| 3380 | |
| 3381 /* read second file */ | |
| 3382 after_ter=false; | |
| 3383 asym_id=""; | |
| 3384 fin.open(yname.c_str()); | |
| 3385 while (fin.good()) | |
| 3386 { | |
| 3387 getline(fin, line); | |
| 3388 if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true; | |
| 3389 if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 || | |
| 3390 line.compare(0, 6, "HETATM")==0)) // PDB format | |
| 3391 { | |
| 3392 if (line[16]!='A' && line[16]!=' ') continue; | |
| 3393 if (after_ter && line.compare(0,6,"ATOM ")==0) continue; | |
| 3394 lig_idx2++; | |
| 3395 buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2 | |
| 3396 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n'; | |
| 3397 if (after_ter || line.compare(0,6,"ATOM ")) continue; | |
| 3398 if (ter_opt>=2) | |
| 3399 { | |
| 3400 if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1)) | |
| 3401 { | |
| 3402 after_ter=true; | |
| 3403 continue; | |
| 3404 } | |
| 3405 asym_id=line[21]; | |
| 3406 } | |
| 3407 buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2 | |
| 3408 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n'; | |
| 3409 if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4) | |
| 3410 )!=resi_aln2.end()) | |
| 3411 { | |
| 3412 buf_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2 | |
| 3413 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n'; | |
| 3414 } | |
| 3415 if (line.substr(12,4)!=" CA ") continue; | |
| 3416 ca_idx2++; | |
| 3417 buf_all<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA " | |
| 3418 <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n'; | |
| 3419 if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4) | |
| 3420 )==resi_aln2.end()) continue; | |
| 3421 buf<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA " | |
| 3422 <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n'; | |
| 3423 idx_vec.push_back(ca_idx1+ca_idx2); | |
| 3424 } | |
| 3425 else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF | |
| 3426 { | |
| 3427 while(1) | |
| 3428 { | |
| 3429 if (fin.good()) getline(fin, line); | |
| 3430 else PrintErrorAndQuit("ERROR! Unexpected end of "+yname); | |
| 3431 if (line.size()) break; | |
| 3432 } | |
| 3433 if (line.compare(0,11,"_atom_site.")) continue; | |
| 3434 _atom_site.clear(); | |
| 3435 atom_site_pos=0; | |
| 3436 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos; | |
| 3437 while(1) | |
| 3438 { | |
| 3439 if (fin.good()) getline(fin, line); | |
| 3440 else PrintErrorAndQuit("ERROR! Unexpected end of "+yname); | |
| 3441 if (line.size()==0) continue; | |
| 3442 if (line.compare(0,11,"_atom_site.")) break; | |
| 3443 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos; | |
| 3444 } | |
| 3445 | |
| 3446 while(1) | |
| 3447 { | |
| 3448 line_vec.clear(); | |
| 3449 split(line,line_vec); | |
| 3450 if (line_vec[_atom_site["group_PDB"]]!="ATOM" && | |
| 3451 line_vec[_atom_site["group_PDB"]]!="HETATM") break; | |
| 3452 if (_atom_site.count("pdbx_PDB_model_num")) | |
| 3453 { | |
| 3454 if (model_index.size() && model_index!= | |
| 3455 line_vec[_atom_site["pdbx_PDB_model_num"]]) | |
| 3456 break; | |
| 3457 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]]; | |
| 3458 } | |
| 3459 | |
| 3460 if (_atom_site.count("label_alt_id")==0 || | |
| 3461 line_vec[_atom_site["label_alt_id"]]=="." || | |
| 3462 line_vec[_atom_site["label_alt_id"]]=="A") | |
| 3463 { | |
| 3464 atom=line_vec[_atom_site["label_atom_id"]]; | |
| 3465 if (atom[0]=='"') atom=atom.substr(1); | |
| 3466 if (atom.size() && atom[atom.size()-1]=='"') | |
| 3467 atom=atom.substr(0,atom.size()-1); | |
| 3468 if (atom.size()==0) atom=" "; | |
| 3469 else if (atom.size()==1) atom=" "+atom+" "; | |
| 3470 else if (atom.size()==2) atom=" "+atom+" "; | |
| 3471 else if (atom.size()==3) atom=" "+atom; | |
| 3472 else if (atom.size()>=5) atom=atom.substr(0,4); | |
| 3473 | |
| 3474 AA=line_vec[_atom_site["label_comp_id"]]; // residue name | |
| 3475 if (AA.size()==1) AA=" "+AA; | |
| 3476 else if (AA.size()==2) AA=" " +AA; | |
| 3477 else if (AA.size()>=4) AA=AA.substr(0,3); | |
| 3478 | |
| 3479 if (_atom_site.count("auth_seq_id")) | |
| 3480 resi=line_vec[_atom_site["auth_seq_id"]]; | |
| 3481 else resi=line_vec[_atom_site["label_seq_id"]]; | |
| 3482 while (resi.size()<4) resi=' '+resi; | |
| 3483 if (resi.size()>4) resi=resi.substr(0,4); | |
| 3484 | |
| 3485 inscode=' '; | |
| 3486 if (_atom_site.count("pdbx_PDB_ins_code") && | |
| 3487 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?") | |
| 3488 inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0]; | |
| 3489 | |
| 3490 if (ter_opt>=2) | |
| 3491 { | |
| 3492 if (_atom_site.count("auth_asym_id")) | |
| 3493 { | |
| 3494 if (ca_idx2 && asym_id.size() && | |
| 3495 asym_id!=line_vec[_atom_site["auth_asym_id"]]) | |
| 3496 after_ter=true; | |
| 3497 else | |
| 3498 asym_id=line_vec[_atom_site["auth_asym_id"]]; | |
| 3499 } | |
| 3500 else if (_atom_site.count("label_asym_id")) | |
| 3501 { | |
| 3502 if (ca_idx2 && asym_id.size() && | |
| 3503 asym_id!=line_vec[_atom_site["label_asym_id"]]) | |
| 3504 after_ter=true; | |
| 3505 else | |
| 3506 asym_id=line_vec[_atom_site["label_asym_id"]]; | |
| 3507 } | |
| 3508 } | |
| 3509 if (after_ter==false || | |
| 3510 line_vec[_atom_site["group_PDB"]]=="HETATM") | |
| 3511 { | |
| 3512 lig_idx2++; | |
| 3513 buf_all_atm_lig<<left<<setw(6) | |
| 3514 <<line_vec[_atom_site["group_PDB"]]<<right | |
| 3515 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' ' | |
| 3516 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" " | |
| 3517 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 3518 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 3519 <<setw(8)<<line_vec[_atom_site["Cartn_z"]] | |
| 3520 <<'\n'; | |
| 3521 if (after_ter==false && | |
| 3522 line_vec[_atom_site["group_PDB"]]=="ATOM") | |
| 3523 { | |
| 3524 buf_all_atm<<"ATOM "<<setw(6) | |
| 3525 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' ' | |
| 3526 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" " | |
| 3527 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 3528 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 3529 <<setw(8)<<line_vec[_atom_site["Cartn_z"]] | |
| 3530 <<'\n'; | |
| 3531 if (find(resi_aln2.begin(),resi_aln2.end(),resi | |
| 3532 )!=resi_aln2.end()) | |
| 3533 { | |
| 3534 buf_atm<<"ATOM "<<setw(6) | |
| 3535 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' ' | |
| 3536 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" " | |
| 3537 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 3538 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 3539 <<setw(8)<<line_vec[_atom_site["Cartn_z"]] | |
| 3540 <<'\n'; | |
| 3541 } | |
| 3542 if (atom==" CA ") | |
| 3543 { | |
| 3544 ca_idx2++; | |
| 3545 buf_all<<"ATOM "<<setw(6) | |
| 3546 <<setw(5)<<(ca_idx1+ca_idx2)%100000 | |
| 3547 <<" CA "<<AA<<" B"<<resi<<inscode<<" " | |
| 3548 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 3549 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 3550 <<setw(8)<<line_vec[_atom_site["Cartn_z"]] | |
| 3551 <<'\n'; | |
| 3552 if (find(resi_aln2.begin(),resi_aln2.end(),resi | |
| 3553 )!=resi_aln2.end()) | |
| 3554 { | |
| 3555 buf<<"ATOM "<<setw(6) | |
| 3556 <<setw(5)<<(ca_idx1+ca_idx2)%100000 | |
| 3557 <<" CA "<<AA<<" B"<<resi<<inscode<<" " | |
| 3558 <<setw(8)<<line_vec[_atom_site["Cartn_x"]] | |
| 3559 <<setw(8)<<line_vec[_atom_site["Cartn_y"]] | |
| 3560 <<setw(8)<<line_vec[_atom_site["Cartn_z"]] | |
| 3561 <<'\n'; | |
| 3562 idx_vec.push_back(ca_idx1+ca_idx2); | |
| 3563 } | |
| 3564 } | |
| 3565 } | |
| 3566 } | |
| 3567 } | |
| 3568 | |
| 3569 if (fin.good()) getline(fin, line); | |
| 3570 else break; | |
| 3571 } | |
| 3572 } | |
| 3573 else if (line.size()) | |
| 3574 { | |
| 3575 if (ter_opt>=1 && line.compare(0,3,"END")==0) break; | |
| 3576 } | |
| 3577 } | |
| 3578 fin.close(); | |
| 3579 buf<<"TER\n"; | |
| 3580 buf_all<<"TER\n"; | |
| 3581 buf_atm<<"TER\n"; | |
| 3582 buf_all_atm<<"TER\n"; | |
| 3583 buf_all_atm_lig<<"TER\n"; | |
| 3584 for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT" | |
| 3585 <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n'; | |
| 3586 for (i=1;i<idx_vec.size();i++) buf<<"CONECT" | |
| 3587 <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n'; | |
| 3588 idx_vec.clear(); | |
| 3589 | |
| 3590 /* write pymol script */ | |
| 3591 ofstream fp; | |
| 3592 vector<string> pml_list; | |
| 3593 pml_list.push_back(fname_super+""); | |
| 3594 pml_list.push_back(fname_super+"_atm"); | |
| 3595 pml_list.push_back(fname_super+"_all"); | |
| 3596 pml_list.push_back(fname_super+"_all_atm"); | |
| 3597 pml_list.push_back(fname_super+"_all_atm_lig"); | |
| 3598 for (i=0;i<pml_list.size();i++) | |
| 3599 { | |
| 3600 buf_pymol<<"#!/usr/bin/env pymol\n" | |
| 3601 <<"load "<<pml_list[i]<<"\n" | |
| 3602 <<"hide all\n" | |
| 3603 <<((i==0 || i==2)?("show stick\n"):("show cartoon\n")) | |
| 3604 <<"color blue, chain A\n" | |
| 3605 <<"color red, chain B\n" | |
| 3606 <<"set ray_shadow, 0\n" | |
| 3607 <<"set stick_radius, 0.3\n" | |
| 3608 <<"set sphere_scale, 0.25\n" | |
| 3609 <<"show stick, not polymer\n" | |
| 3610 <<"show sphere, not polymer\n" | |
| 3611 <<"bg_color white\n" | |
| 3612 <<"set transparency=0.2\n" | |
| 3613 <<"zoom polymer\n" | |
| 3614 <<endl; | |
| 3615 fp.open((pml_list[i]+".pml").c_str()); | |
| 3616 fp<<buf_pymol.str(); | |
| 3617 fp.close(); | |
| 3618 buf_pymol.str(string()); | |
| 3619 pml_list[i].clear(); | |
| 3620 } | |
| 3621 pml_list.clear(); | |
| 3622 | |
| 3623 /* write rasmol script */ | |
| 3624 fp.open((fname_super).c_str()); | |
| 3625 fp<<buf.str(); | |
| 3626 fp.close(); | |
| 3627 fp.open((fname_super+"_all").c_str()); | |
| 3628 fp<<buf_all.str(); | |
| 3629 fp.close(); | |
| 3630 fp.open((fname_super+"_atm").c_str()); | |
| 3631 fp<<buf_atm.str(); | |
| 3632 fp.close(); | |
| 3633 fp.open((fname_super+"_all_atm").c_str()); | |
| 3634 fp<<buf_all_atm.str(); | |
| 3635 fp.close(); | |
| 3636 fp.open((fname_super+"_all_atm_lig").c_str()); | |
| 3637 fp<<buf_all_atm_lig.str(); | |
| 3638 fp.close(); | |
| 3639 fp.open((fname_super+".pdb").c_str()); | |
| 3640 fp<<buf_pdb.str(); | |
| 3641 fp.close(); | |
| 3642 | |
| 3643 /* clear stream */ | |
| 3644 buf.str(string()); | |
| 3645 buf_all.str(string()); | |
| 3646 buf_atm.str(string()); | |
| 3647 buf_all_atm.str(string()); | |
| 3648 buf_all_atm_lig.str(string()); | |
| 3649 buf_pdb.str(string()); | |
| 3650 buf_tm.str(string()); | |
| 3651 resi_aln1.clear(); | |
| 3652 resi_aln2.clear(); | |
| 3653 asym_id.clear(); | |
| 3654 line_vec.clear(); | |
| 3655 atom.clear(); | |
| 3656 AA.clear(); | |
| 3657 resi.clear(); | |
| 3658 inscode.clear(); | |
| 3659 model_index.clear(); | |
| 3660 } | |
| 3661 | |
| 3662 /* extract rotation matrix based on TMscore8 */ | |
| 3663 void output_rotation_matrix(const char* fname_matrix, | |
| 3664 const double t[3], const double u[3][3]) | |
| 3665 { | |
| 3666 fstream fout; | |
| 3667 fout.open(fname_matrix, ios::out | ios::trunc); | |
| 3668 if (fout)// succeed | |
| 3669 { | |
| 3670 fout << "------ The rotation matrix to rotate Chain_1 to Chain_2 ------\n"; | |
| 3671 char dest[1000]; | |
| 3672 sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]"); | |
| 3673 fout << string(dest); | |
| 3674 for (int k = 0; k < 3; k++) | |
| 3675 { | |
| 3676 sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]); | |
| 3677 fout << string(dest); | |
| 3678 } | |
| 3679 fout << "\nCode for rotating Structure A from (x,y,z) to (X,Y,Z):\n" | |
| 3680 "for(i=0; i<L; i++)\n" | |
| 3681 "{\n" | |
| 3682 " X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n" | |
| 3683 " Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n" | |
| 3684 " Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n" | |
| 3685 "}\n"; | |
| 3686 fout.close(); | |
| 3687 } | |
| 3688 else | |
| 3689 cout << "Open file to output rotation matrix fail.\n"; | |
| 3690 } | |
| 3691 | |
| 3692 //output the final results | |
| 3693 void output_results( | |
| 3694 const string xname, const string yname, | |
| 3695 const char *chainID1, const char *chainID2, | |
| 3696 const int xlen, const int ylen, double t[3], double u[3][3], | |
| 3697 const double TM1, const double TM2, | |
| 3698 const double TM3, const double TM4, const double TM5, | |
| 3699 const double rmsd, const double d0_out, | |
| 3700 const char *seqM, const char *seqxA, const char *seqyA, const double Liden, | |
| 3701 const int n_ali8, const int L_ali, | |
| 3702 const double TM_ali, const double rmsd_ali, const double TM_0, | |
| 3703 const double d0_0, const double d0A, const double d0B, | |
| 3704 const double Lnorm_ass, const double d0_scale, | |
| 3705 const double d0a, const double d0u, const char* fname_matrix, | |
| 3706 const int outfmt_opt, const int ter_opt, const string fname_super, | |
| 3707 const int i_opt, const int a_opt, const bool u_opt, const bool d_opt, | |
| 3708 const int mirror_opt, | |
| 3709 const vector<string>&resi_vec1, const vector<string>&resi_vec2) | |
| 3710 { | |
| 3711 if (outfmt_opt<=0) | |
| 3712 { | |
| 3713 printf("\nName of Chain_1: %s%s (to be superimposed onto Chain_2)\n", | |
| 3714 xname.c_str(), chainID1); | |
| 3715 printf("Name of Chain_2: %s%s\n", yname.c_str(), chainID2); | |
| 3716 printf("Length of Chain_1: %d residues\n", xlen); | |
| 3717 printf("Length of Chain_2: %d residues\n\n", ylen); | |
| 3718 | |
| 3719 if (i_opt) | |
| 3720 printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali); | |
| 3721 | |
| 3722 printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); | |
| 3723 printf("TM-score= %6.5f (if normalized by length of Chain_1, i.e., LN=%d, d0=%.2f)\n", TM2, xlen, d0B); | |
| 3724 printf("TM-score= %6.5f (if normalized by length of Chain_2, i.e., LN=%d, d0=%.2f)\n", TM1, ylen, d0A); | |
| 3725 | |
| 3726 if (a_opt==1) | |
| 3727 printf("TM-score= %6.5f (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a); | |
| 3728 if (u_opt) | |
| 3729 printf("TM-score= %6.5f (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u); | |
| 3730 if (d_opt) | |
| 3731 printf("TM-score= %6.5f (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen); | |
| 3732 printf("(You should use TM-score normalized by length of the reference structure)\n"); | |
| 3733 | |
| 3734 //output alignment | |
| 3735 printf("\n(\":\" denotes residue pairs of d < %4.1f Angstrom, ", d0_out); | |
| 3736 printf("\".\" denotes other aligned residues)\n"); | |
| 3737 printf("%s\n", seqxA); | |
| 3738 printf("%s\n", seqM); | |
| 3739 printf("%s\n", seqyA); | |
| 3740 } | |
| 3741 else if (outfmt_opt==1) | |
| 3742 { | |
| 3743 printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", | |
| 3744 xname.c_str(), chainID1, xlen, d0B, Liden/xlen, TM2); | |
| 3745 printf("%s\n", seqxA); | |
| 3746 printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n", | |
| 3747 yname.c_str(), chainID2, ylen, d0A, Liden/ylen, TM1); | |
| 3748 printf("%s\n", seqyA); | |
| 3749 | |
| 3750 printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n", | |
| 3751 n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0); | |
| 3752 | |
| 3753 if (i_opt) | |
| 3754 printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali); | |
| 3755 | |
| 3756 if(a_opt) | |
| 3757 printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a); | |
| 3758 | |
| 3759 if(u_opt) | |
| 3760 printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u); | |
| 3761 | |
| 3762 if(d_opt) | |
| 3763 printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen); | |
| 3764 | |
| 3765 printf("$$$$\n"); | |
| 3766 } | |
| 3767 else if (outfmt_opt==2) | |
| 3768 { | |
| 3769 printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d", | |
| 3770 xname.c_str(), chainID1, yname.c_str(), chainID2, TM2, TM1, rmsd, | |
| 3771 Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0, | |
| 3772 xlen, ylen, n_ali8); | |
| 3773 } | |
| 3774 cout << endl; | |
| 3775 | |
| 3776 if (strlen(fname_matrix)) | |
| 3777 output_rotation_matrix(fname_matrix, t, u); | |
| 3778 if (fname_super.size()) | |
| 3779 output_superpose(xname, yname, fname_super, t, u, ter_opt, mirror_opt, | |
| 3780 seqM, seqxA, seqyA, resi_vec1, resi_vec2, chainID1, chainID2, | |
| 3781 xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden); | |
| 3782 } | |
| 3783 | |
| 3784 double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm, | |
| 3785 double **xt, double **x, double **y, int xlen, int ylen, int invmap[], | |
| 3786 int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0, | |
| 3787 double d0_search, double score_d8, double t[3], double u[3][3], | |
| 3788 const int mol_type) | |
| 3789 { | |
| 3790 D0_MIN = 0.5; | |
| 3791 Lnorm = ylen; | |
| 3792 if (mol_type>0) // RNA | |
| 3793 { | |
| 3794 if (Lnorm<=11) d0=0.3; | |
| 3795 else if(Lnorm>11 && Lnorm<=15) d0=0.4; | |
| 3796 else if(Lnorm>15 && Lnorm<=19) d0=0.5; | |
| 3797 else if(Lnorm>19 && Lnorm<=23) d0=0.6; | |
| 3798 else if(Lnorm>23 && Lnorm<30) d0=0.7; | |
| 3799 else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5); | |
| 3800 } | |
| 3801 else | |
| 3802 { | |
| 3803 if (Lnorm > 21) d0=(1.24*pow((Lnorm*1.0-15), 1.0/3) -1.8); | |
| 3804 else d0 = D0_MIN; | |
| 3805 if (d0 < D0_MIN) d0 = D0_MIN; | |
| 3806 } | |
| 3807 double d0_input = d0;// Scaled by seq_min | |
| 3808 | |
| 3809 double tmscore;// collected alined residues from invmap | |
| 3810 int n_al = 0; | |
| 3811 int i; | |
| 3812 for (int j = 0; j<ylen; j++) | |
| 3813 { | |
| 3814 i = invmap[j]; | |
| 3815 if (i >= 0) | |
| 3816 { | |
| 3817 xtm[n_al][0] = x[i][0]; | |
| 3818 xtm[n_al][1] = x[i][1]; | |
| 3819 xtm[n_al][2] = x[i][2]; | |
| 3820 | |
| 3821 ytm[n_al][0] = y[j][0]; | |
| 3822 ytm[n_al][1] = y[j][1]; | |
| 3823 ytm[n_al][2] = y[j][2]; | |
| 3824 | |
| 3825 r1[n_al][0] = x[i][0]; | |
| 3826 r1[n_al][1] = x[i][1]; | |
| 3827 r1[n_al][2] = x[i][2]; | |
| 3828 | |
| 3829 r2[n_al][0] = y[j][0]; | |
| 3830 r2[n_al][1] = y[j][1]; | |
| 3831 r2[n_al][2] = y[j][2]; | |
| 3832 | |
| 3833 n_al++; | |
| 3834 } | |
| 3835 else if (i != -1) PrintErrorAndQuit("Wrong map!\n"); | |
| 3836 } | |
| 3837 L_ali = n_al; | |
| 3838 | |
| 3839 Kabsch(r1, r2, n_al, 0, &RMSD, t, u); | |
| 3840 RMSD = sqrt( RMSD/(1.0*n_al) ); | |
| 3841 | |
| 3842 int temp_simplify_step = 1; | |
| 3843 int temp_score_sum_method = 0; | |
| 3844 d0_search = d0_input; | |
| 3845 double rms = 0.0; | |
| 3846 tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al, t, u, | |
| 3847 temp_simplify_step, temp_score_sum_method, &rms, d0_input, | |
| 3848 score_d8, d0); | |
| 3849 tmscore = tmscore * n_al / (1.0*Lnorm); | |
| 3850 | |
| 3851 return tmscore; | |
| 3852 } | |
| 3853 | |
| 3854 /* copy the value of t and u into t0,u0 */ | |
| 3855 void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3]) | |
| 3856 { | |
| 3857 int i,j; | |
| 3858 for (i=0;i<3;i++) | |
| 3859 { | |
| 3860 t0[i]=t[i]; | |
| 3861 for (j=0;j<3;j++) u0[i][j]=u[i][j]; | |
| 3862 } | |
| 3863 } | |
| 3864 | |
| 3865 /* calculate approximate TM-score given rotation matrix */ | |
| 3866 double approx_TM(const int xlen, const int ylen, const int a_opt, | |
| 3867 double **xa, double **ya, double t[3], double u[3][3], | |
| 3868 const int invmap0[], const int mol_type) | |
| 3869 { | |
| 3870 double Lnorm_0=ylen; // normalized by the second protein | |
| 3871 if (a_opt==-2 && xlen>ylen) Lnorm_0=xlen; // longer | |
| 3872 else if (a_opt==-1 && xlen<ylen) Lnorm_0=xlen; // shorter | |
| 3873 else if (a_opt==1) Lnorm_0=(xlen+ylen)/2.; // average | |
| 3874 | |
| 3875 double D0_MIN; | |
| 3876 double Lnorm; | |
| 3877 double d0; | |
| 3878 double d0_search; | |
| 3879 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); | |
| 3880 double TMtmp=0; | |
| 3881 double d; | |
| 3882 double xtmp[3]={0,0,0}; | |
| 3883 | |
| 3884 for(int i=0,j=0; j<ylen; j++) | |
| 3885 { | |
| 3886 i=invmap0[j]; | |
| 3887 if(i>=0)//aligned | |
| 3888 { | |
| 3889 transform(t, u, &xa[i][0], &xtmp[0]); | |
| 3890 d=sqrt(dist(&xtmp[0], &ya[j][0])); | |
| 3891 TMtmp+=1/(1+(d/d0)*(d/d0)); | |
| 3892 //if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0)); | |
| 3893 } | |
| 3894 } | |
| 3895 TMtmp/=Lnorm_0; | |
| 3896 return TMtmp; | |
| 3897 } | |
| 3898 | |
| 3899 void clean_up_after_approx_TM(int *invmap0, int *invmap, | |
| 3900 double **score, bool **path, double **val, double **xtm, double **ytm, | |
| 3901 double **xt, double **r1, double **r2, const int xlen, const int minlen) | |
| 3902 { | |
| 3903 delete [] invmap0; | |
| 3904 delete [] invmap; | |
| 3905 DeleteArray(&score, xlen+1); | |
| 3906 DeleteArray(&path, xlen+1); | |
| 3907 DeleteArray(&val, xlen+1); | |
| 3908 DeleteArray(&xtm, minlen); | |
| 3909 DeleteArray(&ytm, minlen); | |
| 3910 DeleteArray(&xt, xlen); | |
| 3911 DeleteArray(&r1, minlen); | |
| 3912 DeleteArray(&r2, minlen); | |
| 3913 return; | |
| 3914 } | |
| 3915 | |
| 3916 /* Entry function for TM-align. Return TM-score calculation status: | |
| 3917 * 0 - full TM-score calculation | |
| 3918 * 1 - terminated due to exception | |
| 3919 * 2-7 - pre-terminated due to low TM-score */ | |
| 3920 int TMalign_main(double **xa, double **ya, | |
| 3921 const char *seqx, const char *seqy, const char *secx, const char *secy, | |
| 3922 double t0[3], double u0[3][3], | |
| 3923 double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, | |
| 3924 double &d0_0, double &TM_0, | |
| 3925 double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, | |
| 3926 string &seqM, string &seqxA, string &seqyA, | |
| 3927 double &rmsd0, int &L_ali, double &Liden, | |
| 3928 double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, | |
| 3929 const int xlen, const int ylen, | |
| 3930 const vector<string> sequence, const double Lnorm_ass, | |
| 3931 const double d0_scale, const int i_opt, const int a_opt, | |
| 3932 const bool u_opt, const bool d_opt, const bool fast_opt, | |
| 3933 const int mol_type, const double TMcut=-1) | |
| 3934 { | |
| 3935 double D0_MIN; //for d0 | |
| 3936 double Lnorm; //normalization length | |
| 3937 double score_d8,d0,d0_search,dcu0;//for TMscore search | |
| 3938 double t[3], u[3][3]; //Kabsch translation vector and rotation matrix | |
| 3939 double **score; // Input score table for dynamic programming | |
| 3940 bool **path; // for dynamic programming | |
| 3941 double **val; // for dynamic programming | |
| 3942 double **xtm, **ytm; // for TMscore search engine | |
| 3943 double **xt; //for saving the superposed version of r_1 or xtm | |
| 3944 double **r1, **r2; // for Kabsch rotation | |
| 3945 | |
| 3946 /***********************/ | |
| 3947 /* allocate memory */ | |
| 3948 /***********************/ | |
| 3949 int minlen = min(xlen, ylen); | |
| 3950 NewArray(&score, xlen+1, ylen+1); | |
| 3951 NewArray(&path, xlen+1, ylen+1); | |
| 3952 NewArray(&val, xlen+1, ylen+1); | |
| 3953 NewArray(&xtm, minlen, 3); | |
| 3954 NewArray(&ytm, minlen, 3); | |
| 3955 NewArray(&xt, xlen, 3); | |
| 3956 NewArray(&r1, minlen, 3); | |
| 3957 NewArray(&r2, minlen, 3); | |
| 3958 | |
| 3959 /***********************/ | |
| 3960 /* parameter set */ | |
| 3961 /***********************/ | |
| 3962 parameter_set4search(xlen, ylen, D0_MIN, Lnorm, | |
| 3963 score_d8, d0, d0_search, dcu0); | |
| 3964 int simplify_step = 40; //for similified search engine | |
| 3965 int score_sum_method = 8; //for scoring method, whether only sum over pairs with dis<score_d8 | |
| 3966 | |
| 3967 int i; | |
| 3968 int *invmap0 = new int[ylen+1]; | |
| 3969 int *invmap = new int[ylen+1]; | |
| 3970 double TM, TMmax=-1; | |
| 3971 for(i=0; i<ylen; i++) invmap0[i]=-1; | |
| 3972 | |
| 3973 double ddcc=0.4; | |
| 3974 if (Lnorm <= 40) ddcc=0.1; //Lnorm was setted in parameter_set4search | |
| 3975 double local_d0_search = d0_search; | |
| 3976 | |
| 3977 //************************************************// | |
| 3978 // get initial alignment from user's input: // | |
| 3979 // Stick to the initial alignment // | |
| 3980 //************************************************// | |
| 3981 bool bAlignStick = false; | |
| 3982 if (i_opt==3)// if input has set parameter for "-I" | |
| 3983 { | |
| 3984 // In the original code, this loop starts from 1, which is | |
| 3985 // incorrect. Fortran starts from 1 but C++ should starts from 0. | |
| 3986 for (int j = 0; j < ylen; j++)// Set aligned position to be "-1" | |
| 3987 invmap[j] = -1; | |
| 3988 | |
| 3989 int i1 = -1;// in C version, index starts from zero, not from one | |
| 3990 int i2 = -1; | |
| 3991 int L1 = sequence[0].size(); | |
| 3992 int L2 = sequence[1].size(); | |
| 3993 int L = min(L1, L2);// Get positions for aligned residues | |
| 3994 for (int kk1 = 0; kk1 < L; kk1++) | |
| 3995 { | |
| 3996 if (sequence[0][kk1] != '-') i1++; | |
| 3997 if (sequence[1][kk1] != '-') | |
| 3998 { | |
| 3999 i2++; | |
| 4000 if (i2 >= ylen || i1 >= xlen) kk1 = L; | |
| 4001 else if (sequence[0][kk1] != '-') invmap[i2] = i1; | |
| 4002 } | |
| 4003 } | |
| 4004 | |
| 4005 //--------------- 2. Align proteins from original alignment | |
| 4006 double prevD0_MIN = D0_MIN;// stored for later use | |
| 4007 int prevLnorm = Lnorm; | |
| 4008 double prevd0 = d0; | |
| 4009 TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, | |
| 4010 invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8, | |
| 4011 t, u, mol_type); | |
| 4012 D0_MIN = prevD0_MIN; | |
| 4013 Lnorm = prevLnorm; | |
| 4014 d0 = prevd0; | |
| 4015 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, | |
| 4016 invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0); | |
| 4017 if (TM > TMmax) | |
| 4018 { | |
| 4019 TMmax = TM; | |
| 4020 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4021 } | |
| 4022 bAlignStick = true; | |
| 4023 } | |
| 4024 | |
| 4025 /******************************************************/ | |
| 4026 /* get initial alignment with gapless threading */ | |
| 4027 /******************************************************/ | |
| 4028 if (!bAlignStick) | |
| 4029 { | |
| 4030 get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0, | |
| 4031 d0_search, fast_opt, t, u); | |
| 4032 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap0, | |
| 4033 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, | |
| 4034 score_d8, d0); | |
| 4035 if (TM>TMmax) TMmax = TM; | |
| 4036 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4037 //run dynamic programing iteratively to find the best alignment | |
| 4038 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen, | |
| 4039 t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search, | |
| 4040 D0_MIN, Lnorm, d0, score_d8); | |
| 4041 if (TM>TMmax) | |
| 4042 { | |
| 4043 TMmax = TM; | |
| 4044 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4045 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4046 } | |
| 4047 | |
| 4048 if (TMcut>0) // pre-terminate if TM-score is too low | |
| 4049 { | |
| 4050 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4051 xa, ya, t0, u0, invmap0, mol_type); | |
| 4052 | |
| 4053 if (TMtmp<0.5*TMcut) | |
| 4054 { | |
| 4055 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4056 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4057 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4058 return 2; | |
| 4059 } | |
| 4060 } | |
| 4061 | |
| 4062 /************************************************************/ | |
| 4063 /* get initial alignment based on secondary structure */ | |
| 4064 /************************************************************/ | |
| 4065 get_initial_ss(path, val, secx, secy, xlen, ylen, invmap); | |
| 4066 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, | |
| 4067 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, | |
| 4068 score_d8, d0); | |
| 4069 if (TM>TMmax) | |
| 4070 { | |
| 4071 TMmax = TM; | |
| 4072 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4073 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4074 } | |
| 4075 if (TM > TMmax*0.2) | |
| 4076 { | |
| 4077 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, | |
| 4078 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, | |
| 4079 local_d0_search, D0_MIN, Lnorm, d0, score_d8); | |
| 4080 if (TM>TMmax) | |
| 4081 { | |
| 4082 TMmax = TM; | |
| 4083 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4084 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4085 } | |
| 4086 } | |
| 4087 | |
| 4088 if (TMcut>0) // pre-terminate if TM-score is too low | |
| 4089 { | |
| 4090 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4091 xa, ya, t0, u0, invmap0, mol_type); | |
| 4092 | |
| 4093 if (TMtmp<0.52*TMcut) | |
| 4094 { | |
| 4095 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4096 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4097 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4098 return 3; | |
| 4099 } | |
| 4100 } | |
| 4101 | |
| 4102 /************************************************************/ | |
| 4103 /* get initial alignment based on local superposition */ | |
| 4104 /************************************************************/ | |
| 4105 //=initial5 in original TM-align | |
| 4106 if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya, | |
| 4107 xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN)) | |
| 4108 { | |
| 4109 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, | |
| 4110 invmap, t, u, simplify_step, score_sum_method, | |
| 4111 local_d0_search, Lnorm, score_d8, d0); | |
| 4112 if (TM>TMmax) | |
| 4113 { | |
| 4114 TMmax = TM; | |
| 4115 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4116 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4117 } | |
| 4118 if (TM > TMmax*ddcc) | |
| 4119 { | |
| 4120 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, | |
| 4121 xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search, | |
| 4122 D0_MIN, Lnorm, d0, score_d8); | |
| 4123 if (TM>TMmax) | |
| 4124 { | |
| 4125 TMmax = TM; | |
| 4126 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4127 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4128 } | |
| 4129 } | |
| 4130 } | |
| 4131 else | |
| 4132 cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl; | |
| 4133 | |
| 4134 if (TMcut>0) // pre-terminate if TM-score is too low | |
| 4135 { | |
| 4136 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4137 xa, ya, t0, u0, invmap0, mol_type); | |
| 4138 | |
| 4139 if (TMtmp<0.54*TMcut) | |
| 4140 { | |
| 4141 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4142 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4143 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4144 return 4; | |
| 4145 } | |
| 4146 } | |
| 4147 | |
| 4148 /********************************************************************/ | |
| 4149 /* get initial alignment by local superposition+secondary structure */ | |
| 4150 /********************************************************************/ | |
| 4151 //=initial3 in original TM-align | |
| 4152 get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya, | |
| 4153 xlen, ylen, invmap0, invmap, D0_MIN, d0); | |
| 4154 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, | |
| 4155 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, | |
| 4156 score_d8, d0); | |
| 4157 if (TM>TMmax) | |
| 4158 { | |
| 4159 TMmax = TM; | |
| 4160 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4161 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4162 } | |
| 4163 if (TM > TMmax*ddcc) | |
| 4164 { | |
| 4165 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, | |
| 4166 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, | |
| 4167 local_d0_search, D0_MIN, Lnorm, d0, score_d8); | |
| 4168 if (TM>TMmax) | |
| 4169 { | |
| 4170 TMmax = TM; | |
| 4171 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4172 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4173 } | |
| 4174 } | |
| 4175 | |
| 4176 if (TMcut>0) // pre-terminate if TM-score is too low | |
| 4177 { | |
| 4178 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4179 xa, ya, t0, u0, invmap0, mol_type); | |
| 4180 | |
| 4181 if (TMtmp<0.56*TMcut) | |
| 4182 { | |
| 4183 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4184 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4185 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4186 return 5; | |
| 4187 } | |
| 4188 } | |
| 4189 | |
| 4190 /*******************************************************************/ | |
| 4191 /* get initial alignment based on fragment gapless threading */ | |
| 4192 /*******************************************************************/ | |
| 4193 //=initial4 in original TM-align | |
| 4194 get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen, | |
| 4195 invmap, d0, d0_search, dcu0, fast_opt, t, u); | |
| 4196 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap, | |
| 4197 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm, | |
| 4198 score_d8, d0); | |
| 4199 if (TM>TMmax) | |
| 4200 { | |
| 4201 TMmax = TM; | |
| 4202 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4203 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4204 } | |
| 4205 if (TM > TMmax*ddcc) | |
| 4206 { | |
| 4207 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, | |
| 4208 xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN, | |
| 4209 Lnorm, d0, score_d8); | |
| 4210 if (TM>TMmax) | |
| 4211 { | |
| 4212 TMmax = TM; | |
| 4213 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4214 if (TMcut>0) copy_t_u(t, u, t0, u0); | |
| 4215 } | |
| 4216 } | |
| 4217 | |
| 4218 if (TMcut>0) // pre-terminate if TM-score is too low | |
| 4219 { | |
| 4220 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4221 xa, ya, t0, u0, invmap0, mol_type); | |
| 4222 | |
| 4223 if (TMtmp<0.58*TMcut) | |
| 4224 { | |
| 4225 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4226 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4227 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4228 return 6; | |
| 4229 } | |
| 4230 } | |
| 4231 | |
| 4232 //************************************************// | |
| 4233 // get initial alignment from user's input: // | |
| 4234 //************************************************// | |
| 4235 if (i_opt==1)// if input has set parameter for "-i" | |
| 4236 { | |
| 4237 for (int j = 0; j < ylen; j++)// Set aligned position to be "-1" | |
| 4238 invmap[j] = -1; | |
| 4239 | |
| 4240 int i1 = -1;// in C version, index starts from zero, not from one | |
| 4241 int i2 = -1; | |
| 4242 int L1 = sequence[0].size(); | |
| 4243 int L2 = sequence[1].size(); | |
| 4244 int L = min(L1, L2);// Get positions for aligned residues | |
| 4245 for (int kk1 = 0; kk1 < L; kk1++) | |
| 4246 { | |
| 4247 if (sequence[0][kk1] != '-') | |
| 4248 i1++; | |
| 4249 if (sequence[1][kk1] != '-') | |
| 4250 { | |
| 4251 i2++; | |
| 4252 if (i2 >= ylen || i1 >= xlen) kk1 = L; | |
| 4253 else if (sequence[0][kk1] != '-') invmap[i2] = i1; | |
| 4254 } | |
| 4255 } | |
| 4256 | |
| 4257 //--------------- 2. Align proteins from original alignment | |
| 4258 double prevD0_MIN = D0_MIN;// stored for later use | |
| 4259 int prevLnorm = Lnorm; | |
| 4260 double prevd0 = d0; | |
| 4261 TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, | |
| 4262 xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, | |
| 4263 d0_search, score_d8, t, u, mol_type); | |
| 4264 D0_MIN = prevD0_MIN; | |
| 4265 Lnorm = prevLnorm; | |
| 4266 d0 = prevd0; | |
| 4267 | |
| 4268 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, | |
| 4269 xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm, | |
| 4270 score_d8, d0); | |
| 4271 if (TM > TMmax) | |
| 4272 { | |
| 4273 TMmax = TM; | |
| 4274 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4275 } | |
| 4276 // Different from get_initial, get_initial_ss and get_initial_ssplus | |
| 4277 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, | |
| 4278 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30, | |
| 4279 local_d0_search, D0_MIN, Lnorm, d0, score_d8); | |
| 4280 if (TM>TMmax) | |
| 4281 { | |
| 4282 TMmax = TM; | |
| 4283 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i]; | |
| 4284 } | |
| 4285 } | |
| 4286 } | |
| 4287 | |
| 4288 | |
| 4289 | |
| 4290 //*******************************************************************// | |
| 4291 // The alignment will not be changed any more in the following // | |
| 4292 //*******************************************************************// | |
| 4293 //check if the initial alignment is generated approriately | |
| 4294 bool flag=false; | |
| 4295 for(i=0; i<ylen; i++) | |
| 4296 { | |
| 4297 if(invmap0[i]>=0) | |
| 4298 { | |
| 4299 flag=true; | |
| 4300 break; | |
| 4301 } | |
| 4302 } | |
| 4303 if(!flag) | |
| 4304 { | |
| 4305 cout << "There is no alignment between the two proteins!" << endl; | |
| 4306 cout << "Program stop with no result!" << endl; | |
| 4307 return 1; | |
| 4308 } | |
| 4309 | |
| 4310 /* last TM-score pre-termination */ | |
| 4311 if (TMcut>0) | |
| 4312 { | |
| 4313 double TMtmp=approx_TM(xlen, ylen, a_opt, | |
| 4314 xa, ya, t0, u0, invmap0, mol_type); | |
| 4315 | |
| 4316 if (TMtmp<0.6*TMcut) | |
| 4317 { | |
| 4318 TM1=TM2=TM3=TM4=TM5=TMtmp; | |
| 4319 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4320 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4321 return 7; | |
| 4322 } | |
| 4323 } | |
| 4324 | |
| 4325 //********************************************************************// | |
| 4326 // Detailed TMscore search engine --> prepare for final TMscore // | |
| 4327 //********************************************************************// | |
| 4328 //run detailed TMscore search engine for the best alignment, and | |
| 4329 //extract the best rotation matrix (t, u) for the best alginment | |
| 4330 simplify_step=1; | |
| 4331 if (fast_opt) simplify_step=40; | |
| 4332 score_sum_method=8; | |
| 4333 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, | |
| 4334 invmap0, t, u, simplify_step, score_sum_method, local_d0_search, | |
| 4335 false, Lnorm, score_d8, d0); | |
| 4336 | |
| 4337 //select pairs with dis<d8 for final TMscore computation and output alignment | |
| 4338 int k=0; | |
| 4339 int *m1, *m2; | |
| 4340 double d; | |
| 4341 m1=new int[xlen]; //alignd index in x | |
| 4342 m2=new int[ylen]; //alignd index in y | |
| 4343 do_rotation(xa, xt, xlen, t, u); | |
| 4344 k=0; | |
| 4345 for(int j=0; j<ylen; j++) | |
| 4346 { | |
| 4347 i=invmap0[j]; | |
| 4348 if(i>=0)//aligned | |
| 4349 { | |
| 4350 n_ali++; | |
| 4351 d=sqrt(dist(&xt[i][0], &ya[j][0])); | |
| 4352 if (d <= score_d8 || (i_opt == 3)) | |
| 4353 { | |
| 4354 m1[k]=i; | |
| 4355 m2[k]=j; | |
| 4356 | |
| 4357 xtm[k][0]=xa[i][0]; | |
| 4358 xtm[k][1]=xa[i][1]; | |
| 4359 xtm[k][2]=xa[i][2]; | |
| 4360 | |
| 4361 ytm[k][0]=ya[j][0]; | |
| 4362 ytm[k][1]=ya[j][1]; | |
| 4363 ytm[k][2]=ya[j][2]; | |
| 4364 | |
| 4365 r1[k][0] = xt[i][0]; | |
| 4366 r1[k][1] = xt[i][1]; | |
| 4367 r1[k][2] = xt[i][2]; | |
| 4368 r2[k][0] = ya[j][0]; | |
| 4369 r2[k][1] = ya[j][1]; | |
| 4370 r2[k][2] = ya[j][2]; | |
| 4371 | |
| 4372 k++; | |
| 4373 } | |
| 4374 } | |
| 4375 } | |
| 4376 n_ali8=k; | |
| 4377 | |
| 4378 Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u | |
| 4379 rmsd0 = sqrt(rmsd0 / n_ali8); | |
| 4380 | |
| 4381 | |
| 4382 //****************************************// | |
| 4383 // Final TMscore // | |
| 4384 // Please set parameters for output // | |
| 4385 //****************************************// | |
| 4386 double rmsd; | |
| 4387 simplify_step=1; | |
| 4388 score_sum_method=0; | |
| 4389 double Lnorm_0=ylen; | |
| 4390 | |
| 4391 | |
| 4392 //normalized by length of structure A | |
| 4393 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); | |
| 4394 d0A=d0; | |
| 4395 d0_0=d0A; | |
| 4396 local_d0_search = d0_search; | |
| 4397 TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step, | |
| 4398 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); | |
| 4399 TM_0 = TM1; | |
| 4400 | |
| 4401 //normalized by length of structure B | |
| 4402 parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type); | |
| 4403 d0B=d0; | |
| 4404 local_d0_search = d0_search; | |
| 4405 TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step, | |
| 4406 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0); | |
| 4407 | |
| 4408 double Lnorm_d0; | |
| 4409 if (a_opt>0) | |
| 4410 { | |
| 4411 //normalized by average length of structures A, B | |
| 4412 Lnorm_0=(xlen+ylen)*0.5; | |
| 4413 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type); | |
| 4414 d0a=d0; | |
| 4415 d0_0=d0a; | |
| 4416 local_d0_search = d0_search; | |
| 4417 | |
| 4418 TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, | |
| 4419 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, | |
| 4420 score_d8, d0); | |
| 4421 TM_0=TM3; | |
| 4422 } | |
| 4423 if (u_opt) | |
| 4424 { | |
| 4425 //normalized by user assigned length | |
| 4426 parameter_set4final(Lnorm_ass, D0_MIN, Lnorm, | |
| 4427 d0, d0_search, mol_type); | |
| 4428 d0u=d0; | |
| 4429 d0_0=d0u; | |
| 4430 Lnorm_0=Lnorm_ass; | |
| 4431 local_d0_search = d0_search; | |
| 4432 TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, | |
| 4433 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, | |
| 4434 score_d8, d0); | |
| 4435 TM_0=TM4; | |
| 4436 } | |
| 4437 if (d_opt) | |
| 4438 { | |
| 4439 //scaled by user assigned d0 | |
| 4440 parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search); | |
| 4441 d0_out=d0_scale; | |
| 4442 d0_0=d0_scale; | |
| 4443 //Lnorm_0=ylen; | |
| 4444 Lnorm_d0=Lnorm_0; | |
| 4445 local_d0_search = d0_search; | |
| 4446 TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, | |
| 4447 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm, | |
| 4448 score_d8, d0); | |
| 4449 TM_0=TM5; | |
| 4450 } | |
| 4451 | |
| 4452 /* derive alignment from superposition */ | |
| 4453 int ali_len=xlen+ylen; //maximum length of alignment | |
| 4454 seqxA.assign(ali_len,'-'); | |
| 4455 seqM.assign( ali_len,' '); | |
| 4456 seqyA.assign(ali_len,'-'); | |
| 4457 | |
| 4458 //do_rotation(xa, xt, xlen, t, u); | |
| 4459 do_rotation(xa, xt, xlen, t0, u0); | |
| 4460 | |
| 4461 int kk=0, i_old=0, j_old=0; | |
| 4462 d=0; | |
| 4463 for(int k=0; k<n_ali8; k++) | |
| 4464 { | |
| 4465 for(int i=i_old; i<m1[k]; i++) | |
| 4466 { | |
| 4467 //align x to gap | |
| 4468 seqxA[kk]=seqx[i]; | |
| 4469 seqyA[kk]='-'; | |
| 4470 seqM[kk]=' '; | |
| 4471 kk++; | |
| 4472 } | |
| 4473 | |
| 4474 for(int j=j_old; j<m2[k]; j++) | |
| 4475 { | |
| 4476 //align y to gap | |
| 4477 seqxA[kk]='-'; | |
| 4478 seqyA[kk]=seqy[j]; | |
| 4479 seqM[kk]=' '; | |
| 4480 kk++; | |
| 4481 } | |
| 4482 | |
| 4483 seqxA[kk]=seqx[m1[k]]; | |
| 4484 seqyA[kk]=seqy[m2[k]]; | |
| 4485 Liden+=(seqxA[kk]==seqyA[kk]); | |
| 4486 d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0])); | |
| 4487 if(d<d0_out) seqM[kk]=':'; | |
| 4488 else seqM[kk]='.'; | |
| 4489 kk++; | |
| 4490 i_old=m1[k]+1; | |
| 4491 j_old=m2[k]+1; | |
| 4492 } | |
| 4493 | |
| 4494 //tail | |
| 4495 for(int i=i_old; i<xlen; i++) | |
| 4496 { | |
| 4497 //align x to gap | |
| 4498 seqxA[kk]=seqx[i]; | |
| 4499 seqyA[kk]='-'; | |
| 4500 seqM[kk]=' '; | |
| 4501 kk++; | |
| 4502 } | |
| 4503 for(int j=j_old; j<ylen; j++) | |
| 4504 { | |
| 4505 //align y to gap | |
| 4506 seqxA[kk]='-'; | |
| 4507 seqyA[kk]=seqy[j]; | |
| 4508 seqM[kk]=' '; | |
| 4509 kk++; | |
| 4510 } | |
| 4511 seqxA=seqxA.substr(0,kk); | |
| 4512 seqyA=seqyA.substr(0,kk); | |
| 4513 seqM =seqM.substr(0,kk); | |
| 4514 | |
| 4515 /* free memory */ | |
| 4516 clean_up_after_approx_TM(invmap0, invmap, score, path, val, | |
| 4517 xtm, ytm, xt, r1, r2, xlen, minlen); | |
| 4518 delete [] m1; | |
| 4519 delete [] m2; | |
| 4520 return 0; // zero for no exception | |
| 4521 } | |
| 4522 | |
| 4523 /* entry function for TM-align with circular permutation | |
| 4524 * i_opt, a_opt, u_opt, d_opt, TMcut are not implemented yet */ | |
| 4525 int CPalign_main(double **xa, double **ya, | |
| 4526 const char *seqx, const char *seqy, const char *secx, const char *secy, | |
| 4527 double t0[3], double u0[3][3], | |
| 4528 double &TM1, double &TM2, double &TM3, double &TM4, double &TM5, | |
| 4529 double &d0_0, double &TM_0, | |
| 4530 double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out, | |
| 4531 string &seqM, string &seqxA, string &seqyA, | |
| 4532 double &rmsd0, int &L_ali, double &Liden, | |
| 4533 double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8, | |
| 4534 const int xlen, const int ylen, | |
| 4535 const vector<string> sequence, const double Lnorm_ass, | |
| 4536 const double d0_scale, const int i_opt, const int a_opt, | |
| 4537 const bool u_opt, const bool d_opt, const bool fast_opt, | |
| 4538 const int mol_type, const double TMcut=-1) | |
| 4539 { | |
| 4540 char *seqx_cp, *seqy_cp; // for the protein sequence | |
| 4541 char *secx_cp, *secy_cp; // for the secondary structure | |
| 4542 double **xa_cp, **ya_cp; // coordinates | |
| 4543 string seqxA_cp,seqyA_cp; // alignment | |
| 4544 int i,r; | |
| 4545 int cp_point=0; // position of circular permutation | |
| 4546 int cp_aln_best=0; // amount of aligned residue in sliding window | |
| 4547 int cp_aln_current;// amount of aligned residue in sliding window | |
| 4548 | |
| 4549 /* duplicate structure */ | |
| 4550 NewArray(&xa_cp, xlen*2, 3); | |
| 4551 seqx_cp = new char[xlen*2 + 1]; | |
| 4552 secx_cp = new char[xlen*2 + 1]; | |
| 4553 for (r=0;r<xlen;r++) | |
| 4554 { | |
| 4555 xa_cp[r+xlen][0]=xa_cp[r][0]=xa[r][0]; | |
| 4556 xa_cp[r+xlen][1]=xa_cp[r][1]=xa[r][1]; | |
| 4557 xa_cp[r+xlen][2]=xa_cp[r][2]=xa[r][2]; | |
| 4558 seqx_cp[r+xlen]=seqx_cp[r]=seqx[r]; | |
| 4559 secx_cp[r+xlen]=secx_cp[r]=secx[r]; | |
| 4560 } | |
| 4561 seqx_cp[2*xlen]=0; | |
| 4562 secx_cp[2*xlen]=0; | |
| 4563 | |
| 4564 /* fTM-align alignment */ | |
| 4565 double TM1_cp,TM2_cp; | |
| 4566 TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy, | |
| 4567 t0, u0, TM1_cp, TM2_cp, TM3, TM4, TM5, | |
| 4568 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp, | |
| 4569 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, | |
| 4570 xlen*2, ylen, sequence, Lnorm_ass, d0_scale, | |
| 4571 0, false, false, false, true, mol_type, -1); | |
| 4572 | |
| 4573 /* delete gap in seqxA_cp */ | |
| 4574 r=0; | |
| 4575 seqxA=seqxA_cp; | |
| 4576 seqyA=seqyA_cp; | |
| 4577 for (i=0;i<seqxA_cp.size();i++) | |
| 4578 { | |
| 4579 if (seqxA_cp[i]!='-') | |
| 4580 { | |
| 4581 seqxA[r]=seqxA_cp[i]; | |
| 4582 seqyA[r]=seqyA_cp[i]; | |
| 4583 r++; | |
| 4584 } | |
| 4585 } | |
| 4586 seqxA=seqxA.substr(0,r); | |
| 4587 seqyA=seqyA.substr(0,r); | |
| 4588 | |
| 4589 /* count the number of aligned residues in each window | |
| 4590 * r - residue index in the original unaligned sequence | |
| 4591 * i - position in the alignment */ | |
| 4592 for (r=0;r<xlen-1;r++) | |
| 4593 { | |
| 4594 cp_aln_current=0; | |
| 4595 for (i=r;i<r+xlen;i++) cp_aln_current+=(seqyA[i]!='-'); | |
| 4596 | |
| 4597 if (cp_aln_current>cp_aln_best) | |
| 4598 { | |
| 4599 cp_aln_best=cp_aln_current; | |
| 4600 cp_point=r; | |
| 4601 } | |
| 4602 } | |
| 4603 seqM.clear(); | |
| 4604 seqxA.clear(); | |
| 4605 seqyA.clear(); | |
| 4606 seqxA_cp.clear(); | |
| 4607 seqyA_cp.clear(); | |
| 4608 rmsd0=Liden=n_ali=n_ali8=0; | |
| 4609 | |
| 4610 /* fTM-align alignment */ | |
| 4611 TMalign_main(xa, ya, seqx, seqy, secx, secy, | |
| 4612 t0, u0, TM1, TM2, TM3, TM4, TM5, | |
| 4613 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA, | |
| 4614 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, | |
| 4615 xlen, ylen, sequence, Lnorm_ass, d0_scale, | |
| 4616 0, false, false, false, true, mol_type, -1); | |
| 4617 | |
| 4618 /* do not use cricular permutation of number of aligned residues is not | |
| 4619 * larger than sequence-order dependent alignment */ | |
| 4620 if (n_ali8>cp_aln_best) cp_point=0; | |
| 4621 | |
| 4622 /* prepare structure for final alignment */ | |
| 4623 seqM.clear(); | |
| 4624 seqxA.clear(); | |
| 4625 seqyA.clear(); | |
| 4626 rmsd0=Liden=n_ali=n_ali8=0; | |
| 4627 if (cp_point!=0) | |
| 4628 { | |
| 4629 for (r=0;r<xlen;r++) | |
| 4630 { | |
| 4631 xa_cp[r][0]=xa_cp[r+cp_point][0]; | |
| 4632 xa_cp[r][1]=xa_cp[r+cp_point][1]; | |
| 4633 xa_cp[r][2]=xa_cp[r+cp_point][2]; | |
| 4634 seqx_cp[r]=seqx_cp[r+cp_point]; | |
| 4635 secx_cp[r]=secx_cp[r+cp_point]; | |
| 4636 } | |
| 4637 } | |
| 4638 seqx_cp[xlen]=0; | |
| 4639 secx_cp[xlen]=0; | |
| 4640 | |
| 4641 /* full TM-align */ | |
| 4642 TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy, | |
| 4643 t0, u0, TM1, TM2, TM3, TM4, TM5, | |
| 4644 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp, | |
| 4645 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, | |
| 4646 xlen, ylen, sequence, Lnorm_ass, d0_scale, | |
| 4647 i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, TMcut); | |
| 4648 | |
| 4649 /* correct alignment | |
| 4650 * r - residue index in the original unaligned sequence | |
| 4651 * i - position in the alignment */ | |
| 4652 if (cp_point>0) | |
| 4653 { | |
| 4654 r=0; | |
| 4655 for (i=0;i<seqxA_cp.size();i++) | |
| 4656 { | |
| 4657 r+=(seqxA_cp[i]!='-'); | |
| 4658 if (r>=(xlen-cp_point)) | |
| 4659 { | |
| 4660 i++; | |
| 4661 break; | |
| 4662 } | |
| 4663 } | |
| 4664 seqxA=seqxA_cp.substr(0,i)+'*'+seqxA_cp.substr(i); | |
| 4665 seqM =seqM.substr(0,i) +' '+seqM.substr(i); | |
| 4666 seqyA=seqyA_cp.substr(0,i)+'-'+seqyA_cp.substr(i); | |
| 4667 } | |
| 4668 else | |
| 4669 { | |
| 4670 seqxA=seqxA_cp; | |
| 4671 seqyA=seqyA_cp; | |
| 4672 } | |
| 4673 | |
| 4674 /* clean up */ | |
| 4675 delete[]seqx_cp; | |
| 4676 delete[]secx_cp; | |
| 4677 DeleteArray(&xa_cp,xlen*2); | |
| 4678 seqxA_cp.clear(); | |
| 4679 seqyA_cp.clear(); | |
| 4680 return cp_point; | |
| 4681 } | |
| 4682 | |
| 4683 int main(int argc, char *argv[]) | |
| 4684 { | |
| 4685 if (argc < 2) print_help(); | |
| 4686 | |
| 4687 | |
| 4688 clock_t t1, t2; | |
| 4689 t1 = clock(); | |
| 4690 | |
| 4691 /**********************/ | |
| 4692 /* get argument */ | |
| 4693 /**********************/ | |
| 4694 string xname = ""; | |
| 4695 string yname = ""; | |
| 4696 string fname_super = ""; // file name for superposed structure | |
| 4697 string fname_lign = ""; // file name for user alignment | |
| 4698 string fname_matrix= ""; // file name for output matrix | |
| 4699 vector<string> sequence; // get value from alignment file | |
| 4700 double Lnorm_ass, d0_scale; | |
| 4701 | |
| 4702 bool h_opt = false; // print full help message | |
| 4703 bool v_opt = false; // print version | |
| 4704 bool m_opt = false; // flag for -m, output rotation matrix | |
| 4705 int i_opt = 0; // 1 for -i, 3 for -I | |
| 4706 bool o_opt = false; // flag for -o, output superposed structure | |
| 4707 int a_opt = 0; // flag for -a, do not normalized by average length | |
| 4708 bool u_opt = false; // flag for -u, normalized by user specified length | |
| 4709 bool d_opt = false; // flag for -d, user specified d0 | |
| 4710 | |
| 4711 double TMcut =-1; | |
| 4712 int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1 | |
| 4713 int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2 | |
| 4714 int ter_opt =3; // TER, END, or different chainID | |
| 4715 int split_opt =0; // do not split chain | |
| 4716 int outfmt_opt=0; // set -outfmt to full output | |
| 4717 bool fast_opt =false; // flags for -fast, fTM-align algorithm | |
| 4718 int cp_opt =0; // do not check circular permutation | |
| 4719 int mirror_opt=0; // do not align mirror | |
| 4720 int het_opt=0; // do not read HETATM residues | |
| 4721 string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA | |
| 4722 string mol_opt ="auto";// auto-detect the molecule type as protein/RNA | |
| 4723 string suffix_opt=""; // set -suffix to empty | |
| 4724 string dir_opt =""; // set -dir to empty | |
| 4725 string dir1_opt =""; // set -dir1 to empty | |
| 4726 string dir2_opt =""; // set -dir2 to empty | |
| 4727 int byresi_opt=0; // set -byresi to 0 | |
| 4728 vector<string> chain1_list; // only when -dir1 is set | |
| 4729 vector<string> chain2_list; // only when -dir2 is set | |
| 4730 | |
| 4731 for(int i = 1; i < argc; i++) | |
| 4732 { | |
| 4733 if ( !strcmp(argv[i],"-o") && i < (argc-1) ) | |
| 4734 { | |
| 4735 fname_super = argv[i + 1]; o_opt = true; i++; | |
| 4736 } | |
| 4737 else if ( (!strcmp(argv[i],"-u") || | |
| 4738 !strcmp(argv[i],"-L")) && i < (argc-1) ) | |
| 4739 { | |
| 4740 Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++; | |
| 4741 } | |
| 4742 else if ( !strcmp(argv[i],"-a") && i < (argc-1) ) | |
| 4743 { | |
| 4744 if (!strcmp(argv[i + 1], "T")) a_opt=true; | |
| 4745 else if (!strcmp(argv[i + 1], "F")) a_opt=false; | |
| 4746 else | |
| 4747 { | |
| 4748 a_opt=atoi(argv[i + 1]); | |
| 4749 if (a_opt!=-2 && a_opt!=-1 && a_opt!=1) | |
| 4750 PrintErrorAndQuit("-a must be -2, -1, 1, T or F"); | |
| 4751 } | |
| 4752 i++; | |
| 4753 } | |
| 4754 else if ( !strcmp(argv[i],"-d") && i < (argc-1) ) | |
| 4755 { | |
| 4756 d0_scale = atof(argv[i + 1]); d_opt = true; i++; | |
| 4757 } | |
| 4758 else if ( !strcmp(argv[i],"-v") ) | |
| 4759 { | |
| 4760 v_opt = true; | |
| 4761 } | |
| 4762 else if ( !strcmp(argv[i],"-h") ) | |
| 4763 { | |
| 4764 h_opt = true; | |
| 4765 } | |
| 4766 else if ( !strcmp(argv[i],"-i") && i < (argc-1) ) | |
| 4767 { | |
| 4768 if (i_opt==3) | |
| 4769 PrintErrorAndQuit("ERROR! -i and -I cannot be used together"); | |
| 4770 fname_lign = argv[i + 1]; i_opt = 1; i++; | |
| 4771 } | |
| 4772 else if (!strcmp(argv[i], "-I") && i < (argc-1) ) | |
| 4773 { | |
| 4774 if (i_opt==1) | |
| 4775 PrintErrorAndQuit("ERROR! -I and -i cannot be used together"); | |
| 4776 fname_lign = argv[i + 1]; i_opt = 3; i++; | |
| 4777 } | |
| 4778 else if (!strcmp(argv[i], "-m") && i < (argc-1) ) | |
| 4779 { | |
| 4780 fname_matrix = argv[i + 1]; m_opt = true; i++; | |
| 4781 }// get filename for rotation matrix | |
| 4782 else if (!strcmp(argv[i], "-fast")) | |
| 4783 { | |
| 4784 fast_opt = true; | |
| 4785 } | |
| 4786 else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) ) | |
| 4787 { | |
| 4788 infmt1_opt=atoi(argv[i + 1]); i++; | |
| 4789 } | |
| 4790 else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) ) | |
| 4791 { | |
| 4792 infmt2_opt=atoi(argv[i + 1]); i++; | |
| 4793 } | |
| 4794 else if ( !strcmp(argv[i],"-ter") && i < (argc-1) ) | |
| 4795 { | |
| 4796 ter_opt=atoi(argv[i + 1]); i++; | |
| 4797 } | |
| 4798 else if ( !strcmp(argv[i],"-split") && i < (argc-1) ) | |
| 4799 { | |
| 4800 split_opt=atoi(argv[i + 1]); i++; | |
| 4801 } | |
| 4802 else if ( !strcmp(argv[i],"-atom") && i < (argc-1) ) | |
| 4803 { | |
| 4804 atom_opt=argv[i + 1]; i++; | |
| 4805 } | |
| 4806 else if ( !strcmp(argv[i],"-mol") && i < (argc-1) ) | |
| 4807 { | |
| 4808 mol_opt=argv[i + 1]; i++; | |
| 4809 } | |
| 4810 else if ( !strcmp(argv[i],"-dir") && i < (argc-1) ) | |
| 4811 { | |
| 4812 dir_opt=argv[i + 1]; i++; | |
| 4813 } | |
| 4814 else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) ) | |
| 4815 { | |
| 4816 dir1_opt=argv[i + 1]; i++; | |
| 4817 } | |
| 4818 else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) ) | |
| 4819 { | |
| 4820 dir2_opt=argv[i + 1]; i++; | |
| 4821 } | |
| 4822 else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) ) | |
| 4823 { | |
| 4824 suffix_opt=argv[i + 1]; i++; | |
| 4825 } | |
| 4826 else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) ) | |
| 4827 { | |
| 4828 outfmt_opt=atoi(argv[i + 1]); i++; | |
| 4829 } | |
| 4830 else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) ) | |
| 4831 { | |
| 4832 TMcut=atof(argv[i + 1]); i++; | |
| 4833 } | |
| 4834 else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) ) | |
| 4835 { | |
| 4836 byresi_opt=atoi(argv[i + 1]); i++; | |
| 4837 } | |
| 4838 else if ( !strcmp(argv[i],"-cp") ) | |
| 4839 { | |
| 4840 cp_opt=1; | |
| 4841 } | |
| 4842 else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) ) | |
| 4843 { | |
| 4844 mirror_opt=atoi(argv[i + 1]); i++; | |
| 4845 } | |
| 4846 else if ( !strcmp(argv[i],"-het") && i < (argc-1) ) | |
| 4847 { | |
| 4848 het_opt=atoi(argv[i + 1]); i++; | |
| 4849 } | |
| 4850 else if (xname.size() == 0) xname=argv[i]; | |
| 4851 else if (yname.size() == 0) yname=argv[i]; | |
| 4852 else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]); | |
| 4853 } | |
| 4854 | |
| 4855 if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) || | |
| 4856 (yname.size() && dir_opt.size())) | |
| 4857 { | |
| 4858 if (h_opt) print_help(h_opt); | |
| 4859 if (v_opt) | |
| 4860 { | |
| 4861 print_version(); | |
| 4862 exit(EXIT_FAILURE); | |
| 4863 } | |
| 4864 if (xname.size()==0) | |
| 4865 PrintErrorAndQuit("Please provide input structures"); | |
| 4866 else if (yname.size()==0 && dir_opt.size()==0) | |
| 4867 PrintErrorAndQuit("Please provide structure B"); | |
| 4868 else if (yname.size() && dir_opt.size()) | |
| 4869 PrintErrorAndQuit("Please provide only one file name if -dir is set"); | |
| 4870 } | |
| 4871 | |
| 4872 if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0) | |
| 4873 PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set"); | |
| 4874 if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size())) | |
| 4875 { | |
| 4876 if (m_opt || o_opt) | |
| 4877 PrintErrorAndQuit("-m or -o cannot be set with -dir, -dir1 or -dir2"); | |
| 4878 else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size())) | |
| 4879 PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2"); | |
| 4880 } | |
| 4881 if (atom_opt.size()!=4) | |
| 4882 PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space."); | |
| 4883 if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA") | |
| 4884 PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein."); | |
| 4885 else if (mol_opt=="protein" && atom_opt=="auto") | |
| 4886 atom_opt=" CA "; | |
| 4887 else if (mol_opt=="RNA" && atom_opt=="auto") | |
| 4888 atom_opt=" C3'"; | |
| 4889 | |
| 4890 if (u_opt && Lnorm_ass<=0) | |
| 4891 PrintErrorAndQuit("Wrong value for option -u! It should be >0"); | |
| 4892 if (d_opt && d0_scale<=0) | |
| 4893 PrintErrorAndQuit("Wrong value for option -d! It should be >0"); | |
| 4894 if (outfmt_opt>=2 && (a_opt || u_opt || d_opt)) | |
| 4895 PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d"); | |
| 4896 if (byresi_opt!=0) | |
| 4897 { | |
| 4898 if (i_opt) | |
| 4899 PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I"); | |
| 4900 if (byresi_opt<0 || byresi_opt>3) | |
| 4901 PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3"); | |
| 4902 if (byresi_opt>=2 && ter_opt>=2) | |
| 4903 PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1"); | |
| 4904 } | |
| 4905 if (split_opt==1 && ter_opt!=0) | |
| 4906 PrintErrorAndQuit("-split 1 should be used with -ter 0"); | |
| 4907 else if (split_opt==2 && ter_opt!=0 && ter_opt!=1) | |
| 4908 PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1"); | |
| 4909 if (split_opt<0 || split_opt>2) | |
| 4910 PrintErrorAndQuit("-split can only be 0, 1 or 2"); | |
| 4911 if (cp_opt!=0 && cp_opt!=1) | |
| 4912 PrintErrorAndQuit("-cp can only be 0 or 1"); | |
| 4913 if (cp_opt && i_opt) | |
| 4914 PrintErrorAndQuit("-cp cannot be used with -i or -I"); | |
| 4915 | |
| 4916 /* read initial alignment file from 'align.txt' */ | |
| 4917 if (i_opt) read_user_alignment(sequence, fname_lign, i_opt); | |
| 4918 | |
| 4919 if (byresi_opt) i_opt=3; | |
| 4920 | |
| 4921 if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt | |
| 4922 PrintErrorAndQuit("ERROR! Please provide a file name for option -m!"); | |
| 4923 | |
| 4924 /* parse file list */ | |
| 4925 if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname); | |
| 4926 else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt); | |
| 4927 | |
| 4928 if (dir_opt.size()) | |
| 4929 for (int i=0;i<chain1_list.size();i++) | |
| 4930 chain2_list.push_back(chain1_list[i]); | |
| 4931 else if (dir2_opt.size()==0) chain2_list.push_back(yname); | |
| 4932 else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt); | |
| 4933 | |
| 4934 if (outfmt_opt==2) | |
| 4935 cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t" | |
| 4936 <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl; | |
| 4937 | |
| 4938 /* declare previously global variables */ | |
| 4939 vector<vector<string> >PDB_lines1; // text of chain1 | |
| 4940 vector<vector<string> >PDB_lines2; // text of chain2 | |
| 4941 vector<int> mol_vec1; // molecule type of chain1, RNA if >0 | |
| 4942 vector<int> mol_vec2; // molecule type of chain2, RNA if >0 | |
| 4943 vector<string> chainID_list1; // list of chainID1 | |
| 4944 vector<string> chainID_list2; // list of chainID2 | |
| 4945 int i,j; // file index | |
| 4946 int chain_i,chain_j; // chain index | |
| 4947 int r; // residue index | |
| 4948 int xlen, ylen; // chain length | |
| 4949 int xchainnum,ychainnum;// number of chains in a PDB file | |
| 4950 char *seqx, *seqy; // for the protein sequence | |
| 4951 char *secx, *secy; // for the secondary structure | |
| 4952 double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and | |
| 4953 // ya[0...ylen-1][0..2], in general, | |
| 4954 // ya is regarded as native structure | |
| 4955 // --> superpose xa onto ya | |
| 4956 vector<string> resi_vec1; // residue index for chain1 | |
| 4957 vector<string> resi_vec2; // residue index for chain2 | |
| 4958 | |
| 4959 /* loop over file names */ | |
| 4960 for (i=0;i<chain1_list.size();i++) | |
| 4961 { | |
| 4962 /* parse chain 1 */ | |
| 4963 xname=chain1_list[i]; | |
| 4964 xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1, | |
| 4965 mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt); | |
| 4966 if (!xchainnum) | |
| 4967 { | |
| 4968 cerr<<"Warning! Cannot parse file: "<<xname | |
| 4969 <<". Chain number 0."<<endl; | |
| 4970 continue; | |
| 4971 } | |
| 4972 for (chain_i=0;chain_i<xchainnum;chain_i++) | |
| 4973 { | |
| 4974 xlen=PDB_lines1[chain_i].size(); | |
| 4975 mol_vec1[chain_i]=-1; | |
| 4976 if (!xlen) | |
| 4977 { | |
| 4978 cerr<<"Warning! Cannot parse file: "<<xname | |
| 4979 <<". Chain length 0."<<endl; | |
| 4980 continue; | |
| 4981 } | |
| 4982 else if (xlen<3) | |
| 4983 { | |
| 4984 cerr<<"Sequence is too short <3!: "<<xname<<endl; | |
| 4985 continue; | |
| 4986 } | |
| 4987 NewArray(&xa, xlen, 3); | |
| 4988 seqx = new char[xlen + 1]; | |
| 4989 secx = new char[xlen + 1]; | |
| 4990 xlen = read_PDB(PDB_lines1[chain_i], xa, seqx, | |
| 4991 resi_vec1, byresi_opt?byresi_opt:o_opt); | |
| 4992 if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2]; | |
| 4993 make_sec(xa, xlen, secx); // secondary structure assignment | |
| 4994 | |
| 4995 for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++) | |
| 4996 { | |
| 4997 /* parse chain 2 */ | |
| 4998 if (PDB_lines2.size()==0) | |
| 4999 { | |
| 5000 yname=chain2_list[j]; | |
| 5001 ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2, | |
| 5002 mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt, | |
| 5003 het_opt); | |
| 5004 if (!ychainnum) | |
| 5005 { | |
| 5006 cerr<<"Warning! Cannot parse file: "<<yname | |
| 5007 <<". Chain number 0."<<endl; | |
| 5008 continue; | |
| 5009 } | |
| 5010 } | |
| 5011 for (chain_j=0;chain_j<ychainnum;chain_j++) | |
| 5012 { | |
| 5013 ylen=PDB_lines2[chain_j].size(); | |
| 5014 mol_vec2[chain_j]=-1; | |
| 5015 if (!ylen) | |
| 5016 { | |
| 5017 cerr<<"Warning! Cannot parse file: "<<yname | |
| 5018 <<". Chain length 0."<<endl; | |
| 5019 continue; | |
| 5020 } | |
| 5021 else if (ylen<3) | |
| 5022 { | |
| 5023 cerr<<"Sequence is too short <3!: "<<yname<<endl; | |
| 5024 continue; | |
| 5025 } | |
| 5026 NewArray(&ya, ylen, 3); | |
| 5027 seqy = new char[ylen + 1]; | |
| 5028 secy = new char[ylen + 1]; | |
| 5029 ylen = read_PDB(PDB_lines2[chain_j], ya, seqy, | |
| 5030 resi_vec2, byresi_opt?byresi_opt:o_opt); | |
| 5031 make_sec(ya, ylen, secy); | |
| 5032 if (byresi_opt) extract_aln_from_resi(sequence, | |
| 5033 seqx,seqy,resi_vec1,resi_vec2,byresi_opt); | |
| 5034 | |
| 5035 /* declare variable specific to this pair of TMalign */ | |
| 5036 double t0[3], u0[3][3]; | |
| 5037 double TM1, TM2; | |
| 5038 double TM3, TM4, TM5; // for a_opt, u_opt, d_opt | |
| 5039 double d0_0, TM_0; | |
| 5040 double d0A, d0B, d0u, d0a; | |
| 5041 double d0_out=5.0; | |
| 5042 string seqM, seqxA, seqyA;// for output alignment | |
| 5043 double rmsd0 = 0.0; | |
| 5044 int L_ali; // Aligned length in standard_TMscore | |
| 5045 double Liden=0; | |
| 5046 double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore | |
| 5047 int n_ali=0; | |
| 5048 int n_ali8=0; | |
| 5049 | |
| 5050 /* entry function for structure alignment */ | |
| 5051 if (cp_opt) CPalign_main( | |
| 5052 xa, ya, seqx, seqy, secx, secy, | |
| 5053 t0, u0, TM1, TM2, TM3, TM4, TM5, | |
| 5054 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, | |
| 5055 seqM, seqxA, seqyA, | |
| 5056 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, | |
| 5057 xlen, ylen, sequence, Lnorm_ass, d0_scale, | |
| 5058 i_opt, a_opt, u_opt, d_opt, fast_opt, | |
| 5059 mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); | |
| 5060 else TMalign_main( | |
| 5061 xa, ya, seqx, seqy, secx, secy, | |
| 5062 t0, u0, TM1, TM2, TM3, TM4, TM5, | |
| 5063 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, | |
| 5064 seqM, seqxA, seqyA, | |
| 5065 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8, | |
| 5066 xlen, ylen, sequence, Lnorm_ass, d0_scale, | |
| 5067 i_opt, a_opt, u_opt, d_opt, fast_opt, | |
| 5068 mol_vec1[chain_i]+mol_vec2[chain_j],TMcut); | |
| 5069 | |
| 5070 /* print result */ | |
| 5071 if (outfmt_opt==0) print_version(); | |
| 5072 output_results( | |
| 5073 xname.substr(dir1_opt.size()), | |
| 5074 yname.substr(dir2_opt.size()), | |
| 5075 chainID_list1[chain_i].c_str(), | |
| 5076 chainID_list2[chain_j].c_str(), | |
| 5077 xlen, ylen, t0, u0, TM1, TM2, | |
| 5078 TM3, TM4, TM5, rmsd0, d0_out, | |
| 5079 seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden, | |
| 5080 n_ali8, L_ali, TM_ali, rmsd_ali, | |
| 5081 TM_0, d0_0, d0A, d0B, | |
| 5082 Lnorm_ass, d0_scale, d0a, d0u, | |
| 5083 (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(), | |
| 5084 outfmt_opt, ter_opt, | |
| 5085 (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(), | |
| 5086 i_opt, a_opt, u_opt, d_opt,mirror_opt, | |
| 5087 resi_vec1,resi_vec2); | |
| 5088 | |
| 5089 /* Done! Free memory */ | |
| 5090 seqM.clear(); | |
| 5091 seqxA.clear(); | |
| 5092 seqyA.clear(); | |
| 5093 DeleteArray(&ya, ylen); | |
| 5094 delete [] seqy; | |
| 5095 delete [] secy; | |
| 5096 resi_vec2.clear(); | |
| 5097 } // chain_j | |
| 5098 if (chain2_list.size()>1) | |
| 5099 { | |
| 5100 yname.clear(); | |
| 5101 for (chain_j=0;chain_j<ychainnum;chain_j++) | |
| 5102 PDB_lines2[chain_j].clear(); | |
| 5103 PDB_lines2.clear(); | |
| 5104 chainID_list2.clear(); | |
| 5105 mol_vec2.clear(); | |
| 5106 } | |
| 5107 } // j | |
| 5108 PDB_lines1[chain_i].clear(); | |
| 5109 DeleteArray(&xa, xlen); | |
| 5110 delete [] seqx; | |
| 5111 delete [] secx; | |
| 5112 resi_vec1.clear(); | |
| 5113 } // chain_i | |
| 5114 xname.clear(); | |
| 5115 PDB_lines1.clear(); | |
| 5116 chainID_list1.clear(); | |
| 5117 mol_vec1.clear(); | |
| 5118 } // i | |
| 5119 if (chain2_list.size()==1) | |
| 5120 { | |
| 5121 yname.clear(); | |
| 5122 for (chain_j=0;chain_j<ychainnum;chain_j++) | |
| 5123 PDB_lines2[chain_j].clear(); | |
| 5124 PDB_lines2.clear(); | |
| 5125 resi_vec2.clear(); | |
| 5126 chainID_list2.clear(); | |
| 5127 mol_vec2.clear(); | |
| 5128 } | |
| 5129 chain1_list.clear(); | |
| 5130 chain2_list.clear(); | |
| 5131 sequence.clear(); | |
| 5132 | |
| 5133 t2 = clock(); | |
| 5134 float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC; | |
| 5135 printf("Total CPU time is %5.2f seconds\n", diff); | |
| 5136 return 0; | |
| 5137 } |
