comparison spring_package/tmalign/TMalign.cpp @ 17:c790d25086dc draft

"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author guerler
date Wed, 28 Oct 2020 05:11:56 +0000
parents
children
comparison
equal deleted inserted replaced
16:16eb2acaaa20 17:c790d25086dc
1 /* TM-align: sequence-independent structure alignment of monomer proteins by
2 * TM-score superposition. Please report issues to yangzhanglab@umich.edu
3 *
4 * References to cite:
5 * Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005)
6 *
7 * DISCLAIMER:
8 * Permission to use, copy, modify, and distribute the Software for any
9 * purpose, with or without fee, is hereby granted, provided that the
10 * notices on the head, the reference information, and this copyright
11 * notice appear in all copies or substantial portions of the Software.
12 * It is provided "as is" without express or implied warranty.
13 *
14 * ==========================
15 * How to install the program
16 * ==========================
17 * The following command compiles the program in your Linux computer:
18 *
19 * g++ -static -O3 -ffast-math -lm -o TMalign TMalign.cpp
20 *
21 * The '-static' flag should be removed on Mac OS, which does not support
22 * building static executables.
23 *
24 * ======================
25 * How to use the program
26 * ======================
27 * You can run the program without argument to obtain the document.
28 * Briefly, you can compare two structures by:
29 *
30 * ./TMalign structure1.pdb structure2.pdb
31 *
32 * ==============
33 * Update history
34 * ==============
35 * 2012/01/24: A C/C++ code of TM-align was constructed by Jianyi Yang
36 * 2016/05/21: Several updates of this program were made by Jianji Wu:
37 * (1) fixed several compiling bugs
38 * (2) made I/O of C/C++ version consistent with the Fortran version
39 * (3) added outputs including full-atom and ligand structures
40 * (4) added options of '-i', '-I' and '-m'
41 * 2016/05/25: Fixed a bug on PDB file reading
42 * 2018/06/04: Several updates were made by Chengxin Zhang, including
43 * (1) Fixed bug in reading PDB files with negative residue index,
44 * (2) Implemented the fTM-align algorithm (by the '-fast' option)
45 * as described in R Dong, S Pan, Z Peng, Y Zhang, J Yang
46 * (2018) Nucleic acids research. gky430.
47 * (3) Included option to perform TM-align against a whole
48 * folder of PDB files. A full list of options not available
49 * in the Fortran version can be explored by TMalign -h
50 * 2018/07/27: Added the -byresi option for TM-score superposition without
51 * re-alignment as in TMscore and TMscore -c
52 * 2018/08/07: Added the -dir option
53 * 2018/08/14: Added the -split option
54 * 2018/08/16: Added the -infmt1, -infmt2 options.
55 * 2019/01/07: Added support for PDBx/mmCIF format.
56 * 2019/02/09: Fixed asymmetric alignment bug.
57 * 2019/03/17: Added the -cp option for circular permutation
58 * 2019/07/23: Supported RasMol output by '-o' option
59 * 2019/07/24: Fixed bug on PyMOL format output by '-o' option with mmCIF input
60 * 2019/08/18: Fixed bug on RasMol format output file *_atm. Removed excessive
61 * circular permutation alignment by -cp
62 * 2019/08/20: Clarified PyMOL syntax.
63 * 2019/08/22: Added four additional PyMOL scripts.
64 */
65 #include <math.h>
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <time.h>
69 #include <string.h>
70 #include <malloc/malloc.h>
71 #include <sstream>
72 #include <iostream>
73 #include <iomanip>
74 #include <fstream>
75 #include <vector>
76 #include <iterator>
77 #include <algorithm>
78 #include <string>
79 #include <map>
80
81 using namespace std;
82
83 void print_version()
84 {
85 cout <<
86 "\n"
87 " *********************************************************************\n"
88 " * TM-align (Version 20190822): protein structure alignment *\n"
89 " * References: Y Zhang, J Skolnick. Nucl Acids Res 33, 2302-9 (2005) *\n"
90 " * Please email comments and suggestions to yangzhanglab@umich.edu *\n"
91 " *********************************************************************"
92 << endl;
93 }
94
95 void print_extra_help()
96 {
97 cout <<
98 "Additional options:\n"
99 " -dir Perform all-against-all alignment among the list of PDB\n"
100 " chains listed by 'chain_list' under 'chain_folder'. Note\n"
101 " that the slash is necessary.\n"
102 " $ TMalign -dir chain_folder/ chain_list\n"
103 "\n"
104 " -dir1 Use chain2 to search a list of PDB chains listed by 'chain1_list'\n"
105 " under 'chain1_folder'. Note that the slash is necessary.\n"
106 " $ TMalign -dir1 chain1_folder/ chain1_list chain2\n"
107 "\n"
108 " -dir2 Use chain1 to search a list of PDB chains listed by 'chain2_list'\n"
109 " under 'chain2_folder'\n"
110 " $ TMalign chain1 -dir2 chain2_folder/ chain2_list\n"
111 "\n"
112 " -suffix (Only when -dir1 and/or -dir2 are set, default is empty)\n"
113 " add file name suffix to files listed by chain1_list or chain2_list\n"
114 "\n"
115 " -atom 4-character atom name used to represent a residue.\n"
116 " Default is \" CA \" for proteins\n"
117 " (note the spaces before and after CA).\n"
118 "\n"
119 " -ter Strings to mark the end of a chain\n"
120 " 3: (default) TER, ENDMDL, END or different chain ID\n"
121 " 2: ENDMDL, END, or different chain ID\n"
122 " 1: ENDMDL or END\n"
123 " 0: (default in the first C++ TMalign) end of file\n"
124 "\n"
125 " -split Whether to split PDB file into multiple chains\n"
126 " 0: (default) treat the whole structure as one single chain\n"
127 " 1: treat each MODEL as a separate chain (-ter should be 0)\n"
128 " 2: treat each chain as a seperate chain (-ter should be <=1)\n"
129 "\n"
130 " -outfmt Output format\n"
131 " 0: (default) full output\n"
132 " 1: fasta format compact output\n"
133 " 2: tabular format very compact output\n"
134 " -1: full output, but without version or citation information\n"
135 "\n"
136 " -byresi Whether to assume residue index correspondence between the\n"
137 " two structures.\n"
138 " 0: (default) sequence independent alignment\n"
139 " 1: (same as TMscore program) sequence-dependent superposition,\n"
140 " i.e. align by residue index\n"
141 " 2: (same as TMscore -c, should be used with -ter <=1)\n"
142 " align by residue index and chain ID\n"
143 " 3: (similar to TMscore -c, should be used with -ter <=1)\n"
144 " align by residue index and order of chain\n"
145 "\n"
146 " -TMcut -1: (default) do not consider TMcut\n"
147 " Values in [0.5,1): Do not proceed with TM-align for this\n"
148 " structure pair if TM-score is unlikely to reach TMcut.\n"
149 " TMcut is normalized is set by -a option:\n"
150 " -2: normalized by longer structure length\n"
151 " -1: normalized by shorter structure length\n"
152 " 0: (default, same as F) normalized by second structure\n"
153 " 1: same as T, normalized by average structure length\n"
154 "\n"
155 " -mirror Whether to align the mirror image of input structure\n"
156 " 0: (default) do not align mirrored structure\n"
157 " 1: align mirror of chain1 to origin chain2\n"
158 "\n"
159 " -het Whether to align residues marked as 'HETATM' in addition to 'ATOM '\n"
160 " 0: (default) only align 'ATOM ' residues\n"
161 " 1: align both 'ATOM ' and 'HETATM' residues\n"
162 "\n"
163 " -infmt1 Input format for chain1\n"
164 " -infmt2 Input format for chain2\n"
165 " -1: (default) automatically detect PDB or PDBx/mmCIF format\n"
166 " 0: PDB format\n"
167 " 1: SPICKER format\n"
168 " 2: xyz format\n"
169 " 3: PDBx/mmCIF format\n"
170 <<endl;
171 }
172
173 void print_help(bool h_opt=false)
174 {
175 print_version();
176 cout <<
177 "\n"
178 "Usage: TMalign PDB1.pdb PDB2.pdb [Options]\n"
179 "\n"
180 "Options:\n"
181 " -u TM-score normalized by user assigned length (the same as -L)\n"
182 " warning: it should be >= minimum length of the two structures\n"
183 " otherwise, TM-score may be >1\n"
184 "\n"
185 " -a TM-score normalized by the average length of two structures\n"
186 " T or F, (default F)\n"
187 "\n"
188 " -i Start with an alignment specified in fasta file 'align.txt'\n"
189 "\n"
190 " -I Stick to the alignment specified in 'align.txt'\n"
191 "\n"
192 " -m Output TM-align rotation matrix\n"
193 "\n"
194 " -d TM-score scaled by an assigned d0, e.g. 5 Angstroms\n"
195 "\n"
196 " -o Output the superposition to 'TM_sup*'\n"
197 " $ TMalign PDB1.pdb PDB2.pdb -o TM_sup\n"
198 " View superposed C-alpha traces of aligned regions by RasMol or PyMOL:\n"
199 " $ rasmol -script TM_sup\n"
200 " $ pymol -d @TM_sup.pml\n"
201 " View superposed C-alpha traces of all regions:\n"
202 " $ rasmol -script TM_sup_all\n"
203 " $ pymol -d @TM_sup_all.pml\n"
204 " View superposed full-atom structures of aligned regions:\n"
205 " $ rasmol -script TM_sup_atm\n"
206 " $ pymol -d @TM_sup_atm.pml\n"
207 " View superposed full-atom structures of all regions:\n"
208 " $ rasmol -script TM_sup_all_atm\n"
209 " $ pymol -d @TM_sup_all_atm.pml\n"
210 " View superposed full-atom structures and ligands of all regions\n"
211 " $ rasmol -script TM_sup_all_atm_lig\n"
212 " $ pymol -d @TM_sup_all_atm_lig.pml\n"
213 "\n"
214 " -fast Fast but slightly inaccurate alignment by fTM-align algorithm\n"
215 "\n"
216 " -cp Alignment with circular permutation\n"
217 "\n"
218 " -v Print the version of TM-align\n"
219 "\n"
220 " -h Print the full help message, including additional options\n"
221 "\n"
222 " (Options -u, -a, -d, -o will not change the final structure alignment)\n\n"
223 "Example usages:\n"
224 " TMalign PDB1.pdb PDB2.pdb\n"
225 " TMalign PDB1.pdb PDB2.pdb -u 100 -d 5.0\n"
226 " TMalign PDB1.pdb PDB2.pdb -a T -o PDB1.sup\n"
227 " TMalign PDB1.pdb PDB2.pdb -i align.txt\n"
228 " TMalign PDB1.pdb PDB2.pdb -m matrix.txt\n"
229 " TMalign PDB1.pdb PDB2.pdb -fast\n"
230 " TMalign PDB1.pdb PDB2.pdb -cp\n"
231 <<endl;
232
233 if (h_opt) print_extra_help();
234
235 exit(EXIT_SUCCESS);
236 }
237
238
239 /* Functions for the core TMalign algorithm, including the entry function
240 * TMalign_main */
241
242 void PrintErrorAndQuit(const string sErrorString)
243 {
244 cout << sErrorString << endl;
245 exit(1);
246 }
247
248 template <typename T> inline T getmin(const T &a, const T &b)
249 {
250 return b<a?b:a;
251 }
252
253 template <class A> void NewArray(A *** array, int Narray1, int Narray2)
254 {
255 *array=new A* [Narray1];
256 for(int i=0; i<Narray1; i++) *(*array+i)=new A [Narray2];
257 }
258
259 template <class A> void DeleteArray(A *** array, int Narray)
260 {
261 for(int i=0; i<Narray; i++)
262 if(*(*array+i)) delete [] *(*array+i);
263 if(Narray) delete [] (*array);
264 (*array)=NULL;
265 }
266
267 string AAmap(char A)
268 {
269 if (A=='A') return "ALA";
270 if (A=='B') return "ASX";
271 if (A=='C') return "CYS";
272 if (A=='D') return "ASP";
273 if (A=='E') return "GLU";
274 if (A=='F') return "PHE";
275 if (A=='G') return "GLY";
276 if (A=='H') return "HIS";
277 if (A=='I') return "ILE";
278 if (A=='K') return "LYS";
279 if (A=='L') return "LEU";
280 if (A=='M') return "MET";
281 if (A=='N') return "ASN";
282 if (A=='O') return "PYL";
283 if (A=='P') return "PRO";
284 if (A=='Q') return "GLN";
285 if (A=='R') return "ARG";
286 if (A=='S') return "SER";
287 if (A=='T') return "THR";
288 if (A=='U') return "SEC";
289 if (A=='V') return "VAL";
290 if (A=='W') return "TRP";
291 if (A=='Y') return "TYR";
292 if (A=='Z') return "GLX";
293 return "UNK";
294 }
295
296 char AAmap(const string &AA)
297 {
298 if (AA.compare("ALA")==0 || AA.compare("DAL")==0) return 'A';
299 if (AA.compare("ASX")==0) return 'B';
300 if (AA.compare("CYS")==0 || AA.compare("DCY")==0) return 'C';
301 if (AA.compare("ASP")==0 || AA.compare("DAS")==0) return 'D';
302 if (AA.compare("GLU")==0 || AA.compare("DGL")==0) return 'E';
303 if (AA.compare("PHE")==0 || AA.compare("DPN")==0) return 'F';
304 if (AA.compare("GLY")==0) return 'G';
305 if (AA.compare("HIS")==0 || AA.compare("DHI")==0) return 'H';
306 if (AA.compare("ILE")==0 || AA.compare("DIL")==0) return 'I';
307 if (AA.compare("LYS")==0 || AA.compare("DLY")==0) return 'K';
308 if (AA.compare("LEU")==0 || AA.compare("DLE")==0) return 'L';
309 if (AA.compare("MET")==0 || AA.compare("MED")==0 ||
310 AA.compare("MSE")==0) return 'M';
311 if (AA.compare("ASN")==0 || AA.compare("DSG")==0) return 'N';
312 if (AA.compare("PYL")==0) return 'O';
313 if (AA.compare("PRO")==0 || AA.compare("DPR")==0) return 'P';
314 if (AA.compare("GLN")==0 || AA.compare("DGN")==0) return 'Q';
315 if (AA.compare("ARG")==0 || AA.compare("DAR")==0) return 'R';
316 if (AA.compare("SER")==0 || AA.compare("DSN")==0) return 'S';
317 if (AA.compare("THR")==0 || AA.compare("DTH")==0) return 'T';
318 if (AA.compare("SEC")==0) return 'U';
319 if (AA.compare("VAL")==0 || AA.compare("DVA")==0) return 'V';
320 if (AA.compare("TRP")==0 || AA.compare("DTR")==0) return 'W';
321 if (AA.compare("TYR")==0 || AA.compare("DTY")==0) return 'Y';
322 if (AA.compare("GLX")==0) return 'Z';
323 return 'X';
324 }
325
326 /* split a long string into vectors by whitespace
327 * line - input string
328 * line_vec - output vector
329 * delimiter - delimiter */
330 void split(const string &line, vector<string> &line_vec,
331 const char delimiter=' ')
332 {
333 bool within_word = false;
334 for (int pos=0;pos<line.size();pos++)
335 {
336 if (line[pos]==delimiter)
337 {
338 within_word = false;
339 continue;
340 }
341 if (!within_word)
342 {
343 within_word = true;
344 line_vec.push_back("");
345 }
346 line_vec.back()+=line[pos];
347 }
348 }
349
350 /* split a long string into vectors by whitespace, return both whitespaces
351 * and non-whitespaces
352 * line - input string
353 * line_vec - output vector
354 * space_vec - output vector
355 * delimiter - delimiter */
356 void split_white(const string &line, vector<string> &line_vec,
357 vector<string>&white_vec, const char delimiter=' ')
358 {
359 bool within_word = false;
360 for (int pos=0;pos<line.size();pos++)
361 {
362 if (line[pos]==delimiter)
363 {
364 if (within_word==true)
365 {
366 white_vec.push_back("");
367 within_word = false;
368 }
369 white_vec.back()+=delimiter;
370 }
371 else
372 {
373 if (within_word==false)
374 {
375 line_vec.push_back("");
376 within_word = true;
377 }
378 line_vec.back()+=line[pos];
379 }
380 }
381 }
382
383 size_t get_PDB_lines(const string filename,
384 vector<vector<string> >&PDB_lines, vector<string> &chainID_list,
385 vector<int> &mol_vec, const int ter_opt, const int infmt_opt,
386 const string atom_opt, const int split_opt, const int het_opt)
387 {
388 size_t i=0; // resi i.e. atom index
389 string line;
390 char chainID=0;
391 string resi="";
392 bool select_atom=false;
393 size_t model_idx=0;
394 vector<string> tmp_str_vec;
395
396 ifstream fin;
397 fin.open(filename.c_str());
398
399 if (infmt_opt==0||infmt_opt==-1) // PDB format
400 {
401 while (fin.good())
402 {
403 getline(fin, line);
404 if (infmt_opt==-1 && line.compare(0,5,"loop_")==0) // PDBx/mmCIF
405 return get_PDB_lines(filename,PDB_lines,chainID_list,
406 mol_vec, ter_opt, 3, atom_opt, split_opt,het_opt);
407 if (i > 0)
408 {
409 if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
410 else if (ter_opt>=3 && line.compare(0,3,"TER")==0) break;
411 }
412 if (split_opt && line.compare(0,3,"END")==0) chainID=0;
413 if ((line.compare(0, 6, "ATOM ")==0 ||
414 (line.compare(0, 6, "HETATM")==0 && het_opt))
415 && line.size()>=54 && (line[16]==' ' || line[16]=='A'))
416 {
417 if (atom_opt=="auto")
418 select_atom=(line.compare(12,4," CA ")==0);
419 else select_atom=(line.compare(12,4,atom_opt)==0);
420 if (select_atom)
421 {
422 if (!chainID)
423 {
424 chainID=line[21];
425 model_idx++;
426 stringstream i8_stream;
427 i=0;
428 if (split_opt==2) // split by chain
429 {
430 if (chainID==' ')
431 {
432 if (ter_opt>=1) i8_stream << ":_";
433 else i8_stream<<':'<<model_idx<<":_";
434 }
435 else
436 {
437 if (ter_opt>=1) i8_stream << ':' << chainID;
438 else i8_stream<<':'<<model_idx<<':'<<chainID;
439 }
440 chainID_list.push_back(i8_stream.str());
441 }
442 else if (split_opt==1) // split by model
443 {
444 i8_stream << ':' << model_idx;
445 chainID_list.push_back(i8_stream.str());
446 }
447 PDB_lines.push_back(tmp_str_vec);
448 mol_vec.push_back(0);
449 }
450 else if (ter_opt>=2 && chainID!=line[21]) break;
451 if (split_opt==2 && chainID!=line[21])
452 {
453 chainID=line[21];
454 i=0;
455 stringstream i8_stream;
456 if (chainID==' ')
457 {
458 if (ter_opt>=1) i8_stream << ":_";
459 else i8_stream<<':'<<model_idx<<":_";
460 }
461 else
462 {
463 if (ter_opt>=1) i8_stream << ':' << chainID;
464 else i8_stream<<':'<<model_idx<<':'<<chainID;
465 }
466 chainID_list.push_back(i8_stream.str());
467 PDB_lines.push_back(tmp_str_vec);
468 mol_vec.push_back(0);
469 }
470
471 if (resi==line.substr(22,5))
472 cerr<<"Warning! Duplicated residue "<<resi<<endl;
473 resi=line.substr(22,5); // including insertion code
474
475 PDB_lines.back().push_back(line);
476 if (line[17]==' ' && (line[18]=='D'||line[18]==' ')) mol_vec.back()++;
477 else mol_vec.back()--;
478 i++;
479 }
480 }
481 }
482 }
483 else if (infmt_opt==1) // SPICKER format
484 {
485 int L=0;
486 float x,y,z;
487 stringstream i8_stream;
488 while (fin.good())
489 {
490 fin >>L>>x>>y>>z;
491 getline(fin, line);
492 if (!fin.good()) break;
493 model_idx++;
494 stringstream i8_stream;
495 i8_stream << ':' << model_idx;
496 chainID_list.push_back(i8_stream.str());
497 PDB_lines.push_back(tmp_str_vec);
498 mol_vec.push_back(0);
499 for (i=0;i<L;i++)
500 {
501 fin >>x>>y>>z;
502 i8_stream<<"ATOM "<<setw(4)<<i+1<<" CA UNK "<<setw(4)
503 <<i+1<<" "<<setiosflags(ios::fixed)<<setprecision(3)
504 <<setw(8)<<x<<setw(8)<<y<<setw(8)<<z;
505 line=i8_stream.str();
506 i8_stream.str(string());
507 PDB_lines.back().push_back(line);
508 }
509 getline(fin, line);
510 }
511 }
512 else if (infmt_opt==2) // xyz format
513 {
514 int L=0;
515 char A;
516 stringstream i8_stream;
517 while (fin.good())
518 {
519 getline(fin, line);
520 L=atoi(line.c_str());
521 getline(fin, line);
522 for (i=0;i<line.size();i++)
523 if (line[i]==' '||line[i]=='\t') break;
524 if (!fin.good()) break;
525 chainID_list.push_back(':'+line.substr(0,i));
526 PDB_lines.push_back(tmp_str_vec);
527 mol_vec.push_back(0);
528 for (i=0;i<L;i++)
529 {
530 getline(fin, line);
531 i8_stream<<"ATOM "<<setw(4)<<i+1<<" CA "
532 <<AAmap(line[0])<<" "<<setw(4)<<i+1<<" "
533 <<line.substr(2,8)<<line.substr(11,8)<<line.substr(20,8);
534 line=i8_stream.str();
535 i8_stream.str(string());
536 PDB_lines.back().push_back(line);
537 if (line[0]>='a' && line[0]<='z') mol_vec.back()++; // RNA
538 else mol_vec.back()--;
539 }
540 }
541 }
542 else if (infmt_opt==3) // PDBx/mmCIF format
543 {
544 bool loop_ = false; // not reading following content
545 map<string,int> _atom_site;
546 int atom_site_pos;
547 vector<string> line_vec;
548 string alt_id="."; // alternative location indicator
549 string asym_id="."; // this is similar to chainID, except that
550 // chainID is char while asym_id is a string
551 // with possibly multiple char
552 string prev_asym_id="";
553 string AA=""; // residue name
554 string atom="";
555 string prev_resi="";
556 string model_index=""; // the same as model_idx but type is string
557 stringstream i8_stream;
558 while (fin.good())
559 {
560 getline(fin, line);
561 if (line.size()==0) continue;
562 if (loop_) loop_ = line.compare(0,2,"# ");
563 if (!loop_)
564 {
565 if (line.compare(0,5,"loop_")) continue;
566 while(1)
567 {
568 if (fin.good()) getline(fin, line);
569 else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
570 if (line.size()) break;
571 }
572 if (line.compare(0,11,"_atom_site.")) continue;
573
574 loop_=true;
575 _atom_site.clear();
576 atom_site_pos=0;
577 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
578
579 while(1)
580 {
581 if (fin.good()) getline(fin, line);
582 else PrintErrorAndQuit("ERROR! Unexpected end of "+filename);
583 if (line.size()==0) continue;
584 if (line.compare(0,11,"_atom_site.")) break;
585 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
586 }
587
588
589 if (_atom_site.count("group_PDB")*
590 _atom_site.count("label_atom_id")*
591 _atom_site.count("label_comp_id")*
592 (_atom_site.count("auth_asym_id")+
593 _atom_site.count("label_asym_id"))*
594 (_atom_site.count("auth_seq_id")+
595 _atom_site.count("label_seq_id"))*
596 _atom_site.count("Cartn_x")*
597 _atom_site.count("Cartn_y")*
598 _atom_site.count("Cartn_z")==0)
599 {
600 loop_ = false;
601 cerr<<"Warning! Missing one of the following _atom_site data items: group_PDB, label_atom_id, label_atom_id, auth_asym_id/label_asym_id, auth_seq_id/label_seq_id, Cartn_x, Cartn_y, Cartn_z"<<endl;
602 continue;
603 }
604 }
605
606 line_vec.clear();
607 split(line,line_vec);
608 if (line_vec[_atom_site["group_PDB"]]!="ATOM" && (het_opt==0 ||
609 line_vec[_atom_site["group_PDB"]]!="HETATM")) continue;
610
611 alt_id=".";
612 if (_atom_site.count("label_alt_id")) // in 39.4 % of entries
613 alt_id=line_vec[_atom_site["label_alt_id"]];
614 if (alt_id!="." && alt_id!="A") continue;
615
616 atom=line_vec[_atom_site["label_atom_id"]];
617 if (atom[0]=='"') atom=atom.substr(1);
618 if (atom.size() && atom[atom.size()-1]=='"')
619 atom=atom.substr(0,atom.size()-1);
620 if (atom.size()==0) continue;
621 if (atom.size()==1) atom=" "+atom+" ";
622 else if (atom.size()==2) atom=" "+atom+" "; // wrong for sidechain H
623 else if (atom.size()==3) atom=" "+atom;
624 else if (atom.size()>=5) continue;
625
626 AA=line_vec[_atom_site["label_comp_id"]]; // residue name
627 if (AA.size()==1) AA=" "+AA;
628 else if (AA.size()==2) AA=" " +AA;
629 else if (AA.size()>=4) continue;
630
631 if (atom_opt=="auto")
632 select_atom=(atom==" CA ");
633 else select_atom=(atom==atom_opt);
634
635 if (!select_atom) continue;
636
637 if (_atom_site.count("auth_asym_id"))
638 asym_id=line_vec[_atom_site["auth_asym_id"]];
639 else asym_id=line_vec[_atom_site["label_asym_id"]];
640 if (asym_id==".") asym_id=" ";
641
642 if (_atom_site.count("pdbx_PDB_model_num") &&
643 model_index!=line_vec[_atom_site["pdbx_PDB_model_num"]])
644 {
645 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
646 if (PDB_lines.size() && ter_opt>=1) break;
647 if (PDB_lines.size()==0 || split_opt>=1)
648 {
649 PDB_lines.push_back(tmp_str_vec);
650 mol_vec.push_back(0);
651 prev_asym_id=asym_id;
652
653 if (split_opt==1 && ter_opt==0) chainID_list.push_back(
654 ':'+model_index);
655 else if (split_opt==2 && ter_opt==0)
656 chainID_list.push_back(':'+model_index+':'+asym_id);
657 else if (split_opt==2 && ter_opt==1)
658 chainID_list.push_back(':'+asym_id);
659 }
660 }
661
662 if (prev_asym_id!=asym_id)
663 {
664 if (prev_asym_id!="" && ter_opt>=2) break;
665 if (split_opt>=2)
666 {
667 PDB_lines.push_back(tmp_str_vec);
668 mol_vec.push_back(0);
669
670 if (split_opt==1 && ter_opt==0) chainID_list.push_back(
671 ':'+model_index);
672 else if (split_opt==2 && ter_opt==0)
673 chainID_list.push_back(':'+model_index+':'+asym_id);
674 else if (split_opt==2 && ter_opt==1)
675 chainID_list.push_back(':'+asym_id);
676 }
677 }
678 if (prev_asym_id!=asym_id) prev_asym_id=asym_id;
679
680 if (AA[0]==' ' && (AA[1]=='D'||AA[1]==' ')) mol_vec.back()++;
681 else mol_vec.back()--;
682
683 if (_atom_site.count("auth_seq_id"))
684 resi=line_vec[_atom_site["auth_seq_id"]];
685 else resi=line_vec[_atom_site["label_seq_id"]];
686 if (_atom_site.count("pdbx_PDB_ins_code") &&
687 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
688 resi+=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
689 else resi+=" ";
690
691 if (prev_resi==resi)
692 cerr<<"Warning! Duplicated residue "<<resi<<endl;
693 prev_resi=resi;
694
695 i++;
696 i8_stream<<"ATOM "
697 <<setw(5)<<i<<" "<<atom<<" "<<AA<<" "<<asym_id[0]
698 <<setw(5)<<resi.substr(0,5)<<" "
699 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
700 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
701 <<setw(8)<<line_vec[_atom_site["Cartn_z"]];
702 PDB_lines.back().push_back(i8_stream.str());
703 i8_stream.str(string());
704 }
705 _atom_site.clear();
706 line_vec.clear();
707 alt_id.clear();
708 asym_id.clear();
709 AA.clear();
710 }
711
712 fin.close();
713 line.clear();
714 if (!split_opt) chainID_list.push_back("");
715 return PDB_lines.size();
716 }
717
718 /* read fasta file from filename. sequence is stored into FASTA_lines
719 * while sequence name is stored into chainID_list.
720 * if ter_opt >=1, only read the first sequence.
721 * if ter_opt ==0, read all sequences.
722 * if split_opt >=1 and ter_opt ==0, each sequence is a separate entry.
723 * if split_opt ==0 and ter_opt ==0, all sequences are combined into one */
724 size_t get_FASTA_lines(const string filename,
725 vector<vector<string> >&FASTA_lines, vector<string> &chainID_list,
726 vector<int> &mol_vec, const int ter_opt=3, const int split_opt=0)
727 {
728 string line;
729 vector<string> tmp_str_vec;
730 int l;
731
732 ifstream fin;
733 fin.open(filename.c_str());
734
735 while (fin.good())
736 {
737 getline(fin, line);
738 if (line.size()==0 || line[0]=='#') continue;
739
740 if (line[0]=='>')
741 {
742 if (FASTA_lines.size())
743 {
744 if (ter_opt) break;
745 if (split_opt==0) continue;
746 }
747 FASTA_lines.push_back(tmp_str_vec);
748 FASTA_lines.back().push_back("");
749 mol_vec.push_back(0);
750 if (ter_opt==0 && split_opt)
751 {
752 line[0]=':';
753 chainID_list.push_back(line);
754 }
755 else chainID_list.push_back("");
756 }
757 else
758 {
759 FASTA_lines.back()[0]+=line;
760 for (l=0;l<line.size();l++) mol_vec.back()+=
761 ('a'<=line[l] && line[l]<='z')-('A'<=line[l] && line[l]<='Z');
762 }
763 }
764
765 line.clear();
766 fin.close();
767 return FASTA_lines.size();
768 }
769
770
771 /* extract pairwise sequence alignment from residue index vectors,
772 * assuming that "sequence" contains two empty strings.
773 * return length of alignment, including gap. */
774 int extract_aln_from_resi(vector<string> &sequence, char *seqx, char *seqy,
775 const vector<string> resi_vec1, const vector<string> resi_vec2,
776 const int byresi_opt)
777 {
778 sequence.clear();
779 sequence.push_back("");
780 sequence.push_back("");
781
782 int i1=0; // positions in resi_vec1
783 int i2=0; // positions in resi_vec2
784 int xlen=resi_vec1.size();
785 int ylen=resi_vec2.size();
786 map<char,int> chainID_map1;
787 map<char,int> chainID_map2;
788 if (byresi_opt==3)
789 {
790 vector<char> chainID_vec;
791 char chainID;
792 int i;
793 for (i=0;i<xlen;i++)
794 {
795 chainID=resi_vec1[i][5];
796 if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
797 {
798 chainID_vec.push_back(chainID);
799 chainID_map1[chainID]=chainID_vec.size();
800 }
801 }
802 chainID_vec.clear();
803 for (i=0;i<ylen;i++)
804 {
805 chainID=resi_vec2[i][5];
806 if (!chainID_vec.size()|| chainID_vec.back()!=chainID)
807 {
808 chainID_vec.push_back(chainID);
809 chainID_map2[chainID]=chainID_vec.size();
810 }
811 }
812 chainID_vec.clear();
813 }
814 while(i1<xlen && i2<ylen)
815 {
816 if ((byresi_opt<=2 && resi_vec1[i1]==resi_vec2[i2]) || (byresi_opt==3
817 && resi_vec1[i1].substr(0,5)==resi_vec2[i2].substr(0,5)
818 && chainID_map1[resi_vec1[i1][5]]==chainID_map2[resi_vec2[i2][5]]))
819 {
820 sequence[0]+=seqx[i1++];
821 sequence[1]+=seqy[i2++];
822 }
823 else if (atoi(resi_vec1[i1].substr(0,4).c_str())<=
824 atoi(resi_vec2[i2].substr(0,4).c_str()))
825 {
826 sequence[0]+=seqx[i1++];
827 sequence[1]+='-';
828 }
829 else
830 {
831 sequence[0]+='-';
832 sequence[1]+=seqy[i2++];
833 }
834 }
835 chainID_map1.clear();
836 chainID_map2.clear();
837 return sequence[0].size();
838 }
839
840 int read_PDB(const vector<string> &PDB_lines, double **a, char *seq,
841 vector<string> &resi_vec, const int byresi_opt)
842 {
843 int i;
844 for (i=0;i<PDB_lines.size();i++)
845 {
846 a[i][0] = atof(PDB_lines[i].substr(30, 8).c_str());
847 a[i][1] = atof(PDB_lines[i].substr(38, 8).c_str());
848 a[i][2] = atof(PDB_lines[i].substr(46, 8).c_str());
849 seq[i] = AAmap(PDB_lines[i].substr(17, 3));
850
851 if (byresi_opt>=2) resi_vec.push_back(PDB_lines[i].substr(22,5)+
852 PDB_lines[i][21]);
853 if (byresi_opt==1) resi_vec.push_back(PDB_lines[i].substr(22,5));
854 }
855 seq[i]='\0';
856 return i;
857 }
858
859 double dist(double x[3], double y[3])
860 {
861 double d1=x[0]-y[0];
862 double d2=x[1]-y[1];
863 double d3=x[2]-y[2];
864
865 return (d1*d1 + d2*d2 + d3*d3);
866 }
867
868 double dot(double *a, double *b)
869 {
870 return (a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
871 }
872
873 void transform(double t[3], double u[3][3], double *x, double *x1)
874 {
875 x1[0]=t[0]+dot(&u[0][0], x);
876 x1[1]=t[1]+dot(&u[1][0], x);
877 x1[2]=t[2]+dot(&u[2][0], x);
878 }
879
880 void do_rotation(double **x, double **x1, int len, double t[3], double u[3][3])
881 {
882 for(int i=0; i<len; i++)
883 {
884 transform(t, u, &x[i][0], &x1[i][0]);
885 }
886 }
887
888 /* strip white space at the begining or end of string */
889 string Trim(const string &inputString)
890 {
891 string result = inputString;
892 int idxBegin = inputString.find_first_not_of(" \n\r\t");
893 int idxEnd = inputString.find_last_not_of(" \n\r\t");
894 if (idxBegin >= 0 && idxEnd >= 0)
895 result = inputString.substr(idxBegin, idxEnd + 1 - idxBegin);
896 return result;
897 }
898
899 /* read user specified pairwise alignment from 'fname_lign' to 'sequence'.
900 * This function should only be called by main function, as it will
901 * terminate a program if wrong alignment is given */
902 void read_user_alignment(vector<string>&sequence, const string &fname_lign,
903 const int i_opt)
904 {
905 if (fname_lign == "")
906 PrintErrorAndQuit("Please provide a file name for option -i!");
907 // open alignment file
908 int n_p = 0;// number of structures in alignment file
909 string line;
910
911 ifstream fileIn(fname_lign.c_str());
912 if (fileIn.is_open())
913 {
914 while (fileIn.good())
915 {
916 getline(fileIn, line);
917 if (line.compare(0, 1, ">") == 0)// Flag for a new structure
918 {
919 if (n_p >= 2) break;
920 sequence.push_back("");
921 n_p++;
922 }
923 else if (n_p > 0 && line!="") sequence.back()+=line;
924 }
925 fileIn.close();
926 }
927 else PrintErrorAndQuit("ERROR! Alignment file does not exist.");
928
929 if (n_p < 2)
930 PrintErrorAndQuit("ERROR: Fasta format is wrong, two proteins should be included.");
931 if (sequence[0].size() != sequence[1].size())
932 PrintErrorAndQuit("ERROR! FASTA file is wrong. The length in alignment should be equal for the two aligned proteins.");
933 if (i_opt==3)
934 {
935 int aligned_resNum=0;
936 for (int i=0;i<sequence[0].size();i++)
937 aligned_resNum+=(sequence[0][i]!='-' && sequence[1][i]!='-');
938 if (aligned_resNum<3)
939 PrintErrorAndQuit("ERROR! Superposition is undefined for <3 aligned residues.");
940 }
941 line.clear();
942 return;
943 }
944
945 /* read list of entries from 'name' to 'chain_list'.
946 * dir_opt is the folder name (prefix).
947 * suffix_opt is the file name extension (suffix_opt).
948 * This function should only be called by main function, as it will
949 * terminate a program if wrong alignment is given */
950 void file2chainlist(vector<string>&chain_list, const string &name,
951 const string &dir_opt, const string &suffix_opt)
952 {
953 ifstream fp(name.c_str());
954 if (! fp.is_open())
955 PrintErrorAndQuit(("Can not open file: "+name+'\n').c_str());
956 string line;
957 while (fp.good())
958 {
959 getline(fp, line);
960 if (! line.size()) continue;
961 chain_list.push_back(dir_opt+Trim(line)+suffix_opt);
962 }
963 fp.close();
964 line.clear();
965 }
966
967 /**************************************************************************
968 Implemetation of Kabsch algoritm for finding the best rotation matrix
969 ---------------------------------------------------------------------------
970 x - x(i,m) are coordinates of atom m in set x (input)
971 y - y(i,m) are coordinates of atom m in set y (input)
972 n - n is number of atom pairs (input)
973 mode - 0:calculate rms only (input)
974 1:calculate u,t only (takes medium)
975 2:calculate rms,u,t (takes longer)
976 rms - sum of w*(ux+t-y)**2 over all atom pairs (output)
977 u - u(i,j) is rotation matrix for best superposition (output)
978 t - t(i) is translation vector for best superposition (output)
979 **************************************************************************/
980 bool Kabsch(double **x, double **y, int n, int mode, double *rms,
981 double t[3], double u[3][3])
982 {
983 int i, j, m, m1, l, k;
984 double e0, rms1, d, h, g;
985 double cth, sth, sqrth, p, det, sigma;
986 double xc[3], yc[3];
987 double a[3][3], b[3][3], r[3][3], e[3], rr[6], ss[6];
988 double sqrt3 = 1.73205080756888, tol = 0.01;
989 int ip[] = { 0, 1, 3, 1, 2, 4, 3, 4, 5 };
990 int ip2312[] = { 1, 2, 0, 1 };
991
992 int a_failed = 0, b_failed = 0;
993 double epsilon = 0.00000001;
994
995 //initializtation
996 *rms = 0;
997 rms1 = 0;
998 e0 = 0;
999 double c1[3], c2[3];
1000 double s1[3], s2[3];
1001 double sx[3], sy[3], sz[3];
1002 for (i = 0; i < 3; i++)
1003 {
1004 s1[i] = 0.0;
1005 s2[i] = 0.0;
1006
1007 sx[i] = 0.0;
1008 sy[i] = 0.0;
1009 sz[i] = 0.0;
1010 }
1011
1012 for (i = 0; i<3; i++)
1013 {
1014 xc[i] = 0.0;
1015 yc[i] = 0.0;
1016 t[i] = 0.0;
1017 for (j = 0; j<3; j++)
1018 {
1019 u[i][j] = 0.0;
1020 r[i][j] = 0.0;
1021 a[i][j] = 0.0;
1022 if (i == j)
1023 {
1024 u[i][j] = 1.0;
1025 a[i][j] = 1.0;
1026 }
1027 }
1028 }
1029
1030 if (n<1) return false;
1031
1032 //compute centers for vector sets x, y
1033 for (i = 0; i<n; i++)
1034 {
1035 for (j = 0; j < 3; j++)
1036 {
1037 c1[j] = x[i][j];
1038 c2[j] = y[i][j];
1039
1040 s1[j] += c1[j];
1041 s2[j] += c2[j];
1042 }
1043
1044 for (j = 0; j < 3; j++)
1045 {
1046 sx[j] += c1[0] * c2[j];
1047 sy[j] += c1[1] * c2[j];
1048 sz[j] += c1[2] * c2[j];
1049 }
1050 }
1051 for (i = 0; i < 3; i++)
1052 {
1053 xc[i] = s1[i] / n;
1054 yc[i] = s2[i] / n;
1055 }
1056 if (mode == 2 || mode == 0)
1057 for (int mm = 0; mm < n; mm++)
1058 for (int nn = 0; nn < 3; nn++)
1059 e0 += (x[mm][nn] - xc[nn]) * (x[mm][nn] - xc[nn]) +
1060 (y[mm][nn] - yc[nn]) * (y[mm][nn] - yc[nn]);
1061 for (j = 0; j < 3; j++)
1062 {
1063 r[j][0] = sx[j] - s1[0] * s2[j] / n;
1064 r[j][1] = sy[j] - s1[1] * s2[j] / n;
1065 r[j][2] = sz[j] - s1[2] * s2[j] / n;
1066 }
1067
1068 //compute determinat of matrix r
1069 det = r[0][0] * (r[1][1] * r[2][2] - r[1][2] * r[2][1])\
1070 - r[0][1] * (r[1][0] * r[2][2] - r[1][2] * r[2][0])\
1071 + r[0][2] * (r[1][0] * r[2][1] - r[1][1] * r[2][0]);
1072 sigma = det;
1073
1074 //compute tras(r)*r
1075 m = 0;
1076 for (j = 0; j<3; j++)
1077 {
1078 for (i = 0; i <= j; i++)
1079 {
1080 rr[m] = r[0][i] * r[0][j] + r[1][i] * r[1][j] + r[2][i] * r[2][j];
1081 m++;
1082 }
1083 }
1084
1085 double spur = (rr[0] + rr[2] + rr[5]) / 3.0;
1086 double cof = (((((rr[2] * rr[5] - rr[4] * rr[4]) + rr[0] * rr[5])\
1087 - rr[3] * rr[3]) + rr[0] * rr[2]) - rr[1] * rr[1]) / 3.0;
1088 det = det*det;
1089
1090 for (i = 0; i<3; i++) e[i] = spur;
1091
1092 if (spur>0)
1093 {
1094 d = spur*spur;
1095 h = d - cof;
1096 g = (spur*cof - det) / 2.0 - spur*h;
1097
1098 if (h>0)
1099 {
1100 sqrth = sqrt(h);
1101 d = h*h*h - g*g;
1102 if (d<0.0) d = 0.0;
1103 d = atan2(sqrt(d), -g) / 3.0;
1104 cth = sqrth * cos(d);
1105 sth = sqrth*sqrt3*sin(d);
1106 e[0] = (spur + cth) + cth;
1107 e[1] = (spur - cth) + sth;
1108 e[2] = (spur - cth) - sth;
1109
1110 if (mode != 0)
1111 {//compute a
1112 for (l = 0; l<3; l = l + 2)
1113 {
1114 d = e[l];
1115 ss[0] = (d - rr[2]) * (d - rr[5]) - rr[4] * rr[4];
1116 ss[1] = (d - rr[5]) * rr[1] + rr[3] * rr[4];
1117 ss[2] = (d - rr[0]) * (d - rr[5]) - rr[3] * rr[3];
1118 ss[3] = (d - rr[2]) * rr[3] + rr[1] * rr[4];
1119 ss[4] = (d - rr[0]) * rr[4] + rr[1] * rr[3];
1120 ss[5] = (d - rr[0]) * (d - rr[2]) - rr[1] * rr[1];
1121
1122 if (fabs(ss[0]) <= epsilon) ss[0] = 0.0;
1123 if (fabs(ss[1]) <= epsilon) ss[1] = 0.0;
1124 if (fabs(ss[2]) <= epsilon) ss[2] = 0.0;
1125 if (fabs(ss[3]) <= epsilon) ss[3] = 0.0;
1126 if (fabs(ss[4]) <= epsilon) ss[4] = 0.0;
1127 if (fabs(ss[5]) <= epsilon) ss[5] = 0.0;
1128
1129 if (fabs(ss[0]) >= fabs(ss[2]))
1130 {
1131 j = 0;
1132 if (fabs(ss[0]) < fabs(ss[5])) j = 2;
1133 }
1134 else if (fabs(ss[2]) >= fabs(ss[5])) j = 1;
1135 else j = 2;
1136
1137 d = 0.0;
1138 j = 3 * j;
1139 for (i = 0; i<3; i++)
1140 {
1141 k = ip[i + j];
1142 a[i][l] = ss[k];
1143 d = d + ss[k] * ss[k];
1144 }
1145
1146
1147 //if( d > 0.0 ) d = 1.0 / sqrt(d);
1148 if (d > epsilon) d = 1.0 / sqrt(d);
1149 else d = 0.0;
1150 for (i = 0; i<3; i++) a[i][l] = a[i][l] * d;
1151 }//for l
1152
1153 d = a[0][0] * a[0][2] + a[1][0] * a[1][2] + a[2][0] * a[2][2];
1154 if ((e[0] - e[1]) >(e[1] - e[2]))
1155 {
1156 m1 = 2;
1157 m = 0;
1158 }
1159 else
1160 {
1161 m1 = 0;
1162 m = 2;
1163 }
1164 p = 0;
1165 for (i = 0; i<3; i++)
1166 {
1167 a[i][m1] = a[i][m1] - d*a[i][m];
1168 p = p + a[i][m1] * a[i][m1];
1169 }
1170 if (p <= tol)
1171 {
1172 p = 1.0;
1173 for (i = 0; i<3; i++)
1174 {
1175 if (p < fabs(a[i][m])) continue;
1176 p = fabs(a[i][m]);
1177 j = i;
1178 }
1179 k = ip2312[j];
1180 l = ip2312[j + 1];
1181 p = sqrt(a[k][m] * a[k][m] + a[l][m] * a[l][m]);
1182 if (p > tol)
1183 {
1184 a[j][m1] = 0.0;
1185 a[k][m1] = -a[l][m] / p;
1186 a[l][m1] = a[k][m] / p;
1187 }
1188 else a_failed = 1;
1189 }//if p<=tol
1190 else
1191 {
1192 p = 1.0 / sqrt(p);
1193 for (i = 0; i<3; i++) a[i][m1] = a[i][m1] * p;
1194 }//else p<=tol
1195 if (a_failed != 1)
1196 {
1197 a[0][1] = a[1][2] * a[2][0] - a[1][0] * a[2][2];
1198 a[1][1] = a[2][2] * a[0][0] - a[2][0] * a[0][2];
1199 a[2][1] = a[0][2] * a[1][0] - a[0][0] * a[1][2];
1200 }
1201 }//if(mode!=0)
1202 }//h>0
1203
1204 //compute b anyway
1205 if (mode != 0 && a_failed != 1)//a is computed correctly
1206 {
1207 //compute b
1208 for (l = 0; l<2; l++)
1209 {
1210 d = 0.0;
1211 for (i = 0; i<3; i++)
1212 {
1213 b[i][l] = r[i][0] * a[0][l] +
1214 r[i][1] * a[1][l] + r[i][2] * a[2][l];
1215 d = d + b[i][l] * b[i][l];
1216 }
1217 //if( d > 0 ) d = 1.0 / sqrt(d);
1218 if (d > epsilon) d = 1.0 / sqrt(d);
1219 else d = 0.0;
1220 for (i = 0; i<3; i++) b[i][l] = b[i][l] * d;
1221 }
1222 d = b[0][0] * b[0][1] + b[1][0] * b[1][1] + b[2][0] * b[2][1];
1223 p = 0.0;
1224
1225 for (i = 0; i<3; i++)
1226 {
1227 b[i][1] = b[i][1] - d*b[i][0];
1228 p += b[i][1] * b[i][1];
1229 }
1230
1231 if (p <= tol)
1232 {
1233 p = 1.0;
1234 for (i = 0; i<3; i++)
1235 {
1236 if (p<fabs(b[i][0])) continue;
1237 p = fabs(b[i][0]);
1238 j = i;
1239 }
1240 k = ip2312[j];
1241 l = ip2312[j + 1];
1242 p = sqrt(b[k][0] * b[k][0] + b[l][0] * b[l][0]);
1243 if (p > tol)
1244 {
1245 b[j][1] = 0.0;
1246 b[k][1] = -b[l][0] / p;
1247 b[l][1] = b[k][0] / p;
1248 }
1249 else b_failed = 1;
1250 }//if( p <= tol )
1251 else
1252 {
1253 p = 1.0 / sqrt(p);
1254 for (i = 0; i<3; i++) b[i][1] = b[i][1] * p;
1255 }
1256 if (b_failed != 1)
1257 {
1258 b[0][2] = b[1][0] * b[2][1] - b[1][1] * b[2][0];
1259 b[1][2] = b[2][0] * b[0][1] - b[2][1] * b[0][0];
1260 b[2][2] = b[0][0] * b[1][1] - b[0][1] * b[1][0];
1261 //compute u
1262 for (i = 0; i<3; i++)
1263 for (j = 0; j<3; j++)
1264 u[i][j] = b[i][0] * a[j][0] +
1265 b[i][1] * a[j][1] + b[i][2] * a[j][2];
1266 }
1267
1268 //compute t
1269 for (i = 0; i<3; i++)
1270 t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) -
1271 u[i][2] * xc[2];
1272 }//if(mode!=0 && a_failed!=1)
1273 }//spur>0
1274 else //just compute t and errors
1275 {
1276 //compute t
1277 for (i = 0; i<3; i++)
1278 t[i] = ((yc[i] - u[i][0] * xc[0]) - u[i][1] * xc[1]) -
1279 u[i][2] * xc[2];
1280 }//else spur>0
1281
1282 //compute rms
1283 for (i = 0; i<3; i++)
1284 {
1285 if (e[i] < 0) e[i] = 0;
1286 e[i] = sqrt(e[i]);
1287 }
1288 d = e[2];
1289 if (sigma < 0.0) d = -d;
1290 d = (d + e[1]) + e[0];
1291
1292 if (mode == 2 || mode == 0)
1293 {
1294 rms1 = (e0 - d) - d;
1295 if (rms1 < 0.0) rms1 = 0.0;
1296 }
1297
1298 *rms = rms1;
1299 return true;
1300 }
1301
1302 /* Partial implementation of Needleman-Wunsch (NW) dymanamic programming for
1303 * global alignment. The three NWDP_TM functions below are not complete
1304 * implementation of NW algorithm because gap jumping in the standard Gotoh
1305 * algorithm is not considered. Since the gap opening and gap extension is
1306 * the same, this is not a problem. This code was exploited in TM-align
1307 * because it is about 1.5 times faster than a complete NW implementation.
1308 * Nevertheless, if gap openning != gap extension shall be implemented in
1309 * the future, the Gotoh algorithm must be implemented. In rare scenarios,
1310 * it is also possible to have asymmetric alignment (i.e.
1311 * TMalign A.pdb B.pdb and TMalign B.pdb A.pdb have different TM_A and TM_B
1312 * values) caused by the NWPD_TM implement.
1313 */
1314
1315 /* Input: score[1:len1, 1:len2], and gap_open
1316 * Output: j2i[1:len2] \in {1:len1} U {-1}
1317 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
1318 void NWDP_TM(double **score, bool **path, double **val,
1319 int len1, int len2, double gap_open, int j2i[])
1320 {
1321
1322 int i, j;
1323 double h, v, d;
1324
1325 //initialization
1326 for(i=0; i<=len1; i++)
1327 {
1328 val[i][0]=0;
1329 //val[i][0]=i*gap_open;
1330 path[i][0]=false; //not from diagonal
1331 }
1332
1333 for(j=0; j<=len2; j++)
1334 {
1335 val[0][j]=0;
1336 //val[0][j]=j*gap_open;
1337 path[0][j]=false; //not from diagonal
1338 j2i[j]=-1; //all are not aligned, only use j2i[1:len2]
1339 }
1340
1341
1342 //decide matrix and path
1343 for(i=1; i<=len1; i++)
1344 {
1345 for(j=1; j<=len2; j++)
1346 {
1347 d=val[i-1][j-1]+score[i][j]; //diagonal
1348
1349 //symbol insertion in horizontal (= a gap in vertical)
1350 h=val[i-1][j];
1351 if(path[i-1][j]) h += gap_open; //aligned in last position
1352
1353 //symbol insertion in vertical
1354 v=val[i][j-1];
1355 if(path[i][j-1]) v += gap_open; //aligned in last position
1356
1357
1358 if(d>=h && d>=v)
1359 {
1360 path[i][j]=true; //from diagonal
1361 val[i][j]=d;
1362 }
1363 else
1364 {
1365 path[i][j]=false; //from horizontal
1366 if(v>=h) val[i][j]=v;
1367 else val[i][j]=h;
1368 }
1369 } //for i
1370 } //for j
1371
1372 //trace back to extract the alignment
1373 i=len1;
1374 j=len2;
1375 while(i>0 && j>0)
1376 {
1377 if(path[i][j]) //from diagonal
1378 {
1379 j2i[j-1]=i-1;
1380 i--;
1381 j--;
1382 }
1383 else
1384 {
1385 h=val[i-1][j];
1386 if(path[i-1][j]) h +=gap_open;
1387
1388 v=val[i][j-1];
1389 if(path[i][j-1]) v +=gap_open;
1390
1391 if(v>=h) j--;
1392 else i--;
1393 }
1394 }
1395 }
1396
1397 /* Input: vectors x, y, rotation matrix t, u, scale factor d02, and gap_open
1398 * Output: j2i[1:len2] \in {1:len1} U {-1}
1399 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
1400 void NWDP_TM(bool **path, double **val, double **x, double **y,
1401 int len1, int len2, double t[3], double u[3][3],
1402 double d02, double gap_open, int j2i[])
1403 {
1404 int i, j;
1405 double h, v, d;
1406
1407 //initialization. use old val[i][0] and val[0][j] initialization
1408 //to minimize difference from TMalign fortran version
1409 for(i=0; i<=len1; i++)
1410 {
1411 val[i][0]=0;
1412 //val[i][0]=i*gap_open;
1413 path[i][0]=false; //not from diagonal
1414 }
1415
1416 for(j=0; j<=len2; j++)
1417 {
1418 val[0][j]=0;
1419 //val[0][j]=j*gap_open;
1420 path[0][j]=false; //not from diagonal
1421 j2i[j]=-1; //all are not aligned, only use j2i[1:len2]
1422 }
1423 double xx[3], dij;
1424
1425
1426 //decide matrix and path
1427 for(i=1; i<=len1; i++)
1428 {
1429 transform(t, u, &x[i-1][0], xx);
1430 for(j=1; j<=len2; j++)
1431 {
1432 dij=dist(xx, &y[j-1][0]);
1433 d=val[i-1][j-1] + 1.0/(1+dij/d02);
1434
1435 //symbol insertion in horizontal (= a gap in vertical)
1436 h=val[i-1][j];
1437 if(path[i-1][j]) h += gap_open; //aligned in last position
1438
1439 //symbol insertion in vertical
1440 v=val[i][j-1];
1441 if(path[i][j-1]) v += gap_open; //aligned in last position
1442
1443
1444 if(d>=h && d>=v)
1445 {
1446 path[i][j]=true; //from diagonal
1447 val[i][j]=d;
1448 }
1449 else
1450 {
1451 path[i][j]=false; //from horizontal
1452 if(v>=h) val[i][j]=v;
1453 else val[i][j]=h;
1454 }
1455 } //for i
1456 } //for j
1457
1458 //trace back to extract the alignment
1459 i=len1;
1460 j=len2;
1461 while(i>0 && j>0)
1462 {
1463 if(path[i][j]) //from diagonal
1464 {
1465 j2i[j-1]=i-1;
1466 i--;
1467 j--;
1468 }
1469 else
1470 {
1471 h=val[i-1][j];
1472 if(path[i-1][j]) h +=gap_open;
1473
1474 v=val[i][j-1];
1475 if(path[i][j-1]) v +=gap_open;
1476
1477 if(v>=h) j--;
1478 else i--;
1479 }
1480 }
1481 }
1482
1483 /* This is the same as the previous NWDP_TM, except for the lack of rotation
1484 * Input: vectors x, y, scale factor d02, and gap_open
1485 * Output: j2i[1:len2] \in {1:len1} U {-1}
1486 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
1487 void NWDP_SE(bool **path, double **val, double **x, double **y,
1488 int len1, int len2, double d02, double gap_open, int j2i[])
1489 {
1490 int i, j;
1491 double h, v, d;
1492
1493 for(i=0; i<=len1; i++)
1494 {
1495 val[i][0]=0;
1496 path[i][0]=false; //not from diagonal
1497 }
1498
1499 for(j=0; j<=len2; j++)
1500 {
1501 val[0][j]=0;
1502 path[0][j]=false; //not from diagonal
1503 j2i[j]=-1; //all are not aligned, only use j2i[1:len2]
1504 }
1505 double dij;
1506
1507 //decide matrix and path
1508 for(i=1; i<=len1; i++)
1509 {
1510 for(j=1; j<=len2; j++)
1511 {
1512 dij=dist(&x[i-1][0], &y[j-1][0]);
1513 d=val[i-1][j-1] + 1.0/(1+dij/d02);
1514
1515 //symbol insertion in horizontal (= a gap in vertical)
1516 h=val[i-1][j];
1517 if(path[i-1][j]) h += gap_open; //aligned in last position
1518
1519 //symbol insertion in vertical
1520 v=val[i][j-1];
1521 if(path[i][j-1]) v += gap_open; //aligned in last position
1522
1523
1524 if(d>=h && d>=v)
1525 {
1526 path[i][j]=true; //from diagonal
1527 val[i][j]=d;
1528 }
1529 else
1530 {
1531 path[i][j]=false; //from horizontal
1532 if(v>=h) val[i][j]=v;
1533 else val[i][j]=h;
1534 }
1535 } //for i
1536 } //for j
1537
1538 //trace back to extract the alignment
1539 i=len1;
1540 j=len2;
1541 while(i>0 && j>0)
1542 {
1543 if(path[i][j]) //from diagonal
1544 {
1545 j2i[j-1]=i-1;
1546 i--;
1547 j--;
1548 }
1549 else
1550 {
1551 h=val[i-1][j];
1552 if(path[i-1][j]) h +=gap_open;
1553
1554 v=val[i][j-1];
1555 if(path[i][j-1]) v +=gap_open;
1556
1557 if(v>=h) j--;
1558 else i--;
1559 }
1560 }
1561 }
1562
1563 /* +ss
1564 * Input: secondary structure secx, secy, and gap_open
1565 * Output: j2i[1:len2] \in {1:len1} U {-1}
1566 * path[0:len1, 0:len2]=1,2,3, from diagonal, horizontal, vertical */
1567 void NWDP_TM(bool **path, double **val, const char *secx, const char *secy,
1568 const int len1, const int len2, const double gap_open, int j2i[])
1569 {
1570
1571 int i, j;
1572 double h, v, d;
1573
1574 //initialization
1575 for(i=0; i<=len1; i++)
1576 {
1577 val[i][0]=0;
1578 //val[i][0]=i*gap_open;
1579 path[i][0]=false; //not from diagonal
1580 }
1581
1582 for(j=0; j<=len2; j++)
1583 {
1584 val[0][j]=0;
1585 //val[0][j]=j*gap_open;
1586 path[0][j]=false; //not from diagonal
1587 j2i[j]=-1; //all are not aligned, only use j2i[1:len2]
1588 }
1589
1590 //decide matrix and path
1591 for(i=1; i<=len1; i++)
1592 {
1593 for(j=1; j<=len2; j++)
1594 {
1595 d=val[i-1][j-1] + 1.0*(secx[i-1]==secy[j-1]);
1596
1597 //symbol insertion in horizontal (= a gap in vertical)
1598 h=val[i-1][j];
1599 if(path[i-1][j]) h += gap_open; //aligned in last position
1600
1601 //symbol insertion in vertical
1602 v=val[i][j-1];
1603 if(path[i][j-1]) v += gap_open; //aligned in last position
1604
1605 if(d>=h && d>=v)
1606 {
1607 path[i][j]=true; //from diagonal
1608 val[i][j]=d;
1609 }
1610 else
1611 {
1612 path[i][j]=false; //from horizontal
1613 if(v>=h) val[i][j]=v;
1614 else val[i][j]=h;
1615 }
1616 } //for i
1617 } //for j
1618
1619 //trace back to extract the alignment
1620 i=len1;
1621 j=len2;
1622 while(i>0 && j>0)
1623 {
1624 if(path[i][j]) //from diagonal
1625 {
1626 j2i[j-1]=i-1;
1627 i--;
1628 j--;
1629 }
1630 else
1631 {
1632 h=val[i-1][j];
1633 if(path[i-1][j]) h +=gap_open;
1634
1635 v=val[i][j-1];
1636 if(path[i][j-1]) v +=gap_open;
1637
1638 if(v>=h) j--;
1639 else i--;
1640 }
1641 }
1642 }
1643
1644 void parameter_set4search(const int xlen, const int ylen,
1645 double &D0_MIN, double &Lnorm,
1646 double &score_d8, double &d0, double &d0_search, double &dcu0)
1647 {
1648 //parameter initilization for searching: D0_MIN, Lnorm, d0, d0_search, score_d8
1649 D0_MIN=0.5;
1650 dcu0=4.25; //update 3.85-->4.25
1651
1652 Lnorm=getmin(xlen, ylen); //normaliz TMscore by this in searching
1653 if (Lnorm<=19) //update 15-->19
1654 d0=0.168; //update 0.5-->0.168
1655 else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
1656 D0_MIN=d0+0.8; //this should be moved to above
1657 d0=D0_MIN; //update: best for search
1658
1659 d0_search=d0;
1660 if (d0_search>8) d0_search=8;
1661 if (d0_search<4.5) d0_search=4.5;
1662
1663 score_d8=1.5*pow(Lnorm*1.0, 0.3)+3.5; //remove pairs with dis>d8 during search & final
1664 }
1665
1666 void parameter_set4final_C3prime(const double len, double &D0_MIN,
1667 double &Lnorm, double &d0, double &d0_search)
1668 {
1669 D0_MIN=0.3;
1670
1671 Lnorm=len; //normaliz TMscore by this in searching
1672 if(Lnorm<=11) d0=0.3;
1673 else if(Lnorm>11&&Lnorm<=15) d0=0.4;
1674 else if(Lnorm>15&&Lnorm<=19) d0=0.5;
1675 else if(Lnorm>19&&Lnorm<=23) d0=0.6;
1676 else if(Lnorm>23&&Lnorm<30) d0=0.7;
1677 else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5);
1678
1679 d0_search=d0;
1680 if (d0_search>8) d0_search=8;
1681 if (d0_search<4.5) d0_search=4.5;
1682 }
1683
1684 void parameter_set4final(const double len, double &D0_MIN, double &Lnorm,
1685 double &d0, double &d0_search, const int mol_type)
1686 {
1687 if (mol_type>0) // RNA
1688 {
1689 parameter_set4final_C3prime(len, D0_MIN, Lnorm,
1690 d0, d0_search);
1691 return;
1692 }
1693 D0_MIN=0.5;
1694
1695 Lnorm=len; //normaliz TMscore by this in searching
1696 if (Lnorm<=21) d0=0.5;
1697 else d0=(1.24*pow((Lnorm*1.0-15), 1.0/3)-1.8);
1698 if (d0<D0_MIN) d0=D0_MIN;
1699 d0_search=d0;
1700 if (d0_search>8) d0_search=8;
1701 if (d0_search<4.5) d0_search=4.5;
1702 }
1703
1704 void parameter_set4scale(const int len, const double d_s, double &Lnorm,
1705 double &d0, double &d0_search)
1706 {
1707 d0=d_s;
1708 Lnorm=len; //normaliz TMscore by this in searching
1709 d0_search=d0;
1710 if (d0_search>8) d0_search=8;
1711 if (d0_search<4.5) d0_search=4.5;
1712 }
1713
1714 // 1, collect those residues with dis<d;
1715 // 2, calculate TMscore
1716 int score_fun8( double **xa, double **ya, int n_ali, double d, int i_ali[],
1717 double *score1, int score_sum_method, const double Lnorm,
1718 const double score_d8, const double d0)
1719 {
1720 double score_sum=0, di;
1721 double d_tmp=d*d;
1722 double d02=d0*d0;
1723 double score_d8_cut = score_d8*score_d8;
1724
1725 int i, n_cut, inc=0;
1726
1727 while(1)
1728 {
1729 n_cut=0;
1730 score_sum=0;
1731 for(i=0; i<n_ali; i++)
1732 {
1733 di = dist(xa[i], ya[i]);
1734 if(di<d_tmp)
1735 {
1736 i_ali[n_cut]=i;
1737 n_cut++;
1738 }
1739 if(score_sum_method==8)
1740 {
1741 if(di<=score_d8_cut) score_sum += 1/(1+di/d02);
1742 }
1743 else score_sum += 1/(1+di/d02);
1744 }
1745 //there are not enough feasible pairs, reliefe the threshold
1746 if(n_cut<3 && n_ali>3)
1747 {
1748 inc++;
1749 double dinc=(d+inc*0.5);
1750 d_tmp = dinc * dinc;
1751 }
1752 else break;
1753 }
1754
1755 *score1=score_sum/Lnorm;
1756 return n_cut;
1757 }
1758
1759 int score_fun8_standard(double **xa, double **ya, int n_ali, double d,
1760 int i_ali[], double *score1, int score_sum_method,
1761 double score_d8, double d0)
1762 {
1763 double score_sum = 0, di;
1764 double d_tmp = d*d;
1765 double d02 = d0*d0;
1766 double score_d8_cut = score_d8*score_d8;
1767
1768 int i, n_cut, inc = 0;
1769 while (1)
1770 {
1771 n_cut = 0;
1772 score_sum = 0;
1773 for (i = 0; i<n_ali; i++)
1774 {
1775 di = dist(xa[i], ya[i]);
1776 if (di<d_tmp)
1777 {
1778 i_ali[n_cut] = i;
1779 n_cut++;
1780 }
1781 if (score_sum_method == 8)
1782 {
1783 if (di <= score_d8_cut) score_sum += 1 / (1 + di / d02);
1784 }
1785 else
1786 {
1787 score_sum += 1 / (1 + di / d02);
1788 }
1789 }
1790 //there are not enough feasible pairs, reliefe the threshold
1791 if (n_cut<3 && n_ali>3)
1792 {
1793 inc++;
1794 double dinc = (d + inc*0.5);
1795 d_tmp = dinc * dinc;
1796 }
1797 else break;
1798 }
1799
1800 *score1 = score_sum / n_ali;
1801 return n_cut;
1802 }
1803
1804 double TMscore8_search(double **r1, double **r2, double **xtm, double **ytm,
1805 double **xt, int Lali, double t0[3], double u0[3][3], int simplify_step,
1806 int score_sum_method, double *Rcomm, double local_d0_search, double Lnorm,
1807 double score_d8, double d0)
1808 {
1809 int i, m;
1810 double score_max, score, rmsd;
1811 const int kmax=Lali;
1812 int k_ali[kmax], ka, k;
1813 double t[3];
1814 double u[3][3];
1815 double d;
1816
1817
1818 //iterative parameters
1819 int n_it=20; //maximum number of iterations
1820 int n_init_max=6; //maximum number of different fragment length
1821 int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4
1822 int L_ini_min=4;
1823 if(Lali<L_ini_min) L_ini_min=Lali;
1824
1825 int n_init=0, i_init;
1826 for(i=0; i<n_init_max-1; i++)
1827 {
1828 n_init++;
1829 L_ini[i]=(int) (Lali/pow(2.0, (double) i));
1830 if(L_ini[i]<=L_ini_min)
1831 {
1832 L_ini[i]=L_ini_min;
1833 break;
1834 }
1835 }
1836 if(i==n_init_max-1)
1837 {
1838 n_init++;
1839 L_ini[i]=L_ini_min;
1840 }
1841
1842 score_max=-1;
1843 //find the maximum score starting from local structures superposition
1844 int i_ali[kmax], n_cut;
1845 int L_frag; //fragment length
1846 int iL_max; //maximum starting postion for the fragment
1847
1848 for(i_init=0; i_init<n_init; i_init++)
1849 {
1850 L_frag=L_ini[i_init];
1851 iL_max=Lali-L_frag;
1852
1853 i=0;
1854 while(1)
1855 {
1856 //extract the fragment starting from position i
1857 ka=0;
1858 for(k=0; k<L_frag; k++)
1859 {
1860 int kk=k+i;
1861 r1[k][0]=xtm[kk][0];
1862 r1[k][1]=xtm[kk][1];
1863 r1[k][2]=xtm[kk][2];
1864
1865 r2[k][0]=ytm[kk][0];
1866 r2[k][1]=ytm[kk][1];
1867 r2[k][2]=ytm[kk][2];
1868
1869 k_ali[ka]=kk;
1870 ka++;
1871 }
1872
1873 //extract rotation matrix based on the fragment
1874 Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
1875 if (simplify_step != 1)
1876 *Rcomm = 0;
1877 do_rotation(xtm, xt, Lali, t, u);
1878
1879 //get subsegment of this fragment
1880 d = local_d0_search - 1;
1881 n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
1882 score_sum_method, Lnorm, score_d8, d0);
1883 if(score>score_max)
1884 {
1885 score_max=score;
1886
1887 //save the rotation matrix
1888 for(k=0; k<3; k++)
1889 {
1890 t0[k]=t[k];
1891 u0[k][0]=u[k][0];
1892 u0[k][1]=u[k][1];
1893 u0[k][2]=u[k][2];
1894 }
1895 }
1896
1897 //try to extend the alignment iteratively
1898 d = local_d0_search + 1;
1899 for(int it=0; it<n_it; it++)
1900 {
1901 ka=0;
1902 for(k=0; k<n_cut; k++)
1903 {
1904 m=i_ali[k];
1905 r1[k][0]=xtm[m][0];
1906 r1[k][1]=xtm[m][1];
1907 r1[k][2]=xtm[m][2];
1908
1909 r2[k][0]=ytm[m][0];
1910 r2[k][1]=ytm[m][1];
1911 r2[k][2]=ytm[m][2];
1912
1913 k_ali[ka]=m;
1914 ka++;
1915 }
1916 //extract rotation matrix based on the fragment
1917 Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
1918 do_rotation(xtm, xt, Lali, t, u);
1919 n_cut=score_fun8(xt, ytm, Lali, d, i_ali, &score,
1920 score_sum_method, Lnorm, score_d8, d0);
1921 if(score>score_max)
1922 {
1923 score_max=score;
1924
1925 //save the rotation matrix
1926 for(k=0; k<3; k++)
1927 {
1928 t0[k]=t[k];
1929 u0[k][0]=u[k][0];
1930 u0[k][1]=u[k][1];
1931 u0[k][2]=u[k][2];
1932 }
1933 }
1934
1935 //check if it converges
1936 if(n_cut==ka)
1937 {
1938 for(k=0; k<n_cut; k++)
1939 {
1940 if(i_ali[k]!=k_ali[k]) break;
1941 }
1942 if(k==n_cut) break;
1943 }
1944 } //for iteration
1945
1946 if(i<iL_max)
1947 {
1948 i=i+simplify_step; //shift the fragment
1949 if(i>iL_max) i=iL_max; //do this to use the last missed fragment
1950 }
1951 else if(i>=iL_max) break;
1952 }//while(1)
1953 //end of one fragment
1954 }//for(i_init
1955 return score_max;
1956 }
1957
1958
1959 double TMscore8_search_standard( double **r1, double **r2,
1960 double **xtm, double **ytm, double **xt, int Lali,
1961 double t0[3], double u0[3][3], int simplify_step, int score_sum_method,
1962 double *Rcomm, double local_d0_search, double score_d8, double d0)
1963 {
1964 int i, m;
1965 double score_max, score, rmsd;
1966 const int kmax = Lali;
1967 int k_ali[kmax], ka, k;
1968 double t[3];
1969 double u[3][3];
1970 double d;
1971
1972 //iterative parameters
1973 int n_it = 20; //maximum number of iterations
1974 int n_init_max = 6; //maximum number of different fragment length
1975 int L_ini[n_init_max]; //fragment lengths, Lali, Lali/2, Lali/4 ... 4
1976 int L_ini_min = 4;
1977 if (Lali<L_ini_min) L_ini_min = Lali;
1978
1979 int n_init = 0, i_init;
1980 for (i = 0; i<n_init_max - 1; i++)
1981 {
1982 n_init++;
1983 L_ini[i] = (int)(Lali / pow(2.0, (double)i));
1984 if (L_ini[i] <= L_ini_min)
1985 {
1986 L_ini[i] = L_ini_min;
1987 break;
1988 }
1989 }
1990 if (i == n_init_max - 1)
1991 {
1992 n_init++;
1993 L_ini[i] = L_ini_min;
1994 }
1995
1996 score_max = -1;
1997 //find the maximum score starting from local structures superposition
1998 int i_ali[kmax], n_cut;
1999 int L_frag; //fragment length
2000 int iL_max; //maximum starting postion for the fragment
2001
2002 for (i_init = 0; i_init<n_init; i_init++)
2003 {
2004 L_frag = L_ini[i_init];
2005 iL_max = Lali - L_frag;
2006
2007 i = 0;
2008 while (1)
2009 {
2010 //extract the fragment starting from position i
2011 ka = 0;
2012 for (k = 0; k<L_frag; k++)
2013 {
2014 int kk = k + i;
2015 r1[k][0] = xtm[kk][0];
2016 r1[k][1] = xtm[kk][1];
2017 r1[k][2] = xtm[kk][2];
2018
2019 r2[k][0] = ytm[kk][0];
2020 r2[k][1] = ytm[kk][1];
2021 r2[k][2] = ytm[kk][2];
2022
2023 k_ali[ka] = kk;
2024 ka++;
2025 }
2026 //extract rotation matrix based on the fragment
2027 Kabsch(r1, r2, L_frag, 1, &rmsd, t, u);
2028 if (simplify_step != 1)
2029 *Rcomm = 0;
2030 do_rotation(xtm, xt, Lali, t, u);
2031
2032 //get subsegment of this fragment
2033 d = local_d0_search - 1;
2034 n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
2035 score_sum_method, score_d8, d0);
2036
2037 if (score>score_max)
2038 {
2039 score_max = score;
2040
2041 //save the rotation matrix
2042 for (k = 0; k<3; k++)
2043 {
2044 t0[k] = t[k];
2045 u0[k][0] = u[k][0];
2046 u0[k][1] = u[k][1];
2047 u0[k][2] = u[k][2];
2048 }
2049 }
2050
2051 //try to extend the alignment iteratively
2052 d = local_d0_search + 1;
2053 for (int it = 0; it<n_it; it++)
2054 {
2055 ka = 0;
2056 for (k = 0; k<n_cut; k++)
2057 {
2058 m = i_ali[k];
2059 r1[k][0] = xtm[m][0];
2060 r1[k][1] = xtm[m][1];
2061 r1[k][2] = xtm[m][2];
2062
2063 r2[k][0] = ytm[m][0];
2064 r2[k][1] = ytm[m][1];
2065 r2[k][2] = ytm[m][2];
2066
2067 k_ali[ka] = m;
2068 ka++;
2069 }
2070 //extract rotation matrix based on the fragment
2071 Kabsch(r1, r2, n_cut, 1, &rmsd, t, u);
2072 do_rotation(xtm, xt, Lali, t, u);
2073 n_cut = score_fun8_standard(xt, ytm, Lali, d, i_ali, &score,
2074 score_sum_method, score_d8, d0);
2075 if (score>score_max)
2076 {
2077 score_max = score;
2078
2079 //save the rotation matrix
2080 for (k = 0; k<3; k++)
2081 {
2082 t0[k] = t[k];
2083 u0[k][0] = u[k][0];
2084 u0[k][1] = u[k][1];
2085 u0[k][2] = u[k][2];
2086 }
2087 }
2088
2089 //check if it converges
2090 if (n_cut == ka)
2091 {
2092 for (k = 0; k<n_cut; k++)
2093 {
2094 if (i_ali[k] != k_ali[k]) break;
2095 }
2096 if (k == n_cut) break;
2097 }
2098 } //for iteration
2099
2100 if (i<iL_max)
2101 {
2102 i = i + simplify_step; //shift the fragment
2103 if (i>iL_max) i = iL_max; //do this to use the last missed fragment
2104 }
2105 else if (i >= iL_max) break;
2106 }//while(1)
2107 //end of one fragment
2108 }//for(i_init
2109 return score_max;
2110 }
2111
2112 //Comprehensive TMscore search engine
2113 // input: two vector sets: x, y
2114 // an alignment invmap0[] between x and y
2115 // simplify_step: 1 or 40 or other integers
2116 // score_sum_method: 0 for score over all pairs
2117 // 8 for socre over the pairs with dist<score_d8
2118 // output: the best rotaion matrix t, u that results in highest TMscore
2119 double detailed_search(double **r1, double **r2, double **xtm, double **ytm,
2120 double **xt, double **x, double **y, int xlen, int ylen,
2121 int invmap0[], double t[3], double u[3][3], int simplify_step,
2122 int score_sum_method, double local_d0_search, double Lnorm,
2123 double score_d8, double d0)
2124 {
2125 //x is model, y is template, try to superpose onto y
2126 int i, j, k;
2127 double tmscore;
2128 double rmsd;
2129
2130 k=0;
2131 for(i=0; i<ylen; i++)
2132 {
2133 j=invmap0[i];
2134 if(j>=0) //aligned
2135 {
2136 xtm[k][0]=x[j][0];
2137 xtm[k][1]=x[j][1];
2138 xtm[k][2]=x[j][2];
2139
2140 ytm[k][0]=y[i][0];
2141 ytm[k][1]=y[i][1];
2142 ytm[k][2]=y[i][2];
2143 k++;
2144 }
2145 }
2146
2147 //detailed search 40-->1
2148 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u, simplify_step,
2149 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
2150 return tmscore;
2151 }
2152
2153 double detailed_search_standard( double **r1, double **r2,
2154 double **xtm, double **ytm, double **xt, double **x, double **y,
2155 int xlen, int ylen, int invmap0[], double t[3], double u[3][3],
2156 int simplify_step, int score_sum_method, double local_d0_search,
2157 const bool& bNormalize, double Lnorm, double score_d8, double d0)
2158 {
2159 //x is model, y is template, try to superpose onto y
2160 int i, j, k;
2161 double tmscore;
2162 double rmsd;
2163
2164 k=0;
2165 for(i=0; i<ylen; i++)
2166 {
2167 j=invmap0[i];
2168 if(j>=0) //aligned
2169 {
2170 xtm[k][0]=x[j][0];
2171 xtm[k][1]=x[j][1];
2172 xtm[k][2]=x[j][2];
2173
2174 ytm[k][0]=y[i][0];
2175 ytm[k][1]=y[i][1];
2176 ytm[k][2]=y[i][2];
2177 k++;
2178 }
2179 }
2180
2181 //detailed search 40-->1
2182 tmscore = TMscore8_search_standard( r1, r2, xtm, ytm, xt, k, t, u,
2183 simplify_step, score_sum_method, &rmsd, local_d0_search, score_d8, d0);
2184 if (bNormalize)// "-i", to use standard_TMscore, then bNormalize=true, else bNormalize=false;
2185 tmscore = tmscore * k / Lnorm;
2186
2187 return tmscore;
2188 }
2189
2190 //compute the score quickly in three iterations
2191 double get_score_fast( double **r1, double **r2, double **xtm, double **ytm,
2192 double **x, double **y, int xlen, int ylen, int invmap[],
2193 double d0, double d0_search, double t[3], double u[3][3])
2194 {
2195 double rms, tmscore, tmscore1, tmscore2;
2196 int i, j, k;
2197
2198 k=0;
2199 for(j=0; j<ylen; j++)
2200 {
2201 i=invmap[j];
2202 if(i>=0)
2203 {
2204 r1[k][0]=x[i][0];
2205 r1[k][1]=x[i][1];
2206 r1[k][2]=x[i][2];
2207
2208 r2[k][0]=y[j][0];
2209 r2[k][1]=y[j][1];
2210 r2[k][2]=y[j][2];
2211
2212 xtm[k][0]=x[i][0];
2213 xtm[k][1]=x[i][1];
2214 xtm[k][2]=x[i][2];
2215
2216 ytm[k][0]=y[j][0];
2217 ytm[k][1]=y[j][1];
2218 ytm[k][2]=y[j][2];
2219
2220 k++;
2221 }
2222 else if(i!=-1) PrintErrorAndQuit("Wrong map!\n");
2223 }
2224 Kabsch(r1, r2, k, 1, &rms, t, u);
2225
2226 //evaluate score
2227 double di;
2228 const int len=k;
2229 double dis[len];
2230 double d00=d0_search;
2231 double d002=d00*d00;
2232 double d02=d0*d0;
2233
2234 int n_ali=k;
2235 double xrot[3];
2236 tmscore=0;
2237 for(k=0; k<n_ali; k++)
2238 {
2239 transform(t, u, &xtm[k][0], xrot);
2240 di=dist(xrot, &ytm[k][0]);
2241 dis[k]=di;
2242 tmscore += 1/(1+di/d02);
2243 }
2244
2245
2246
2247 //second iteration
2248 double d002t=d002;
2249 while(1)
2250 {
2251 j=0;
2252 for(k=0; k<n_ali; k++)
2253 {
2254 if(dis[k]<=d002t)
2255 {
2256 r1[j][0]=xtm[k][0];
2257 r1[j][1]=xtm[k][1];
2258 r1[j][2]=xtm[k][2];
2259
2260 r2[j][0]=ytm[k][0];
2261 r2[j][1]=ytm[k][1];
2262 r2[j][2]=ytm[k][2];
2263
2264 j++;
2265 }
2266 }
2267 //there are not enough feasible pairs, relieve the threshold
2268 if(j<3 && n_ali>3) d002t += 0.5;
2269 else break;
2270 }
2271
2272 if(n_ali!=j)
2273 {
2274 Kabsch(r1, r2, j, 1, &rms, t, u);
2275 tmscore1=0;
2276 for(k=0; k<n_ali; k++)
2277 {
2278 transform(t, u, &xtm[k][0], xrot);
2279 di=dist(xrot, &ytm[k][0]);
2280 dis[k]=di;
2281 tmscore1 += 1/(1+di/d02);
2282 }
2283
2284 //third iteration
2285 d002t=d002+1;
2286
2287 while(1)
2288 {
2289 j=0;
2290 for(k=0; k<n_ali; k++)
2291 {
2292 if(dis[k]<=d002t)
2293 {
2294 r1[j][0]=xtm[k][0];
2295 r1[j][1]=xtm[k][1];
2296 r1[j][2]=xtm[k][2];
2297
2298 r2[j][0]=ytm[k][0];
2299 r2[j][1]=ytm[k][1];
2300 r2[j][2]=ytm[k][2];
2301
2302 j++;
2303 }
2304 }
2305 //there are not enough feasible pairs, relieve the threshold
2306 if(j<3 && n_ali>3) d002t += 0.5;
2307 else break;
2308 }
2309
2310 //evaluate the score
2311 Kabsch(r1, r2, j, 1, &rms, t, u);
2312 tmscore2=0;
2313 for(k=0; k<n_ali; k++)
2314 {
2315 transform(t, u, &xtm[k][0], xrot);
2316 di=dist(xrot, &ytm[k][0]);
2317 tmscore2 += 1/(1+di/d02);
2318 }
2319 }
2320 else
2321 {
2322 tmscore1=tmscore;
2323 tmscore2=tmscore;
2324 }
2325
2326 if(tmscore1>=tmscore) tmscore=tmscore1;
2327 if(tmscore2>=tmscore) tmscore=tmscore2;
2328 return tmscore; // no need to normalize this score because it will not be used for latter scoring
2329 }
2330
2331
2332 //perform gapless threading to find the best initial alignment
2333 //input: x, y, xlen, ylen
2334 //output: y2x0 stores the best alignment: e.g.,
2335 //y2x0[j]=i means:
2336 //the jth element in y is aligned to the ith element in x if i>=0
2337 //the jth element in y is aligned to a gap in x if i==-1
2338 double get_initial(double **r1, double **r2, double **xtm, double **ytm,
2339 double **x, double **y, int xlen, int ylen, int *y2x,
2340 double d0, double d0_search, const bool fast_opt,
2341 double t[3], double u[3][3])
2342 {
2343 int min_len=getmin(xlen, ylen);
2344 if(min_len<3) PrintErrorAndQuit("Sequence is too short <3!\n");
2345
2346 int min_ali= min_len/2; //minimum size of considered fragment
2347 if(min_ali<=5) min_ali=5;
2348 int n1, n2;
2349 n1 = -ylen+min_ali;
2350 n2 = xlen-min_ali;
2351
2352 int i, j, k, k_best;
2353 double tmscore, tmscore_max=-1;
2354
2355 k_best=n1;
2356 for(k=n1; k<=n2; k+=(fast_opt)?5:1)
2357 {
2358 //get the map
2359 for(j=0; j<ylen; j++)
2360 {
2361 i=j+k;
2362 if(i>=0 && i<xlen) y2x[j]=i;
2363 else y2x[j]=-1;
2364 }
2365
2366 //evaluate the map quickly in three iterations
2367 //this is not real tmscore, it is used to evaluate the goodness of the initial alignment
2368 tmscore=get_score_fast(r1, r2, xtm, ytm,
2369 x, y, xlen, ylen, y2x, d0,d0_search, t, u);
2370 if(tmscore>=tmscore_max)
2371 {
2372 tmscore_max=tmscore;
2373 k_best=k;
2374 }
2375 }
2376
2377 //extract the best map
2378 k=k_best;
2379 for(j=0; j<ylen; j++)
2380 {
2381 i=j+k;
2382 if(i>=0 && i<xlen) y2x[j]=i;
2383 else y2x[j]=-1;
2384 }
2385
2386 return tmscore_max;
2387 }
2388
2389 void smooth(int *sec, int len)
2390 {
2391 int i, j;
2392 //smooth single --x-- => -----
2393 for (i=2; i<len-2; i++)
2394 {
2395 if(sec[i]==2 || sec[i]==4)
2396 {
2397 j=sec[i];
2398 if (sec[i-2]!=j && sec[i-1]!=j && sec[i+1]!=j && sec[i+2]!=j)
2399 sec[i]=1;
2400 }
2401 }
2402
2403 // smooth double
2404 // --xx-- => ------
2405 for (i=0; i<len-5; i++)
2406 {
2407 //helix
2408 if (sec[i]!=2 && sec[i+1]!=2 && sec[i+2]==2 && sec[i+3]==2 &&
2409 sec[i+4]!=2 && sec[i+5]!= 2)
2410 {
2411 sec[i+2]=1;
2412 sec[i+3]=1;
2413 }
2414
2415 //beta
2416 if (sec[i]!=4 && sec[i+1]!=4 && sec[i+2]==4 && sec[i+3]==4 &&
2417 sec[i+4]!=4 && sec[i+5]!= 4)
2418 {
2419 sec[i+2]=1;
2420 sec[i+3]=1;
2421 }
2422 }
2423
2424 //smooth connect
2425 for (i=0; i<len-2; i++)
2426 {
2427 if (sec[i]==2 && sec[i+1]!=2 && sec[i+2]==2) sec[i+1]=2;
2428 else if(sec[i]==4 && sec[i+1]!=4 && sec[i+2]==4) sec[i+1]=4;
2429 }
2430
2431 }
2432
2433 char sec_str(double dis13, double dis14, double dis15,
2434 double dis24, double dis25, double dis35)
2435 {
2436 char s='C';
2437
2438 double delta=2.1;
2439 if (fabs(dis15-6.37)<delta && fabs(dis14-5.18)<delta &&
2440 fabs(dis25-5.18)<delta && fabs(dis13-5.45)<delta &&
2441 fabs(dis24-5.45)<delta && fabs(dis35-5.45)<delta)
2442 {
2443 s='H'; //helix
2444 return s;
2445 }
2446
2447 delta=1.42;
2448 if (fabs(dis15-13 )<delta && fabs(dis14-10.4)<delta &&
2449 fabs(dis25-10.4)<delta && fabs(dis13-6.1 )<delta &&
2450 fabs(dis24-6.1 )<delta && fabs(dis35-6.1 )<delta)
2451 {
2452 s='E'; //strand
2453 return s;
2454 }
2455
2456 if (dis15 < 8) s='T'; //turn
2457 return s;
2458 }
2459
2460
2461 /* secondary stucture assignment for protein:
2462 * 1->coil, 2->helix, 3->turn, 4->strand */
2463 void make_sec(double **x, int len, char *sec)
2464 {
2465 int j1, j2, j3, j4, j5;
2466 double d13, d14, d15, d24, d25, d35;
2467 for(int i=0; i<len; i++)
2468 {
2469 sec[i]='C';
2470 j1=i-2;
2471 j2=i-1;
2472 j3=i;
2473 j4=i+1;
2474 j5=i+2;
2475
2476 if(j1>=0 && j5<len)
2477 {
2478 d13=sqrt(dist(x[j1], x[j3]));
2479 d14=sqrt(dist(x[j1], x[j4]));
2480 d15=sqrt(dist(x[j1], x[j5]));
2481 d24=sqrt(dist(x[j2], x[j4]));
2482 d25=sqrt(dist(x[j2], x[j5]));
2483 d35=sqrt(dist(x[j3], x[j5]));
2484 sec[i]=sec_str(d13, d14, d15, d24, d25, d35);
2485 }
2486 }
2487 sec[len]=0;
2488 }
2489
2490 //get initial alignment from secondary structure alignment
2491 //input: x, y, xlen, ylen
2492 //output: y2x stores the best alignment: e.g.,
2493 //y2x[j]=i means:
2494 //the jth element in y is aligned to the ith element in x if i>=0
2495 //the jth element in y is aligned to a gap in x if i==-1
2496 void get_initial_ss(bool **path, double **val,
2497 const char *secx, const char *secy, int xlen, int ylen, int *y2x)
2498 {
2499 double gap_open=-1.0;
2500 NWDP_TM(path, val, secx, secy, xlen, ylen, gap_open, y2x);
2501 }
2502
2503
2504 // get_initial5 in TMalign fortran, get_initial_local in TMalign c by yangji
2505 //get initial alignment of local structure superposition
2506 //input: x, y, xlen, ylen
2507 //output: y2x stores the best alignment: e.g.,
2508 //y2x[j]=i means:
2509 //the jth element in y is aligned to the ith element in x if i>=0
2510 //the jth element in y is aligned to a gap in x if i==-1
2511 bool get_initial5( double **r1, double **r2, double **xtm, double **ytm,
2512 bool **path, double **val,
2513 double **x, double **y, int xlen, int ylen, int *y2x,
2514 double d0, double d0_search, const bool fast_opt, const double D0_MIN)
2515 {
2516 double GL, rmsd;
2517 double t[3];
2518 double u[3][3];
2519
2520 double d01 = d0 + 1.5;
2521 if (d01 < D0_MIN) d01 = D0_MIN;
2522 double d02 = d01*d01;
2523
2524 double GLmax = 0;
2525 int aL = getmin(xlen, ylen);
2526 int *invmap = new int[ylen + 1];
2527
2528 // jump on sequence1-------------->
2529 int n_jump1 = 0;
2530 if (xlen > 250)
2531 n_jump1 = 45;
2532 else if (xlen > 200)
2533 n_jump1 = 35;
2534 else if (xlen > 150)
2535 n_jump1 = 25;
2536 else
2537 n_jump1 = 15;
2538 if (n_jump1 > (xlen / 3))
2539 n_jump1 = xlen / 3;
2540
2541 // jump on sequence2-------------->
2542 int n_jump2 = 0;
2543 if (ylen > 250)
2544 n_jump2 = 45;
2545 else if (ylen > 200)
2546 n_jump2 = 35;
2547 else if (ylen > 150)
2548 n_jump2 = 25;
2549 else
2550 n_jump2 = 15;
2551 if (n_jump2 > (ylen / 3))
2552 n_jump2 = ylen / 3;
2553
2554 // fragment to superimpose-------------->
2555 int n_frag[2] = { 20, 100 };
2556 if (n_frag[0] > (aL / 3))
2557 n_frag[0] = aL / 3;
2558 if (n_frag[1] > (aL / 2))
2559 n_frag[1] = aL / 2;
2560
2561 // start superimpose search-------------->
2562 if (fast_opt)
2563 {
2564 n_jump1*=5;
2565 n_jump2*=5;
2566 }
2567 bool flag = false;
2568 for (int i_frag = 0; i_frag < 2; i_frag++)
2569 {
2570 int m1 = xlen - n_frag[i_frag] + 1;
2571 int m2 = ylen - n_frag[i_frag] + 1;
2572
2573 for (int i = 0; i<m1; i = i + n_jump1) //index starts from 0, different from FORTRAN
2574 {
2575 for (int j = 0; j<m2; j = j + n_jump2)
2576 {
2577 for (int k = 0; k<n_frag[i_frag]; k++) //fragment in y
2578 {
2579 r1[k][0] = x[k + i][0];
2580 r1[k][1] = x[k + i][1];
2581 r1[k][2] = x[k + i][2];
2582
2583 r2[k][0] = y[k + j][0];
2584 r2[k][1] = y[k + j][1];
2585 r2[k][2] = y[k + j][2];
2586 }
2587
2588 // superpose the two structures and rotate it
2589 Kabsch(r1, r2, n_frag[i_frag], 1, &rmsd, t, u);
2590
2591 double gap_open = 0.0;
2592 NWDP_TM(path, val, x, y, xlen, ylen,
2593 t, u, d02, gap_open, invmap);
2594 GL = get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen,
2595 invmap, d0, d0_search, t, u);
2596 if (GL>GLmax)
2597 {
2598 GLmax = GL;
2599 for (int ii = 0; ii<ylen; ii++) y2x[ii] = invmap[ii];
2600 flag = true;
2601 }
2602 }
2603 }
2604 }
2605
2606 delete[] invmap;
2607 return flag;
2608 }
2609
2610 void score_matrix_rmsd_sec( double **r1, double **r2, double **score,
2611 const char *secx, const char *secy, double **x, double **y,
2612 int xlen, int ylen, int *y2x, const double D0_MIN, double d0)
2613 {
2614 double t[3], u[3][3];
2615 double rmsd, dij;
2616 double d01=d0+1.5;
2617 if(d01 < D0_MIN) d01=D0_MIN;
2618 double d02=d01*d01;
2619
2620 double xx[3];
2621 int i, k=0;
2622 for(int j=0; j<ylen; j++)
2623 {
2624 i=y2x[j];
2625 if(i>=0)
2626 {
2627 r1[k][0]=x[i][0];
2628 r1[k][1]=x[i][1];
2629 r1[k][2]=x[i][2];
2630
2631 r2[k][0]=y[j][0];
2632 r2[k][1]=y[j][1];
2633 r2[k][2]=y[j][2];
2634
2635 k++;
2636 }
2637 }
2638 Kabsch(r1, r2, k, 1, &rmsd, t, u);
2639
2640
2641 for(int ii=0; ii<xlen; ii++)
2642 {
2643 transform(t, u, &x[ii][0], xx);
2644 for(int jj=0; jj<ylen; jj++)
2645 {
2646 dij=dist(xx, &y[jj][0]);
2647 if (secx[ii]==secy[jj])
2648 score[ii+1][jj+1] = 1.0/(1+dij/d02) + 0.5;
2649 else
2650 score[ii+1][jj+1] = 1.0/(1+dij/d02);
2651 }
2652 }
2653 }
2654
2655
2656 //get initial alignment from secondary structure and previous alignments
2657 //input: x, y, xlen, ylen
2658 //output: y2x stores the best alignment: e.g.,
2659 //y2x[j]=i means:
2660 //the jth element in y is aligned to the ith element in x if i>=0
2661 //the jth element in y is aligned to a gap in x if i==-1
2662 void get_initial_ssplus(double **r1, double **r2, double **score, bool **path,
2663 double **val, const char *secx, const char *secy, double **x, double **y,
2664 int xlen, int ylen, int *y2x0, int *y2x, const double D0_MIN, double d0)
2665 {
2666 //create score matrix for DP
2667 score_matrix_rmsd_sec(r1, r2, score, secx, secy, x, y, xlen, ylen,
2668 y2x0, D0_MIN,d0);
2669
2670 double gap_open=-1.0;
2671 NWDP_TM(score, path, val, xlen, ylen, gap_open, y2x);
2672 }
2673
2674
2675 void find_max_frag(double **x, int len, int *start_max,
2676 int *end_max, double dcu0, const bool fast_opt)
2677 {
2678 int r_min, fra_min=4; //minimum fragment for search
2679 if (fast_opt) fra_min=8;
2680 int start;
2681 int Lfr_max=0;
2682
2683 r_min= (int) (len*1.0/3.0); //minimum fragment, in case too small protein
2684 if(r_min > fra_min) r_min=fra_min;
2685
2686 int inc=0;
2687 double dcu0_cut=dcu0*dcu0;;
2688 double dcu_cut=dcu0_cut;
2689
2690 while(Lfr_max < r_min)
2691 {
2692 Lfr_max=0;
2693 int j=1; //number of residues at nf-fragment
2694 start=0;
2695 for(int i=1; i<len; i++)
2696 {
2697 if(dist(x[i-1], x[i]) < dcu_cut)
2698 {
2699 j++;
2700
2701 if(i==(len-1))
2702 {
2703 if(j > Lfr_max)
2704 {
2705 Lfr_max=j;
2706 *start_max=start;
2707 *end_max=i;
2708 }
2709 j=1;
2710 }
2711 }
2712 else
2713 {
2714 if(j>Lfr_max)
2715 {
2716 Lfr_max=j;
2717 *start_max=start;
2718 *end_max=i-1;
2719 }
2720
2721 j=1;
2722 start=i;
2723 }
2724 }// for i;
2725
2726 if(Lfr_max < r_min)
2727 {
2728 inc++;
2729 double dinc=pow(1.1, (double) inc) * dcu0;
2730 dcu_cut= dinc*dinc;
2731 }
2732 }//while <;
2733 }
2734
2735 //perform fragment gapless threading to find the best initial alignment
2736 //input: x, y, xlen, ylen
2737 //output: y2x0 stores the best alignment: e.g.,
2738 //y2x0[j]=i means:
2739 //the jth element in y is aligned to the ith element in x if i>=0
2740 //the jth element in y is aligned to a gap in x if i==-1
2741 double get_initial_fgt(double **r1, double **r2, double **xtm, double **ytm,
2742 double **x, double **y, int xlen, int ylen,
2743 int *y2x, double d0, double d0_search,
2744 double dcu0, const bool fast_opt, double t[3], double u[3][3])
2745 {
2746 int fra_min=4; //minimum fragment for search
2747 if (fast_opt) fra_min=8;
2748 int fra_min1=fra_min-1; //cutoff for shift, save time
2749
2750 int xstart=0, ystart=0, xend=0, yend=0;
2751
2752 find_max_frag(x, xlen, &xstart, &xend, dcu0, fast_opt);
2753 find_max_frag(y, ylen, &ystart, &yend, dcu0, fast_opt);
2754
2755
2756 int Lx = xend-xstart+1;
2757 int Ly = yend-ystart+1;
2758 int *ifr, *y2x_;
2759 int L_fr=getmin(Lx, Ly);
2760 ifr= new int[L_fr];
2761 y2x_= new int[ylen+1];
2762
2763 //select what piece will be used. The original implement may cause
2764 //asymetry, but only when xlen==ylen and Lx==Ly
2765 //if L1=Lfr1 and L2=Lfr2 (normal proteins), it will be the same as initial1
2766
2767 if(Lx<Ly || (Lx==Ly && xlen<ylen))
2768 {
2769 for(int i=0; i<L_fr; i++) ifr[i]=xstart+i;
2770 }
2771 else if(Lx>Ly || (Lx==Ly && xlen>ylen))
2772 {
2773 for(int i=0; i<L_fr; i++) ifr[i]=ystart+i;
2774 }
2775 else // solve asymetric for 1x5gA vs 2q7nA5
2776 {
2777 /* In this case, L0==xlen==ylen; L_fr==Lx==Ly */
2778 int L0=xlen;
2779 double tmscore, tmscore_max=-1;
2780 int i, j, k;
2781 int n1, n2;
2782 int min_len;
2783 int min_ali;
2784
2785 /* part 1, normalized by xlen */
2786 for(i=0; i<L_fr; i++) ifr[i]=xstart+i;
2787
2788 if(L_fr==L0)
2789 {
2790 n1= (int)(L0*0.1); //my index starts from 0
2791 n2= (int)(L0*0.89);
2792 j=0;
2793 for(i=n1; i<= n2; i++)
2794 {
2795 ifr[j]=ifr[i];
2796 j++;
2797 }
2798 L_fr=j;
2799 }
2800
2801 int L1=L_fr;
2802 min_len=getmin(L1, ylen);
2803 min_ali= (int) (min_len/2.5); //minimum size of considered fragment
2804 if(min_ali<=fra_min1) min_ali=fra_min1;
2805 n1 = -ylen+min_ali;
2806 n2 = L1-min_ali;
2807
2808 for(k=n1; k<=n2; k+=(fast_opt)?3:1)
2809 {
2810 //get the map
2811 for(j=0; j<ylen; j++)
2812 {
2813 i=j+k;
2814 if(i>=0 && i<L1) y2x_[j]=ifr[i];
2815 else y2x_[j]=-1;
2816 }
2817
2818 //evaluate the map quickly in three iterations
2819 tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
2820 d0, d0_search, t, u);
2821
2822 if(tmscore>=tmscore_max)
2823 {
2824 tmscore_max=tmscore;
2825 for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
2826 }
2827 }
2828
2829 /* part 2, normalized by ylen */
2830 L_fr=Ly;
2831 for(i=0; i<L_fr; i++) ifr[i]=ystart+i;
2832
2833 if (L_fr==L0)
2834 {
2835 n1= (int)(L0*0.1); //my index starts from 0
2836 n2= (int)(L0*0.89);
2837
2838 j=0;
2839 for(i=n1; i<= n2; i++)
2840 {
2841 ifr[j]=ifr[i];
2842 j++;
2843 }
2844 L_fr=j;
2845 }
2846
2847 int L2=L_fr;
2848 min_len=getmin(xlen, L2);
2849 min_ali= (int) (min_len/2.5); //minimum size of considered fragment
2850 if(min_ali<=fra_min1) min_ali=fra_min1;
2851 n1 = -L2+min_ali;
2852 n2 = xlen-min_ali;
2853
2854 for(k=n1; k<=n2; k++)
2855 {
2856 //get the map
2857 for(j=0; j<ylen; j++) y2x_[j]=-1;
2858
2859 for(j=0; j<L2; j++)
2860 {
2861 i=j+k;
2862 if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
2863 }
2864
2865 //evaluate the map quickly in three iterations
2866 tmscore=get_score_fast(r1, r2, xtm, ytm,
2867 x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
2868 if(tmscore>=tmscore_max)
2869 {
2870 tmscore_max=tmscore;
2871 for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
2872 }
2873 }
2874
2875 delete [] ifr;
2876 delete [] y2x_;
2877 return tmscore_max;
2878 }
2879
2880
2881 int L0=getmin(xlen, ylen); //non-redundant to get_initial1
2882 if(L_fr==L0)
2883 {
2884 int n1= (int)(L0*0.1); //my index starts from 0
2885 int n2= (int)(L0*0.89);
2886
2887 int j=0;
2888 for(int i=n1; i<= n2; i++)
2889 {
2890 ifr[j]=ifr[i];
2891 j++;
2892 }
2893 L_fr=j;
2894 }
2895
2896
2897 //gapless threading for the extracted fragment
2898 double tmscore, tmscore_max=-1;
2899
2900 if(Lx<Ly || (Lx==Ly && xlen<=ylen))
2901 {
2902 int L1=L_fr;
2903 int min_len=getmin(L1, ylen);
2904 int min_ali= (int) (min_len/2.5); //minimum size of considered fragment
2905 if(min_ali<=fra_min1) min_ali=fra_min1;
2906 int n1, n2;
2907 n1 = -ylen+min_ali;
2908 n2 = L1-min_ali;
2909
2910 int i, j, k;
2911 for(k=n1; k<=n2; k+=(fast_opt)?3:1)
2912 {
2913 //get the map
2914 for(j=0; j<ylen; j++)
2915 {
2916 i=j+k;
2917 if(i>=0 && i<L1) y2x_[j]=ifr[i];
2918 else y2x_[j]=-1;
2919 }
2920
2921 //evaluate the map quickly in three iterations
2922 tmscore=get_score_fast(r1, r2, xtm, ytm, x, y, xlen, ylen, y2x_,
2923 d0, d0_search, t, u);
2924
2925 if(tmscore>=tmscore_max)
2926 {
2927 tmscore_max=tmscore;
2928 for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
2929 }
2930 }
2931 }
2932 else
2933 {
2934 int L2=L_fr;
2935 int min_len=getmin(xlen, L2);
2936 int min_ali= (int) (min_len/2.5); //minimum size of considered fragment
2937 if(min_ali<=fra_min1) min_ali=fra_min1;
2938 int n1, n2;
2939 n1 = -L2+min_ali;
2940 n2 = xlen-min_ali;
2941
2942 int i, j, k;
2943
2944 for(k=n1; k<=n2; k++)
2945 {
2946 //get the map
2947 for(j=0; j<ylen; j++) y2x_[j]=-1;
2948
2949 for(j=0; j<L2; j++)
2950 {
2951 i=j+k;
2952 if(i>=0 && i<xlen) y2x_[ifr[j]]=i;
2953 }
2954
2955 //evaluate the map quickly in three iterations
2956 tmscore=get_score_fast(r1, r2, xtm, ytm,
2957 x, y, xlen, ylen, y2x_, d0,d0_search, t, u);
2958 if(tmscore>=tmscore_max)
2959 {
2960 tmscore_max=tmscore;
2961 for(j=0; j<ylen; j++) y2x[j]=y2x_[j];
2962 }
2963 }
2964 }
2965
2966
2967 delete [] ifr;
2968 delete [] y2x_;
2969 return tmscore_max;
2970 }
2971
2972 //heuristic run of dynamic programing iteratively to find the best alignment
2973 //input: initial rotation matrix t, u
2974 // vectors x and y, d0
2975 //output: best alignment that maximizes the TMscore, will be stored in invmap
2976 double DP_iter(double **r1, double **r2, double **xtm, double **ytm,
2977 double **xt, bool **path, double **val, double **x, double **y,
2978 int xlen, int ylen, double t[3], double u[3][3], int invmap0[],
2979 int g1, int g2, int iteration_max, double local_d0_search,
2980 double D0_MIN, double Lnorm, double d0, double score_d8)
2981 {
2982 double gap_open[2]={-0.6, 0};
2983 double rmsd;
2984 int *invmap=new int[ylen+1];
2985
2986 int iteration, i, j, k;
2987 double tmscore, tmscore_max, tmscore_old=0;
2988 int score_sum_method=8, simplify_step=40;
2989 tmscore_max=-1;
2990
2991 //double d01=d0+1.5;
2992 double d02=d0*d0;
2993 for(int g=g1; g<g2; g++)
2994 {
2995 for(iteration=0; iteration<iteration_max; iteration++)
2996 {
2997 NWDP_TM(path, val, x, y, xlen, ylen,
2998 t, u, d02, gap_open[g], invmap);
2999
3000 k=0;
3001 for(j=0; j<ylen; j++)
3002 {
3003 i=invmap[j];
3004
3005 if(i>=0) //aligned
3006 {
3007 xtm[k][0]=x[i][0];
3008 xtm[k][1]=x[i][1];
3009 xtm[k][2]=x[i][2];
3010
3011 ytm[k][0]=y[j][0];
3012 ytm[k][1]=y[j][1];
3013 ytm[k][2]=y[j][2];
3014 k++;
3015 }
3016 }
3017
3018 tmscore = TMscore8_search(r1, r2, xtm, ytm, xt, k, t, u,
3019 simplify_step, score_sum_method, &rmsd, local_d0_search,
3020 Lnorm, score_d8, d0);
3021
3022
3023 if(tmscore>tmscore_max)
3024 {
3025 tmscore_max=tmscore;
3026 for(i=0; i<ylen; i++) invmap0[i]=invmap[i];
3027 }
3028
3029 if(iteration>0)
3030 {
3031 if(fabs(tmscore_old-tmscore)<0.000001) break;
3032 }
3033 tmscore_old=tmscore;
3034 }// for iteration
3035
3036 }//for gapopen
3037
3038
3039 delete []invmap;
3040 return tmscore_max;
3041 }
3042
3043
3044 void output_superpose(const string xname, const string yname,
3045 const string fname_super,
3046 double t[3], double u[3][3], const int ter_opt, const int mirror_opt,
3047 const char *seqM, const char *seqxA, const char *seqyA,
3048 const vector<string>&resi_vec1, const vector<string>&resi_vec2,
3049 const char *chainID1, const char *chainID2,
3050 const int xlen, const int ylen, const double d0A, const int n_ali8,
3051 const double rmsd, const double TM1, const double Liden)
3052 {
3053 stringstream buf;
3054 stringstream buf_all;
3055 stringstream buf_atm;
3056 stringstream buf_all_atm;
3057 stringstream buf_all_atm_lig;
3058 stringstream buf_pdb;
3059 stringstream buf_pymol;
3060 stringstream buf_tm;
3061 string line;
3062 double x[3]; // before transform
3063 double x1[3]; // after transform
3064 bool after_ter; // true if passed the "TER" line in PDB
3065 string asym_id; // chain ID
3066
3067 buf_tm<<"REMARK TM-align"
3068 <<"\nREMARK Chain 1:"<<setw(11)<<left<<xname+chainID1<<" Size= "<<xlen
3069 <<"\nREMARK Chain 2:"<<setw(11)<<yname+chainID2<<right<<" Size= "<<ylen
3070 <<" (TM-score is normalized by "<<setw(4)<<ylen<<", d0="
3071 <<setiosflags(ios::fixed)<<setprecision(2)<<setw(6)<<d0A<<")"
3072 <<"\nREMARK Aligned length="<<setw(4)<<n_ali8<<", RMSD="
3073 <<setw(6)<<setiosflags(ios::fixed)<<setprecision(2)<<rmsd
3074 <<", TM-score="<<setw(7)<<setiosflags(ios::fixed)<<setprecision(5)<<TM1
3075 <<", ID="<<setw(5)<<setiosflags(ios::fixed)<<setprecision(3)
3076 <<((n_ali8>0)?Liden/n_ali8:0)<<endl;
3077 string rasmol_CA_header="load inline\nselect *A\nwireframe .45\nselect *B\nwireframe .20\nselect all\ncolor white\n";
3078 string rasmol_cartoon_header="load inline\nselect all\ncartoon\nselect *A\ncolor blue\nselect *B\ncolor red\nselect ligand\nwireframe 0.25\nselect solvent\nspacefill 0.25\nselect all\nexit\n"+buf_tm.str();
3079 buf<<rasmol_CA_header;
3080 buf_all<<rasmol_CA_header;
3081 buf_atm<<rasmol_cartoon_header;
3082 buf_all_atm<<rasmol_cartoon_header;
3083 buf_all_atm_lig<<rasmol_cartoon_header;
3084
3085 /* for PDBx/mmCIF only */
3086 map<string,int> _atom_site;
3087 int atom_site_pos;
3088 vector<string> line_vec;
3089 string atom; // 4-character atom name
3090 string AA; // 3-character residue name
3091 string resi; // 4-character residue sequence number
3092 string inscode; // 1-character insertion code
3093 string model_index; // model index
3094 bool is_mmcif=false;
3095 int chain_num=0;
3096
3097 /* used for CONECT record of chain1 */
3098 int ca_idx1=0; // all CA atoms
3099 int lig_idx1=0; // all atoms
3100 vector <int> idx_vec;
3101
3102 /* used for CONECT record of chain2 */
3103 int ca_idx2=0; // all CA atoms
3104 int lig_idx2=0; // all atoms
3105
3106 /* extract aligned region */
3107 vector<string> resi_aln1;
3108 vector<string> resi_aln2;
3109 int i1=-1;
3110 int i2=-1;
3111 int i;
3112 for (i=0;i<strlen(seqM);i++)
3113 {
3114 i1+=(seqxA[i]!='-');
3115 i2+=(seqyA[i]!='-');
3116 if (seqM[i]==' ') continue;
3117 resi_aln1.push_back(resi_vec1[i1].substr(0,4));
3118 resi_aln2.push_back(resi_vec2[i2].substr(0,4));
3119 if (seqM[i]!=':') continue;
3120 buf <<"select "<<resi_aln1.back()<<":A,"
3121 <<resi_aln2.back()<<":B\ncolor red\n";
3122 buf_all<<"select "<<resi_aln1.back()<<":A,"
3123 <<resi_aln2.back()<<":B\ncolor red\n";
3124 }
3125 buf<<"select all\nexit\n"<<buf_tm.str();
3126 buf_all<<"select all\nexit\n"<<buf_tm.str();
3127
3128 ifstream fin;
3129 /* read first file */
3130 after_ter=false;
3131 asym_id="";
3132 fin.open(xname.c_str());
3133 while (fin.good())
3134 {
3135 getline(fin, line);
3136 if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
3137 if (is_mmcif==false && line.size()>=54 &&
3138 (line.compare(0, 6, "ATOM ")==0 ||
3139 line.compare(0, 6, "HETATM")==0)) // PDB format
3140 {
3141 x[0]=atof(line.substr(30,8).c_str());
3142 x[1]=atof(line.substr(38,8).c_str());
3143 x[2]=atof(line.substr(46,8).c_str());
3144 if (mirror_opt) x[2]=-x[2];
3145 transform(t, u, x, x1);
3146 buf_pdb<<line.substr(0,30)<<setiosflags(ios::fixed)
3147 <<setprecision(3)
3148 <<setw(8)<<x1[0] <<setw(8)<<x1[1] <<setw(8)<<x1[2]
3149 <<line.substr(54)<<'\n';
3150
3151 if (line[16]!='A' && line[16]!=' ') continue;
3152 if (after_ter && line.compare(0,6,"ATOM ")==0) continue;
3153 lig_idx1++;
3154 buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1
3155 <<line.substr(11,9)<<" A"<<line.substr(22,8)
3156 <<setiosflags(ios::fixed)<<setprecision(3)
3157 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
3158 if (after_ter || line.compare(0,6,"ATOM ")) continue;
3159 if (ter_opt>=2)
3160 {
3161 if (ca_idx1 && asym_id.size() && asym_id!=line.substr(21,1))
3162 {
3163 after_ter=true;
3164 continue;
3165 }
3166 asym_id=line[21];
3167 }
3168 buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1
3169 <<line.substr(11,9)<<" A"<<line.substr(22,8)
3170 <<setiosflags(ios::fixed)<<setprecision(3)
3171 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
3172 if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
3173 )!=resi_aln1.end())
3174 {
3175 buf_atm<<"ATOM "<<setw(5)<<lig_idx1
3176 <<line.substr(11,9)<<" A"<<line.substr(22,8)
3177 <<setiosflags(ios::fixed)<<setprecision(3)
3178 <<setw(8)<<x1[0]<<setw(8)<<x1[1] <<setw(8)<<x1[2]<<'\n';
3179 }
3180 if (line.substr(12,4)!=" CA ") continue;
3181 ca_idx1++;
3182 buf_all<<"ATOM "<<setw(5)<<ca_idx1
3183 <<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8)
3184 <<setiosflags(ios::fixed)<<setprecision(3)
3185 <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
3186 if (find(resi_aln1.begin(),resi_aln1.end(),line.substr(22,4)
3187 )==resi_aln1.end()) continue;
3188 buf<<"ATOM "<<setw(5)<<ca_idx1
3189 <<" CA "<<line.substr(17,3)<<" A"<<line.substr(22,8)
3190 <<setiosflags(ios::fixed)<<setprecision(3)
3191 <<setw(8)<<x1[0]<<setw(8)<<x1[1]<<setw(8)<<x1[2]<<'\n';
3192 idx_vec.push_back(ca_idx1);
3193 }
3194 else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
3195 {
3196 while(1)
3197 {
3198 if (fin.good()) getline(fin, line);
3199 else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
3200 if (line.size()) break;
3201 }
3202 if (line.compare(0,11,"_atom_site.")) continue;
3203 _atom_site.clear();
3204 atom_site_pos=0;
3205 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
3206 while(1)
3207 {
3208 if (fin.good()) getline(fin, line);
3209 else PrintErrorAndQuit("ERROR! Unexpected end of "+xname);
3210 if (line.size()==0) continue;
3211 if (line.compare(0,11,"_atom_site.")) break;
3212 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
3213 }
3214
3215 if (is_mmcif==false)
3216 {
3217 buf_pdb.str(string());
3218 is_mmcif=true;
3219 }
3220
3221 while(1)
3222 {
3223 line_vec.clear();
3224 split(line,line_vec);
3225 if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
3226 line_vec[_atom_site["group_PDB"]]!="HETATM") break;
3227 if (_atom_site.count("pdbx_PDB_model_num"))
3228 {
3229 if (model_index.size() && model_index!=
3230 line_vec[_atom_site["pdbx_PDB_model_num"]])
3231 break;
3232 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
3233 }
3234
3235 x[0]=atof(line_vec[_atom_site["Cartn_x"]].c_str());
3236 x[1]=atof(line_vec[_atom_site["Cartn_y"]].c_str());
3237 x[2]=atof(line_vec[_atom_site["Cartn_z"]].c_str());
3238 if (mirror_opt) x[2]=-x[2];
3239 transform(t, u, x, x1);
3240
3241 if (_atom_site.count("label_alt_id")==0 ||
3242 line_vec[_atom_site["label_alt_id"]]=="." ||
3243 line_vec[_atom_site["label_alt_id"]]=="A")
3244 {
3245 atom=line_vec[_atom_site["label_atom_id"]];
3246 if (atom[0]=='"') atom=atom.substr(1);
3247 if (atom.size() && atom[atom.size()-1]=='"')
3248 atom=atom.substr(0,atom.size()-1);
3249 if (atom.size()==0) atom=" ";
3250 else if (atom.size()==1) atom=" "+atom+" ";
3251 else if (atom.size()==2) atom=" "+atom+" ";
3252 else if (atom.size()==3) atom=" "+atom;
3253 else if (atom.size()>=5) atom=atom.substr(0,4);
3254
3255 AA=line_vec[_atom_site["label_comp_id"]]; // residue name
3256 if (AA.size()==1) AA=" "+AA;
3257 else if (AA.size()==2) AA=" " +AA;
3258 else if (AA.size()>=4) AA=AA.substr(0,3);
3259
3260 if (_atom_site.count("auth_seq_id"))
3261 resi=line_vec[_atom_site["auth_seq_id"]];
3262 else resi=line_vec[_atom_site["label_seq_id"]];
3263 while (resi.size()<4) resi=' '+resi;
3264 if (resi.size()>4) resi=resi.substr(0,4);
3265
3266 inscode=' ';
3267 if (_atom_site.count("pdbx_PDB_ins_code") &&
3268 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
3269 inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
3270
3271 if (_atom_site.count("auth_asym_id"))
3272 {
3273 if (ter_opt>=2 && ca_idx1 && asym_id.size() &&
3274 asym_id!=line_vec[_atom_site["auth_asym_id"]])
3275 after_ter=true;
3276 asym_id=line_vec[_atom_site["auth_asym_id"]];
3277 }
3278 else if (_atom_site.count("label_asym_id"))
3279 {
3280 if (ter_opt>=2 && ca_idx1 && asym_id.size() &&
3281 asym_id!=line_vec[_atom_site["label_asym_id"]])
3282 after_ter=true;
3283 asym_id=line_vec[_atom_site["label_asym_id"]];
3284 }
3285 buf_pdb<<left<<setw(6)
3286 <<line_vec[_atom_site["group_PDB"]]<<right
3287 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
3288 <<AA<<" "<<asym_id[asym_id.size()-1]
3289 <<resi<<inscode<<" "
3290 <<setiosflags(ios::fixed)<<setprecision(3)
3291 <<setw(8)<<x1[0]
3292 <<setw(8)<<x1[1]
3293 <<setw(8)<<x1[2]<<'\n';
3294
3295 if (after_ter==false ||
3296 line_vec[_atom_site["group_pdb"]]=="HETATM")
3297 {
3298 lig_idx1++;
3299 buf_all_atm_lig<<left<<setw(6)
3300 <<line_vec[_atom_site["group_PDB"]]<<right
3301 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
3302 <<AA<<" A"<<resi<<inscode<<" "
3303 <<setiosflags(ios::fixed)<<setprecision(3)
3304 <<setw(8)<<x1[0]
3305 <<setw(8)<<x1[1]
3306 <<setw(8)<<x1[2]<<'\n';
3307 if (after_ter==false &&
3308 line_vec[_atom_site["group_PDB"]]=="ATOM")
3309 {
3310 buf_all_atm<<"ATOM "<<setw(6)
3311 <<setw(5)<<lig_idx1%100000<<' '<<atom<<' '
3312 <<AA<<" A"<<resi<<inscode<<" "
3313 <<setiosflags(ios::fixed)<<setprecision(3)
3314 <<setw(8)<<x1[0]
3315 <<setw(8)<<x1[1]
3316 <<setw(8)<<x1[2]<<'\n';
3317 if (find(resi_aln1.begin(),resi_aln1.end(),resi
3318 )!=resi_aln1.end())
3319 {
3320 buf_atm<<"ATOM "<<setw(6)
3321 <<setw(5)<<lig_idx1%100000<<' '
3322 <<atom<<' '<<AA<<" A"<<resi<<inscode<<" "
3323 <<setiosflags(ios::fixed)<<setprecision(3)
3324 <<setw(8)<<x1[0]
3325 <<setw(8)<<x1[1]
3326 <<setw(8)<<x1[2]<<'\n';
3327 }
3328 if (atom==" CA ")
3329 {
3330 ca_idx1++;
3331 buf_all<<"ATOM "<<setw(6)
3332 <<setw(5)<<ca_idx1%100000<<" CA "
3333 <<AA<<" A"<<resi<<inscode<<" "
3334 <<setiosflags(ios::fixed)<<setprecision(3)
3335 <<setw(8)<<x1[0]
3336 <<setw(8)<<x1[1]
3337 <<setw(8)<<x1[2]<<'\n';
3338 if (find(resi_aln1.begin(),resi_aln1.end(),resi
3339 )!=resi_aln1.end())
3340 {
3341 buf<<"ATOM "<<setw(6)
3342 <<setw(5)<<ca_idx1%100000<<" CA "
3343 <<AA<<" A"<<resi<<inscode<<" "
3344 <<setiosflags(ios::fixed)<<setprecision(3)
3345 <<setw(8)<<x1[0]
3346 <<setw(8)<<x1[1]
3347 <<setw(8)<<x1[2]<<'\n';
3348 idx_vec.push_back(ca_idx1);
3349 }
3350 }
3351 }
3352 }
3353 }
3354
3355 while(1)
3356 {
3357 if (fin.good()) getline(fin, line);
3358 else break;
3359 if (line.size()) break;
3360 }
3361 }
3362 }
3363 else if (line.size() && is_mmcif==false)
3364 {
3365 buf_pdb<<line<<'\n';
3366 if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
3367 }
3368 }
3369 fin.close();
3370 buf<<"TER\n";
3371 buf_all<<"TER\n";
3372 buf_atm<<"TER\n";
3373 buf_all_atm<<"TER\n";
3374 buf_all_atm_lig<<"TER\n";
3375 for (i=1;i<ca_idx1;i++) buf_all<<"CONECT"
3376 <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
3377 for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
3378 <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
3379 idx_vec.clear();
3380
3381 /* read second file */
3382 after_ter=false;
3383 asym_id="";
3384 fin.open(yname.c_str());
3385 while (fin.good())
3386 {
3387 getline(fin, line);
3388 if (ter_opt>=3 && line.compare(0,3,"TER")==0) after_ter=true;
3389 if (line.size()>=54 && (line.compare(0, 6, "ATOM ")==0 ||
3390 line.compare(0, 6, "HETATM")==0)) // PDB format
3391 {
3392 if (line[16]!='A' && line[16]!=' ') continue;
3393 if (after_ter && line.compare(0,6,"ATOM ")==0) continue;
3394 lig_idx2++;
3395 buf_all_atm_lig<<line.substr(0,6)<<setw(5)<<lig_idx1+lig_idx2
3396 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
3397 if (after_ter || line.compare(0,6,"ATOM ")) continue;
3398 if (ter_opt>=2)
3399 {
3400 if (ca_idx2 && asym_id.size() && asym_id!=line.substr(21,1))
3401 {
3402 after_ter=true;
3403 continue;
3404 }
3405 asym_id=line[21];
3406 }
3407 buf_all_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2
3408 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
3409 if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
3410 )!=resi_aln2.end())
3411 {
3412 buf_atm<<"ATOM "<<setw(5)<<lig_idx1+lig_idx2
3413 <<line.substr(11,9)<<" B"<<line.substr(22,32)<<'\n';
3414 }
3415 if (line.substr(12,4)!=" CA ") continue;
3416 ca_idx2++;
3417 buf_all<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA "
3418 <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
3419 if (find(resi_aln2.begin(),resi_aln2.end(),line.substr(22,4)
3420 )==resi_aln2.end()) continue;
3421 buf<<"ATOM "<<setw(5)<<ca_idx1+ca_idx2<<" CA "
3422 <<line.substr(17,3)<<" B"<<line.substr(22,32)<<'\n';
3423 idx_vec.push_back(ca_idx1+ca_idx2);
3424 }
3425 else if (line.compare(0,5,"loop_")==0) // PDBx/mmCIF
3426 {
3427 while(1)
3428 {
3429 if (fin.good()) getline(fin, line);
3430 else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
3431 if (line.size()) break;
3432 }
3433 if (line.compare(0,11,"_atom_site.")) continue;
3434 _atom_site.clear();
3435 atom_site_pos=0;
3436 _atom_site[line.substr(11,line.size()-12)]=atom_site_pos;
3437 while(1)
3438 {
3439 if (fin.good()) getline(fin, line);
3440 else PrintErrorAndQuit("ERROR! Unexpected end of "+yname);
3441 if (line.size()==0) continue;
3442 if (line.compare(0,11,"_atom_site.")) break;
3443 _atom_site[line.substr(11,line.size()-12)]=++atom_site_pos;
3444 }
3445
3446 while(1)
3447 {
3448 line_vec.clear();
3449 split(line,line_vec);
3450 if (line_vec[_atom_site["group_PDB"]]!="ATOM" &&
3451 line_vec[_atom_site["group_PDB"]]!="HETATM") break;
3452 if (_atom_site.count("pdbx_PDB_model_num"))
3453 {
3454 if (model_index.size() && model_index!=
3455 line_vec[_atom_site["pdbx_PDB_model_num"]])
3456 break;
3457 model_index=line_vec[_atom_site["pdbx_PDB_model_num"]];
3458 }
3459
3460 if (_atom_site.count("label_alt_id")==0 ||
3461 line_vec[_atom_site["label_alt_id"]]=="." ||
3462 line_vec[_atom_site["label_alt_id"]]=="A")
3463 {
3464 atom=line_vec[_atom_site["label_atom_id"]];
3465 if (atom[0]=='"') atom=atom.substr(1);
3466 if (atom.size() && atom[atom.size()-1]=='"')
3467 atom=atom.substr(0,atom.size()-1);
3468 if (atom.size()==0) atom=" ";
3469 else if (atom.size()==1) atom=" "+atom+" ";
3470 else if (atom.size()==2) atom=" "+atom+" ";
3471 else if (atom.size()==3) atom=" "+atom;
3472 else if (atom.size()>=5) atom=atom.substr(0,4);
3473
3474 AA=line_vec[_atom_site["label_comp_id"]]; // residue name
3475 if (AA.size()==1) AA=" "+AA;
3476 else if (AA.size()==2) AA=" " +AA;
3477 else if (AA.size()>=4) AA=AA.substr(0,3);
3478
3479 if (_atom_site.count("auth_seq_id"))
3480 resi=line_vec[_atom_site["auth_seq_id"]];
3481 else resi=line_vec[_atom_site["label_seq_id"]];
3482 while (resi.size()<4) resi=' '+resi;
3483 if (resi.size()>4) resi=resi.substr(0,4);
3484
3485 inscode=' ';
3486 if (_atom_site.count("pdbx_PDB_ins_code") &&
3487 line_vec[_atom_site["pdbx_PDB_ins_code"]]!="?")
3488 inscode=line_vec[_atom_site["pdbx_PDB_ins_code"]][0];
3489
3490 if (ter_opt>=2)
3491 {
3492 if (_atom_site.count("auth_asym_id"))
3493 {
3494 if (ca_idx2 && asym_id.size() &&
3495 asym_id!=line_vec[_atom_site["auth_asym_id"]])
3496 after_ter=true;
3497 else
3498 asym_id=line_vec[_atom_site["auth_asym_id"]];
3499 }
3500 else if (_atom_site.count("label_asym_id"))
3501 {
3502 if (ca_idx2 && asym_id.size() &&
3503 asym_id!=line_vec[_atom_site["label_asym_id"]])
3504 after_ter=true;
3505 else
3506 asym_id=line_vec[_atom_site["label_asym_id"]];
3507 }
3508 }
3509 if (after_ter==false ||
3510 line_vec[_atom_site["group_PDB"]]=="HETATM")
3511 {
3512 lig_idx2++;
3513 buf_all_atm_lig<<left<<setw(6)
3514 <<line_vec[_atom_site["group_PDB"]]<<right
3515 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
3516 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
3517 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
3518 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
3519 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
3520 <<'\n';
3521 if (after_ter==false &&
3522 line_vec[_atom_site["group_PDB"]]=="ATOM")
3523 {
3524 buf_all_atm<<"ATOM "<<setw(6)
3525 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
3526 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
3527 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
3528 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
3529 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
3530 <<'\n';
3531 if (find(resi_aln2.begin(),resi_aln2.end(),resi
3532 )!=resi_aln2.end())
3533 {
3534 buf_atm<<"ATOM "<<setw(6)
3535 <<setw(5)<<(lig_idx1+lig_idx2)%100000<<' '
3536 <<atom<<' '<<AA<<" B"<<resi<<inscode<<" "
3537 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
3538 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
3539 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
3540 <<'\n';
3541 }
3542 if (atom==" CA ")
3543 {
3544 ca_idx2++;
3545 buf_all<<"ATOM "<<setw(6)
3546 <<setw(5)<<(ca_idx1+ca_idx2)%100000
3547 <<" CA "<<AA<<" B"<<resi<<inscode<<" "
3548 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
3549 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
3550 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
3551 <<'\n';
3552 if (find(resi_aln2.begin(),resi_aln2.end(),resi
3553 )!=resi_aln2.end())
3554 {
3555 buf<<"ATOM "<<setw(6)
3556 <<setw(5)<<(ca_idx1+ca_idx2)%100000
3557 <<" CA "<<AA<<" B"<<resi<<inscode<<" "
3558 <<setw(8)<<line_vec[_atom_site["Cartn_x"]]
3559 <<setw(8)<<line_vec[_atom_site["Cartn_y"]]
3560 <<setw(8)<<line_vec[_atom_site["Cartn_z"]]
3561 <<'\n';
3562 idx_vec.push_back(ca_idx1+ca_idx2);
3563 }
3564 }
3565 }
3566 }
3567 }
3568
3569 if (fin.good()) getline(fin, line);
3570 else break;
3571 }
3572 }
3573 else if (line.size())
3574 {
3575 if (ter_opt>=1 && line.compare(0,3,"END")==0) break;
3576 }
3577 }
3578 fin.close();
3579 buf<<"TER\n";
3580 buf_all<<"TER\n";
3581 buf_atm<<"TER\n";
3582 buf_all_atm<<"TER\n";
3583 buf_all_atm_lig<<"TER\n";
3584 for (i=ca_idx1+1;i<ca_idx1+ca_idx2;i++) buf_all<<"CONECT"
3585 <<setw(5)<<i%100000<<setw(5)<<(i+1)%100000<<'\n';
3586 for (i=1;i<idx_vec.size();i++) buf<<"CONECT"
3587 <<setw(5)<<idx_vec[i-1]%100000<<setw(5)<<idx_vec[i]%100000<<'\n';
3588 idx_vec.clear();
3589
3590 /* write pymol script */
3591 ofstream fp;
3592 vector<string> pml_list;
3593 pml_list.push_back(fname_super+"");
3594 pml_list.push_back(fname_super+"_atm");
3595 pml_list.push_back(fname_super+"_all");
3596 pml_list.push_back(fname_super+"_all_atm");
3597 pml_list.push_back(fname_super+"_all_atm_lig");
3598 for (i=0;i<pml_list.size();i++)
3599 {
3600 buf_pymol<<"#!/usr/bin/env pymol\n"
3601 <<"load "<<pml_list[i]<<"\n"
3602 <<"hide all\n"
3603 <<((i==0 || i==2)?("show stick\n"):("show cartoon\n"))
3604 <<"color blue, chain A\n"
3605 <<"color red, chain B\n"
3606 <<"set ray_shadow, 0\n"
3607 <<"set stick_radius, 0.3\n"
3608 <<"set sphere_scale, 0.25\n"
3609 <<"show stick, not polymer\n"
3610 <<"show sphere, not polymer\n"
3611 <<"bg_color white\n"
3612 <<"set transparency=0.2\n"
3613 <<"zoom polymer\n"
3614 <<endl;
3615 fp.open((pml_list[i]+".pml").c_str());
3616 fp<<buf_pymol.str();
3617 fp.close();
3618 buf_pymol.str(string());
3619 pml_list[i].clear();
3620 }
3621 pml_list.clear();
3622
3623 /* write rasmol script */
3624 fp.open((fname_super).c_str());
3625 fp<<buf.str();
3626 fp.close();
3627 fp.open((fname_super+"_all").c_str());
3628 fp<<buf_all.str();
3629 fp.close();
3630 fp.open((fname_super+"_atm").c_str());
3631 fp<<buf_atm.str();
3632 fp.close();
3633 fp.open((fname_super+"_all_atm").c_str());
3634 fp<<buf_all_atm.str();
3635 fp.close();
3636 fp.open((fname_super+"_all_atm_lig").c_str());
3637 fp<<buf_all_atm_lig.str();
3638 fp.close();
3639 fp.open((fname_super+".pdb").c_str());
3640 fp<<buf_pdb.str();
3641 fp.close();
3642
3643 /* clear stream */
3644 buf.str(string());
3645 buf_all.str(string());
3646 buf_atm.str(string());
3647 buf_all_atm.str(string());
3648 buf_all_atm_lig.str(string());
3649 buf_pdb.str(string());
3650 buf_tm.str(string());
3651 resi_aln1.clear();
3652 resi_aln2.clear();
3653 asym_id.clear();
3654 line_vec.clear();
3655 atom.clear();
3656 AA.clear();
3657 resi.clear();
3658 inscode.clear();
3659 model_index.clear();
3660 }
3661
3662 /* extract rotation matrix based on TMscore8 */
3663 void output_rotation_matrix(const char* fname_matrix,
3664 const double t[3], const double u[3][3])
3665 {
3666 fstream fout;
3667 fout.open(fname_matrix, ios::out | ios::trunc);
3668 if (fout)// succeed
3669 {
3670 fout << "------ The rotation matrix to rotate Chain_1 to Chain_2 ------\n";
3671 char dest[1000];
3672 sprintf(dest, "m %18s %14s %14s %14s\n", "t[m]", "u[m][0]", "u[m][1]", "u[m][2]");
3673 fout << string(dest);
3674 for (int k = 0; k < 3; k++)
3675 {
3676 sprintf(dest, "%d %18.10f %14.10f %14.10f %14.10f\n", k, t[k], u[k][0], u[k][1], u[k][2]);
3677 fout << string(dest);
3678 }
3679 fout << "\nCode for rotating Structure A from (x,y,z) to (X,Y,Z):\n"
3680 "for(i=0; i<L; i++)\n"
3681 "{\n"
3682 " X[i] = t[0] + u[0][0]*x[i] + u[0][1]*y[i] + u[0][2]*z[i];\n"
3683 " Y[i] = t[1] + u[1][0]*x[i] + u[1][1]*y[i] + u[1][2]*z[i];\n"
3684 " Z[i] = t[2] + u[2][0]*x[i] + u[2][1]*y[i] + u[2][2]*z[i];\n"
3685 "}\n";
3686 fout.close();
3687 }
3688 else
3689 cout << "Open file to output rotation matrix fail.\n";
3690 }
3691
3692 //output the final results
3693 void output_results(
3694 const string xname, const string yname,
3695 const char *chainID1, const char *chainID2,
3696 const int xlen, const int ylen, double t[3], double u[3][3],
3697 const double TM1, const double TM2,
3698 const double TM3, const double TM4, const double TM5,
3699 const double rmsd, const double d0_out,
3700 const char *seqM, const char *seqxA, const char *seqyA, const double Liden,
3701 const int n_ali8, const int L_ali,
3702 const double TM_ali, const double rmsd_ali, const double TM_0,
3703 const double d0_0, const double d0A, const double d0B,
3704 const double Lnorm_ass, const double d0_scale,
3705 const double d0a, const double d0u, const char* fname_matrix,
3706 const int outfmt_opt, const int ter_opt, const string fname_super,
3707 const int i_opt, const int a_opt, const bool u_opt, const bool d_opt,
3708 const int mirror_opt,
3709 const vector<string>&resi_vec1, const vector<string>&resi_vec2)
3710 {
3711 if (outfmt_opt<=0)
3712 {
3713 printf("\nName of Chain_1: %s%s (to be superimposed onto Chain_2)\n",
3714 xname.c_str(), chainID1);
3715 printf("Name of Chain_2: %s%s\n", yname.c_str(), chainID2);
3716 printf("Length of Chain_1: %d residues\n", xlen);
3717 printf("Length of Chain_2: %d residues\n\n", ylen);
3718
3719 if (i_opt)
3720 printf("User-specified initial alignment: TM/Lali/rmsd = %7.5lf, %4d, %6.3lf\n", TM_ali, L_ali, rmsd_ali);
3721
3722 printf("Aligned length= %d, RMSD= %6.2f, Seq_ID=n_identical/n_aligned= %4.3f\n", n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
3723 printf("TM-score= %6.5f (if normalized by length of Chain_1, i.e., LN=%d, d0=%.2f)\n", TM2, xlen, d0B);
3724 printf("TM-score= %6.5f (if normalized by length of Chain_2, i.e., LN=%d, d0=%.2f)\n", TM1, ylen, d0A);
3725
3726 if (a_opt==1)
3727 printf("TM-score= %6.5f (if normalized by average length of two structures, i.e., LN= %.1f, d0= %.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
3728 if (u_opt)
3729 printf("TM-score= %6.5f (if normalized by user-specified LN=%.2f and d0=%.2f)\n", TM4, Lnorm_ass, d0u);
3730 if (d_opt)
3731 printf("TM-score= %6.5f (if scaled by user-specified d0= %.2f, and LN= %d)\n", TM5, d0_scale, ylen);
3732 printf("(You should use TM-score normalized by length of the reference structure)\n");
3733
3734 //output alignment
3735 printf("\n(\":\" denotes residue pairs of d < %4.1f Angstrom, ", d0_out);
3736 printf("\".\" denotes other aligned residues)\n");
3737 printf("%s\n", seqxA);
3738 printf("%s\n", seqM);
3739 printf("%s\n", seqyA);
3740 }
3741 else if (outfmt_opt==1)
3742 {
3743 printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
3744 xname.c_str(), chainID1, xlen, d0B, Liden/xlen, TM2);
3745 printf("%s\n", seqxA);
3746 printf(">%s%s\tL=%d\td0=%.2f\tseqID=%.3f\tTM-score=%.5f\n",
3747 yname.c_str(), chainID2, ylen, d0A, Liden/ylen, TM1);
3748 printf("%s\n", seqyA);
3749
3750 printf("# Lali=%d\tRMSD=%.2f\tseqID_ali=%.3f\n",
3751 n_ali8, rmsd, (n_ali8>0)?Liden/n_ali8:0);
3752
3753 if (i_opt)
3754 printf("# User-specified initial alignment: TM=%.5lf\tLali=%4d\trmsd=%.3lf\n", TM_ali, L_ali, rmsd_ali);
3755
3756 if(a_opt)
3757 printf("# TM-score=%.5f (normalized by average length of two structures: L=%.1f\td0=%.2f)\n", TM3, (xlen+ylen)*0.5, d0a);
3758
3759 if(u_opt)
3760 printf("# TM-score=%.5f (normalized by user-specified L=%.2f\td0=%.2f)\n", TM4, Lnorm_ass, d0u);
3761
3762 if(d_opt)
3763 printf("# TM-score=%.5f (scaled by user-specified d0=%.2f\tL=%d)\n", TM5, d0_scale, ylen);
3764
3765 printf("$$$$\n");
3766 }
3767 else if (outfmt_opt==2)
3768 {
3769 printf("%s%s\t%s%s\t%.4f\t%.4f\t%.2f\t%4.3f\t%4.3f\t%4.3f\t%d\t%d\t%d",
3770 xname.c_str(), chainID1, yname.c_str(), chainID2, TM2, TM1, rmsd,
3771 Liden/xlen, Liden/ylen, (n_ali8>0)?Liden/n_ali8:0,
3772 xlen, ylen, n_ali8);
3773 }
3774 cout << endl;
3775
3776 if (strlen(fname_matrix))
3777 output_rotation_matrix(fname_matrix, t, u);
3778 if (fname_super.size())
3779 output_superpose(xname, yname, fname_super, t, u, ter_opt, mirror_opt,
3780 seqM, seqxA, seqyA, resi_vec1, resi_vec2, chainID1, chainID2,
3781 xlen, ylen, d0A, n_ali8, rmsd, TM1, Liden);
3782 }
3783
3784 double standard_TMscore(double **r1, double **r2, double **xtm, double **ytm,
3785 double **xt, double **x, double **y, int xlen, int ylen, int invmap[],
3786 int& L_ali, double& RMSD, double D0_MIN, double Lnorm, double d0,
3787 double d0_search, double score_d8, double t[3], double u[3][3],
3788 const int mol_type)
3789 {
3790 D0_MIN = 0.5;
3791 Lnorm = ylen;
3792 if (mol_type>0) // RNA
3793 {
3794 if (Lnorm<=11) d0=0.3;
3795 else if(Lnorm>11 && Lnorm<=15) d0=0.4;
3796 else if(Lnorm>15 && Lnorm<=19) d0=0.5;
3797 else if(Lnorm>19 && Lnorm<=23) d0=0.6;
3798 else if(Lnorm>23 && Lnorm<30) d0=0.7;
3799 else d0=(0.6*pow((Lnorm*1.0-0.5), 1.0/2)-2.5);
3800 }
3801 else
3802 {
3803 if (Lnorm > 21) d0=(1.24*pow((Lnorm*1.0-15), 1.0/3) -1.8);
3804 else d0 = D0_MIN;
3805 if (d0 < D0_MIN) d0 = D0_MIN;
3806 }
3807 double d0_input = d0;// Scaled by seq_min
3808
3809 double tmscore;// collected alined residues from invmap
3810 int n_al = 0;
3811 int i;
3812 for (int j = 0; j<ylen; j++)
3813 {
3814 i = invmap[j];
3815 if (i >= 0)
3816 {
3817 xtm[n_al][0] = x[i][0];
3818 xtm[n_al][1] = x[i][1];
3819 xtm[n_al][2] = x[i][2];
3820
3821 ytm[n_al][0] = y[j][0];
3822 ytm[n_al][1] = y[j][1];
3823 ytm[n_al][2] = y[j][2];
3824
3825 r1[n_al][0] = x[i][0];
3826 r1[n_al][1] = x[i][1];
3827 r1[n_al][2] = x[i][2];
3828
3829 r2[n_al][0] = y[j][0];
3830 r2[n_al][1] = y[j][1];
3831 r2[n_al][2] = y[j][2];
3832
3833 n_al++;
3834 }
3835 else if (i != -1) PrintErrorAndQuit("Wrong map!\n");
3836 }
3837 L_ali = n_al;
3838
3839 Kabsch(r1, r2, n_al, 0, &RMSD, t, u);
3840 RMSD = sqrt( RMSD/(1.0*n_al) );
3841
3842 int temp_simplify_step = 1;
3843 int temp_score_sum_method = 0;
3844 d0_search = d0_input;
3845 double rms = 0.0;
3846 tmscore = TMscore8_search_standard(r1, r2, xtm, ytm, xt, n_al, t, u,
3847 temp_simplify_step, temp_score_sum_method, &rms, d0_input,
3848 score_d8, d0);
3849 tmscore = tmscore * n_al / (1.0*Lnorm);
3850
3851 return tmscore;
3852 }
3853
3854 /* copy the value of t and u into t0,u0 */
3855 void copy_t_u(double t[3], double u[3][3], double t0[3], double u0[3][3])
3856 {
3857 int i,j;
3858 for (i=0;i<3;i++)
3859 {
3860 t0[i]=t[i];
3861 for (j=0;j<3;j++) u0[i][j]=u[i][j];
3862 }
3863 }
3864
3865 /* calculate approximate TM-score given rotation matrix */
3866 double approx_TM(const int xlen, const int ylen, const int a_opt,
3867 double **xa, double **ya, double t[3], double u[3][3],
3868 const int invmap0[], const int mol_type)
3869 {
3870 double Lnorm_0=ylen; // normalized by the second protein
3871 if (a_opt==-2 && xlen>ylen) Lnorm_0=xlen; // longer
3872 else if (a_opt==-1 && xlen<ylen) Lnorm_0=xlen; // shorter
3873 else if (a_opt==1) Lnorm_0=(xlen+ylen)/2.; // average
3874
3875 double D0_MIN;
3876 double Lnorm;
3877 double d0;
3878 double d0_search;
3879 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
3880 double TMtmp=0;
3881 double d;
3882 double xtmp[3]={0,0,0};
3883
3884 for(int i=0,j=0; j<ylen; j++)
3885 {
3886 i=invmap0[j];
3887 if(i>=0)//aligned
3888 {
3889 transform(t, u, &xa[i][0], &xtmp[0]);
3890 d=sqrt(dist(&xtmp[0], &ya[j][0]));
3891 TMtmp+=1/(1+(d/d0)*(d/d0));
3892 //if (d <= score_d8) TMtmp+=1/(1+(d/d0)*(d/d0));
3893 }
3894 }
3895 TMtmp/=Lnorm_0;
3896 return TMtmp;
3897 }
3898
3899 void clean_up_after_approx_TM(int *invmap0, int *invmap,
3900 double **score, bool **path, double **val, double **xtm, double **ytm,
3901 double **xt, double **r1, double **r2, const int xlen, const int minlen)
3902 {
3903 delete [] invmap0;
3904 delete [] invmap;
3905 DeleteArray(&score, xlen+1);
3906 DeleteArray(&path, xlen+1);
3907 DeleteArray(&val, xlen+1);
3908 DeleteArray(&xtm, minlen);
3909 DeleteArray(&ytm, minlen);
3910 DeleteArray(&xt, xlen);
3911 DeleteArray(&r1, minlen);
3912 DeleteArray(&r2, minlen);
3913 return;
3914 }
3915
3916 /* Entry function for TM-align. Return TM-score calculation status:
3917 * 0 - full TM-score calculation
3918 * 1 - terminated due to exception
3919 * 2-7 - pre-terminated due to low TM-score */
3920 int TMalign_main(double **xa, double **ya,
3921 const char *seqx, const char *seqy, const char *secx, const char *secy,
3922 double t0[3], double u0[3][3],
3923 double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
3924 double &d0_0, double &TM_0,
3925 double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
3926 string &seqM, string &seqxA, string &seqyA,
3927 double &rmsd0, int &L_ali, double &Liden,
3928 double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
3929 const int xlen, const int ylen,
3930 const vector<string> sequence, const double Lnorm_ass,
3931 const double d0_scale, const int i_opt, const int a_opt,
3932 const bool u_opt, const bool d_opt, const bool fast_opt,
3933 const int mol_type, const double TMcut=-1)
3934 {
3935 double D0_MIN; //for d0
3936 double Lnorm; //normalization length
3937 double score_d8,d0,d0_search,dcu0;//for TMscore search
3938 double t[3], u[3][3]; //Kabsch translation vector and rotation matrix
3939 double **score; // Input score table for dynamic programming
3940 bool **path; // for dynamic programming
3941 double **val; // for dynamic programming
3942 double **xtm, **ytm; // for TMscore search engine
3943 double **xt; //for saving the superposed version of r_1 or xtm
3944 double **r1, **r2; // for Kabsch rotation
3945
3946 /***********************/
3947 /* allocate memory */
3948 /***********************/
3949 int minlen = min(xlen, ylen);
3950 NewArray(&score, xlen+1, ylen+1);
3951 NewArray(&path, xlen+1, ylen+1);
3952 NewArray(&val, xlen+1, ylen+1);
3953 NewArray(&xtm, minlen, 3);
3954 NewArray(&ytm, minlen, 3);
3955 NewArray(&xt, xlen, 3);
3956 NewArray(&r1, minlen, 3);
3957 NewArray(&r2, minlen, 3);
3958
3959 /***********************/
3960 /* parameter set */
3961 /***********************/
3962 parameter_set4search(xlen, ylen, D0_MIN, Lnorm,
3963 score_d8, d0, d0_search, dcu0);
3964 int simplify_step = 40; //for similified search engine
3965 int score_sum_method = 8; //for scoring method, whether only sum over pairs with dis<score_d8
3966
3967 int i;
3968 int *invmap0 = new int[ylen+1];
3969 int *invmap = new int[ylen+1];
3970 double TM, TMmax=-1;
3971 for(i=0; i<ylen; i++) invmap0[i]=-1;
3972
3973 double ddcc=0.4;
3974 if (Lnorm <= 40) ddcc=0.1; //Lnorm was setted in parameter_set4search
3975 double local_d0_search = d0_search;
3976
3977 //************************************************//
3978 // get initial alignment from user's input: //
3979 // Stick to the initial alignment //
3980 //************************************************//
3981 bool bAlignStick = false;
3982 if (i_opt==3)// if input has set parameter for "-I"
3983 {
3984 // In the original code, this loop starts from 1, which is
3985 // incorrect. Fortran starts from 1 but C++ should starts from 0.
3986 for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
3987 invmap[j] = -1;
3988
3989 int i1 = -1;// in C version, index starts from zero, not from one
3990 int i2 = -1;
3991 int L1 = sequence[0].size();
3992 int L2 = sequence[1].size();
3993 int L = min(L1, L2);// Get positions for aligned residues
3994 for (int kk1 = 0; kk1 < L; kk1++)
3995 {
3996 if (sequence[0][kk1] != '-') i1++;
3997 if (sequence[1][kk1] != '-')
3998 {
3999 i2++;
4000 if (i2 >= ylen || i1 >= xlen) kk1 = L;
4001 else if (sequence[0][kk1] != '-') invmap[i2] = i1;
4002 }
4003 }
4004
4005 //--------------- 2. Align proteins from original alignment
4006 double prevD0_MIN = D0_MIN;// stored for later use
4007 int prevLnorm = Lnorm;
4008 double prevd0 = d0;
4009 TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
4010 invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0, d0_search, score_d8,
4011 t, u, mol_type);
4012 D0_MIN = prevD0_MIN;
4013 Lnorm = prevLnorm;
4014 d0 = prevd0;
4015 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
4016 invmap, t, u, 40, 8, local_d0_search, true, Lnorm, score_d8, d0);
4017 if (TM > TMmax)
4018 {
4019 TMmax = TM;
4020 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4021 }
4022 bAlignStick = true;
4023 }
4024
4025 /******************************************************/
4026 /* get initial alignment with gapless threading */
4027 /******************************************************/
4028 if (!bAlignStick)
4029 {
4030 get_initial(r1, r2, xtm, ytm, xa, ya, xlen, ylen, invmap0, d0,
4031 d0_search, fast_opt, t, u);
4032 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap0,
4033 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
4034 score_d8, d0);
4035 if (TM>TMmax) TMmax = TM;
4036 if (TMcut>0) copy_t_u(t, u, t0, u0);
4037 //run dynamic programing iteratively to find the best alignment
4038 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya, xlen, ylen,
4039 t, u, invmap, 0, 2, (fast_opt)?2:30, local_d0_search,
4040 D0_MIN, Lnorm, d0, score_d8);
4041 if (TM>TMmax)
4042 {
4043 TMmax = TM;
4044 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4045 if (TMcut>0) copy_t_u(t, u, t0, u0);
4046 }
4047
4048 if (TMcut>0) // pre-terminate if TM-score is too low
4049 {
4050 double TMtmp=approx_TM(xlen, ylen, a_opt,
4051 xa, ya, t0, u0, invmap0, mol_type);
4052
4053 if (TMtmp<0.5*TMcut)
4054 {
4055 TM1=TM2=TM3=TM4=TM5=TMtmp;
4056 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4057 xtm, ytm, xt, r1, r2, xlen, minlen);
4058 return 2;
4059 }
4060 }
4061
4062 /************************************************************/
4063 /* get initial alignment based on secondary structure */
4064 /************************************************************/
4065 get_initial_ss(path, val, secx, secy, xlen, ylen, invmap);
4066 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
4067 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
4068 score_d8, d0);
4069 if (TM>TMmax)
4070 {
4071 TMmax = TM;
4072 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4073 if (TMcut>0) copy_t_u(t, u, t0, u0);
4074 }
4075 if (TM > TMmax*0.2)
4076 {
4077 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
4078 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
4079 local_d0_search, D0_MIN, Lnorm, d0, score_d8);
4080 if (TM>TMmax)
4081 {
4082 TMmax = TM;
4083 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4084 if (TMcut>0) copy_t_u(t, u, t0, u0);
4085 }
4086 }
4087
4088 if (TMcut>0) // pre-terminate if TM-score is too low
4089 {
4090 double TMtmp=approx_TM(xlen, ylen, a_opt,
4091 xa, ya, t0, u0, invmap0, mol_type);
4092
4093 if (TMtmp<0.52*TMcut)
4094 {
4095 TM1=TM2=TM3=TM4=TM5=TMtmp;
4096 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4097 xtm, ytm, xt, r1, r2, xlen, minlen);
4098 return 3;
4099 }
4100 }
4101
4102 /************************************************************/
4103 /* get initial alignment based on local superposition */
4104 /************************************************************/
4105 //=initial5 in original TM-align
4106 if (get_initial5( r1, r2, xtm, ytm, path, val, xa, ya,
4107 xlen, ylen, invmap, d0, d0_search, fast_opt, D0_MIN))
4108 {
4109 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
4110 invmap, t, u, simplify_step, score_sum_method,
4111 local_d0_search, Lnorm, score_d8, d0);
4112 if (TM>TMmax)
4113 {
4114 TMmax = TM;
4115 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4116 if (TMcut>0) copy_t_u(t, u, t0, u0);
4117 }
4118 if (TM > TMmax*ddcc)
4119 {
4120 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
4121 xlen, ylen, t, u, invmap, 0, 2, 2, local_d0_search,
4122 D0_MIN, Lnorm, d0, score_d8);
4123 if (TM>TMmax)
4124 {
4125 TMmax = TM;
4126 for (int i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4127 if (TMcut>0) copy_t_u(t, u, t0, u0);
4128 }
4129 }
4130 }
4131 else
4132 cerr << "\n\nWarning: initial alignment from local superposition fail!\n\n" << endl;
4133
4134 if (TMcut>0) // pre-terminate if TM-score is too low
4135 {
4136 double TMtmp=approx_TM(xlen, ylen, a_opt,
4137 xa, ya, t0, u0, invmap0, mol_type);
4138
4139 if (TMtmp<0.54*TMcut)
4140 {
4141 TM1=TM2=TM3=TM4=TM5=TMtmp;
4142 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4143 xtm, ytm, xt, r1, r2, xlen, minlen);
4144 return 4;
4145 }
4146 }
4147
4148 /********************************************************************/
4149 /* get initial alignment by local superposition+secondary structure */
4150 /********************************************************************/
4151 //=initial3 in original TM-align
4152 get_initial_ssplus(r1, r2, score, path, val, secx, secy, xa, ya,
4153 xlen, ylen, invmap0, invmap, D0_MIN, d0);
4154 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
4155 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
4156 score_d8, d0);
4157 if (TM>TMmax)
4158 {
4159 TMmax = TM;
4160 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4161 if (TMcut>0) copy_t_u(t, u, t0, u0);
4162 }
4163 if (TM > TMmax*ddcc)
4164 {
4165 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
4166 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
4167 local_d0_search, D0_MIN, Lnorm, d0, score_d8);
4168 if (TM>TMmax)
4169 {
4170 TMmax = TM;
4171 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4172 if (TMcut>0) copy_t_u(t, u, t0, u0);
4173 }
4174 }
4175
4176 if (TMcut>0) // pre-terminate if TM-score is too low
4177 {
4178 double TMtmp=approx_TM(xlen, ylen, a_opt,
4179 xa, ya, t0, u0, invmap0, mol_type);
4180
4181 if (TMtmp<0.56*TMcut)
4182 {
4183 TM1=TM2=TM3=TM4=TM5=TMtmp;
4184 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4185 xtm, ytm, xt, r1, r2, xlen, minlen);
4186 return 5;
4187 }
4188 }
4189
4190 /*******************************************************************/
4191 /* get initial alignment based on fragment gapless threading */
4192 /*******************************************************************/
4193 //=initial4 in original TM-align
4194 get_initial_fgt(r1, r2, xtm, ytm, xa, ya, xlen, ylen,
4195 invmap, d0, d0_search, dcu0, fast_opt, t, u);
4196 TM = detailed_search(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen, invmap,
4197 t, u, simplify_step, score_sum_method, local_d0_search, Lnorm,
4198 score_d8, d0);
4199 if (TM>TMmax)
4200 {
4201 TMmax = TM;
4202 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4203 if (TMcut>0) copy_t_u(t, u, t0, u0);
4204 }
4205 if (TM > TMmax*ddcc)
4206 {
4207 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
4208 xlen, ylen, t, u, invmap, 1, 2, 2, local_d0_search, D0_MIN,
4209 Lnorm, d0, score_d8);
4210 if (TM>TMmax)
4211 {
4212 TMmax = TM;
4213 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4214 if (TMcut>0) copy_t_u(t, u, t0, u0);
4215 }
4216 }
4217
4218 if (TMcut>0) // pre-terminate if TM-score is too low
4219 {
4220 double TMtmp=approx_TM(xlen, ylen, a_opt,
4221 xa, ya, t0, u0, invmap0, mol_type);
4222
4223 if (TMtmp<0.58*TMcut)
4224 {
4225 TM1=TM2=TM3=TM4=TM5=TMtmp;
4226 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4227 xtm, ytm, xt, r1, r2, xlen, minlen);
4228 return 6;
4229 }
4230 }
4231
4232 //************************************************//
4233 // get initial alignment from user's input: //
4234 //************************************************//
4235 if (i_opt==1)// if input has set parameter for "-i"
4236 {
4237 for (int j = 0; j < ylen; j++)// Set aligned position to be "-1"
4238 invmap[j] = -1;
4239
4240 int i1 = -1;// in C version, index starts from zero, not from one
4241 int i2 = -1;
4242 int L1 = sequence[0].size();
4243 int L2 = sequence[1].size();
4244 int L = min(L1, L2);// Get positions for aligned residues
4245 for (int kk1 = 0; kk1 < L; kk1++)
4246 {
4247 if (sequence[0][kk1] != '-')
4248 i1++;
4249 if (sequence[1][kk1] != '-')
4250 {
4251 i2++;
4252 if (i2 >= ylen || i1 >= xlen) kk1 = L;
4253 else if (sequence[0][kk1] != '-') invmap[i2] = i1;
4254 }
4255 }
4256
4257 //--------------- 2. Align proteins from original alignment
4258 double prevD0_MIN = D0_MIN;// stored for later use
4259 int prevLnorm = Lnorm;
4260 double prevd0 = d0;
4261 TM_ali = standard_TMscore(r1, r2, xtm, ytm, xt, xa, ya,
4262 xlen, ylen, invmap, L_ali, rmsd_ali, D0_MIN, Lnorm, d0,
4263 d0_search, score_d8, t, u, mol_type);
4264 D0_MIN = prevD0_MIN;
4265 Lnorm = prevLnorm;
4266 d0 = prevd0;
4267
4268 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya,
4269 xlen, ylen, invmap, t, u, 40, 8, local_d0_search, true, Lnorm,
4270 score_d8, d0);
4271 if (TM > TMmax)
4272 {
4273 TMmax = TM;
4274 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4275 }
4276 // Different from get_initial, get_initial_ss and get_initial_ssplus
4277 TM = DP_iter(r1, r2, xtm, ytm, xt, path, val, xa, ya,
4278 xlen, ylen, t, u, invmap, 0, 2, (fast_opt)?2:30,
4279 local_d0_search, D0_MIN, Lnorm, d0, score_d8);
4280 if (TM>TMmax)
4281 {
4282 TMmax = TM;
4283 for (i = 0; i<ylen; i++) invmap0[i] = invmap[i];
4284 }
4285 }
4286 }
4287
4288
4289
4290 //*******************************************************************//
4291 // The alignment will not be changed any more in the following //
4292 //*******************************************************************//
4293 //check if the initial alignment is generated approriately
4294 bool flag=false;
4295 for(i=0; i<ylen; i++)
4296 {
4297 if(invmap0[i]>=0)
4298 {
4299 flag=true;
4300 break;
4301 }
4302 }
4303 if(!flag)
4304 {
4305 cout << "There is no alignment between the two proteins!" << endl;
4306 cout << "Program stop with no result!" << endl;
4307 return 1;
4308 }
4309
4310 /* last TM-score pre-termination */
4311 if (TMcut>0)
4312 {
4313 double TMtmp=approx_TM(xlen, ylen, a_opt,
4314 xa, ya, t0, u0, invmap0, mol_type);
4315
4316 if (TMtmp<0.6*TMcut)
4317 {
4318 TM1=TM2=TM3=TM4=TM5=TMtmp;
4319 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4320 xtm, ytm, xt, r1, r2, xlen, minlen);
4321 return 7;
4322 }
4323 }
4324
4325 //********************************************************************//
4326 // Detailed TMscore search engine --> prepare for final TMscore //
4327 //********************************************************************//
4328 //run detailed TMscore search engine for the best alignment, and
4329 //extract the best rotation matrix (t, u) for the best alginment
4330 simplify_step=1;
4331 if (fast_opt) simplify_step=40;
4332 score_sum_method=8;
4333 TM = detailed_search_standard(r1, r2, xtm, ytm, xt, xa, ya, xlen, ylen,
4334 invmap0, t, u, simplify_step, score_sum_method, local_d0_search,
4335 false, Lnorm, score_d8, d0);
4336
4337 //select pairs with dis<d8 for final TMscore computation and output alignment
4338 int k=0;
4339 int *m1, *m2;
4340 double d;
4341 m1=new int[xlen]; //alignd index in x
4342 m2=new int[ylen]; //alignd index in y
4343 do_rotation(xa, xt, xlen, t, u);
4344 k=0;
4345 for(int j=0; j<ylen; j++)
4346 {
4347 i=invmap0[j];
4348 if(i>=0)//aligned
4349 {
4350 n_ali++;
4351 d=sqrt(dist(&xt[i][0], &ya[j][0]));
4352 if (d <= score_d8 || (i_opt == 3))
4353 {
4354 m1[k]=i;
4355 m2[k]=j;
4356
4357 xtm[k][0]=xa[i][0];
4358 xtm[k][1]=xa[i][1];
4359 xtm[k][2]=xa[i][2];
4360
4361 ytm[k][0]=ya[j][0];
4362 ytm[k][1]=ya[j][1];
4363 ytm[k][2]=ya[j][2];
4364
4365 r1[k][0] = xt[i][0];
4366 r1[k][1] = xt[i][1];
4367 r1[k][2] = xt[i][2];
4368 r2[k][0] = ya[j][0];
4369 r2[k][1] = ya[j][1];
4370 r2[k][2] = ya[j][2];
4371
4372 k++;
4373 }
4374 }
4375 }
4376 n_ali8=k;
4377
4378 Kabsch(r1, r2, n_ali8, 0, &rmsd0, t, u);// rmsd0 is used for final output, only recalculate rmsd0, not t & u
4379 rmsd0 = sqrt(rmsd0 / n_ali8);
4380
4381
4382 //****************************************//
4383 // Final TMscore //
4384 // Please set parameters for output //
4385 //****************************************//
4386 double rmsd;
4387 simplify_step=1;
4388 score_sum_method=0;
4389 double Lnorm_0=ylen;
4390
4391
4392 //normalized by length of structure A
4393 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
4394 d0A=d0;
4395 d0_0=d0A;
4396 local_d0_search = d0_search;
4397 TM1 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0, simplify_step,
4398 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
4399 TM_0 = TM1;
4400
4401 //normalized by length of structure B
4402 parameter_set4final(xlen+0.0, D0_MIN, Lnorm, d0, d0_search, mol_type);
4403 d0B=d0;
4404 local_d0_search = d0_search;
4405 TM2 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t, u, simplify_step,
4406 score_sum_method, &rmsd, local_d0_search, Lnorm, score_d8, d0);
4407
4408 double Lnorm_d0;
4409 if (a_opt>0)
4410 {
4411 //normalized by average length of structures A, B
4412 Lnorm_0=(xlen+ylen)*0.5;
4413 parameter_set4final(Lnorm_0, D0_MIN, Lnorm, d0, d0_search, mol_type);
4414 d0a=d0;
4415 d0_0=d0a;
4416 local_d0_search = d0_search;
4417
4418 TM3 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
4419 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
4420 score_d8, d0);
4421 TM_0=TM3;
4422 }
4423 if (u_opt)
4424 {
4425 //normalized by user assigned length
4426 parameter_set4final(Lnorm_ass, D0_MIN, Lnorm,
4427 d0, d0_search, mol_type);
4428 d0u=d0;
4429 d0_0=d0u;
4430 Lnorm_0=Lnorm_ass;
4431 local_d0_search = d0_search;
4432 TM4 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
4433 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
4434 score_d8, d0);
4435 TM_0=TM4;
4436 }
4437 if (d_opt)
4438 {
4439 //scaled by user assigned d0
4440 parameter_set4scale(ylen, d0_scale, Lnorm, d0, d0_search);
4441 d0_out=d0_scale;
4442 d0_0=d0_scale;
4443 //Lnorm_0=ylen;
4444 Lnorm_d0=Lnorm_0;
4445 local_d0_search = d0_search;
4446 TM5 = TMscore8_search(r1, r2, xtm, ytm, xt, n_ali8, t0, u0,
4447 simplify_step, score_sum_method, &rmsd, local_d0_search, Lnorm,
4448 score_d8, d0);
4449 TM_0=TM5;
4450 }
4451
4452 /* derive alignment from superposition */
4453 int ali_len=xlen+ylen; //maximum length of alignment
4454 seqxA.assign(ali_len,'-');
4455 seqM.assign( ali_len,' ');
4456 seqyA.assign(ali_len,'-');
4457
4458 //do_rotation(xa, xt, xlen, t, u);
4459 do_rotation(xa, xt, xlen, t0, u0);
4460
4461 int kk=0, i_old=0, j_old=0;
4462 d=0;
4463 for(int k=0; k<n_ali8; k++)
4464 {
4465 for(int i=i_old; i<m1[k]; i++)
4466 {
4467 //align x to gap
4468 seqxA[kk]=seqx[i];
4469 seqyA[kk]='-';
4470 seqM[kk]=' ';
4471 kk++;
4472 }
4473
4474 for(int j=j_old; j<m2[k]; j++)
4475 {
4476 //align y to gap
4477 seqxA[kk]='-';
4478 seqyA[kk]=seqy[j];
4479 seqM[kk]=' ';
4480 kk++;
4481 }
4482
4483 seqxA[kk]=seqx[m1[k]];
4484 seqyA[kk]=seqy[m2[k]];
4485 Liden+=(seqxA[kk]==seqyA[kk]);
4486 d=sqrt(dist(&xt[m1[k]][0], &ya[m2[k]][0]));
4487 if(d<d0_out) seqM[kk]=':';
4488 else seqM[kk]='.';
4489 kk++;
4490 i_old=m1[k]+1;
4491 j_old=m2[k]+1;
4492 }
4493
4494 //tail
4495 for(int i=i_old; i<xlen; i++)
4496 {
4497 //align x to gap
4498 seqxA[kk]=seqx[i];
4499 seqyA[kk]='-';
4500 seqM[kk]=' ';
4501 kk++;
4502 }
4503 for(int j=j_old; j<ylen; j++)
4504 {
4505 //align y to gap
4506 seqxA[kk]='-';
4507 seqyA[kk]=seqy[j];
4508 seqM[kk]=' ';
4509 kk++;
4510 }
4511 seqxA=seqxA.substr(0,kk);
4512 seqyA=seqyA.substr(0,kk);
4513 seqM =seqM.substr(0,kk);
4514
4515 /* free memory */
4516 clean_up_after_approx_TM(invmap0, invmap, score, path, val,
4517 xtm, ytm, xt, r1, r2, xlen, minlen);
4518 delete [] m1;
4519 delete [] m2;
4520 return 0; // zero for no exception
4521 }
4522
4523 /* entry function for TM-align with circular permutation
4524 * i_opt, a_opt, u_opt, d_opt, TMcut are not implemented yet */
4525 int CPalign_main(double **xa, double **ya,
4526 const char *seqx, const char *seqy, const char *secx, const char *secy,
4527 double t0[3], double u0[3][3],
4528 double &TM1, double &TM2, double &TM3, double &TM4, double &TM5,
4529 double &d0_0, double &TM_0,
4530 double &d0A, double &d0B, double &d0u, double &d0a, double &d0_out,
4531 string &seqM, string &seqxA, string &seqyA,
4532 double &rmsd0, int &L_ali, double &Liden,
4533 double &TM_ali, double &rmsd_ali, int &n_ali, int &n_ali8,
4534 const int xlen, const int ylen,
4535 const vector<string> sequence, const double Lnorm_ass,
4536 const double d0_scale, const int i_opt, const int a_opt,
4537 const bool u_opt, const bool d_opt, const bool fast_opt,
4538 const int mol_type, const double TMcut=-1)
4539 {
4540 char *seqx_cp, *seqy_cp; // for the protein sequence
4541 char *secx_cp, *secy_cp; // for the secondary structure
4542 double **xa_cp, **ya_cp; // coordinates
4543 string seqxA_cp,seqyA_cp; // alignment
4544 int i,r;
4545 int cp_point=0; // position of circular permutation
4546 int cp_aln_best=0; // amount of aligned residue in sliding window
4547 int cp_aln_current;// amount of aligned residue in sliding window
4548
4549 /* duplicate structure */
4550 NewArray(&xa_cp, xlen*2, 3);
4551 seqx_cp = new char[xlen*2 + 1];
4552 secx_cp = new char[xlen*2 + 1];
4553 for (r=0;r<xlen;r++)
4554 {
4555 xa_cp[r+xlen][0]=xa_cp[r][0]=xa[r][0];
4556 xa_cp[r+xlen][1]=xa_cp[r][1]=xa[r][1];
4557 xa_cp[r+xlen][2]=xa_cp[r][2]=xa[r][2];
4558 seqx_cp[r+xlen]=seqx_cp[r]=seqx[r];
4559 secx_cp[r+xlen]=secx_cp[r]=secx[r];
4560 }
4561 seqx_cp[2*xlen]=0;
4562 secx_cp[2*xlen]=0;
4563
4564 /* fTM-align alignment */
4565 double TM1_cp,TM2_cp;
4566 TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
4567 t0, u0, TM1_cp, TM2_cp, TM3, TM4, TM5,
4568 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
4569 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
4570 xlen*2, ylen, sequence, Lnorm_ass, d0_scale,
4571 0, false, false, false, true, mol_type, -1);
4572
4573 /* delete gap in seqxA_cp */
4574 r=0;
4575 seqxA=seqxA_cp;
4576 seqyA=seqyA_cp;
4577 for (i=0;i<seqxA_cp.size();i++)
4578 {
4579 if (seqxA_cp[i]!='-')
4580 {
4581 seqxA[r]=seqxA_cp[i];
4582 seqyA[r]=seqyA_cp[i];
4583 r++;
4584 }
4585 }
4586 seqxA=seqxA.substr(0,r);
4587 seqyA=seqyA.substr(0,r);
4588
4589 /* count the number of aligned residues in each window
4590 * r - residue index in the original unaligned sequence
4591 * i - position in the alignment */
4592 for (r=0;r<xlen-1;r++)
4593 {
4594 cp_aln_current=0;
4595 for (i=r;i<r+xlen;i++) cp_aln_current+=(seqyA[i]!='-');
4596
4597 if (cp_aln_current>cp_aln_best)
4598 {
4599 cp_aln_best=cp_aln_current;
4600 cp_point=r;
4601 }
4602 }
4603 seqM.clear();
4604 seqxA.clear();
4605 seqyA.clear();
4606 seqxA_cp.clear();
4607 seqyA_cp.clear();
4608 rmsd0=Liden=n_ali=n_ali8=0;
4609
4610 /* fTM-align alignment */
4611 TMalign_main(xa, ya, seqx, seqy, secx, secy,
4612 t0, u0, TM1, TM2, TM3, TM4, TM5,
4613 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA, seqyA,
4614 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
4615 xlen, ylen, sequence, Lnorm_ass, d0_scale,
4616 0, false, false, false, true, mol_type, -1);
4617
4618 /* do not use cricular permutation of number of aligned residues is not
4619 * larger than sequence-order dependent alignment */
4620 if (n_ali8>cp_aln_best) cp_point=0;
4621
4622 /* prepare structure for final alignment */
4623 seqM.clear();
4624 seqxA.clear();
4625 seqyA.clear();
4626 rmsd0=Liden=n_ali=n_ali8=0;
4627 if (cp_point!=0)
4628 {
4629 for (r=0;r<xlen;r++)
4630 {
4631 xa_cp[r][0]=xa_cp[r+cp_point][0];
4632 xa_cp[r][1]=xa_cp[r+cp_point][1];
4633 xa_cp[r][2]=xa_cp[r+cp_point][2];
4634 seqx_cp[r]=seqx_cp[r+cp_point];
4635 secx_cp[r]=secx_cp[r+cp_point];
4636 }
4637 }
4638 seqx_cp[xlen]=0;
4639 secx_cp[xlen]=0;
4640
4641 /* full TM-align */
4642 TMalign_main(xa_cp, ya, seqx_cp, seqy, secx_cp, secy,
4643 t0, u0, TM1, TM2, TM3, TM4, TM5,
4644 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out, seqM, seqxA_cp, seqyA_cp,
4645 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
4646 xlen, ylen, sequence, Lnorm_ass, d0_scale,
4647 i_opt, a_opt, u_opt, d_opt, fast_opt, mol_type, TMcut);
4648
4649 /* correct alignment
4650 * r - residue index in the original unaligned sequence
4651 * i - position in the alignment */
4652 if (cp_point>0)
4653 {
4654 r=0;
4655 for (i=0;i<seqxA_cp.size();i++)
4656 {
4657 r+=(seqxA_cp[i]!='-');
4658 if (r>=(xlen-cp_point))
4659 {
4660 i++;
4661 break;
4662 }
4663 }
4664 seqxA=seqxA_cp.substr(0,i)+'*'+seqxA_cp.substr(i);
4665 seqM =seqM.substr(0,i) +' '+seqM.substr(i);
4666 seqyA=seqyA_cp.substr(0,i)+'-'+seqyA_cp.substr(i);
4667 }
4668 else
4669 {
4670 seqxA=seqxA_cp;
4671 seqyA=seqyA_cp;
4672 }
4673
4674 /* clean up */
4675 delete[]seqx_cp;
4676 delete[]secx_cp;
4677 DeleteArray(&xa_cp,xlen*2);
4678 seqxA_cp.clear();
4679 seqyA_cp.clear();
4680 return cp_point;
4681 }
4682
4683 int main(int argc, char *argv[])
4684 {
4685 if (argc < 2) print_help();
4686
4687
4688 clock_t t1, t2;
4689 t1 = clock();
4690
4691 /**********************/
4692 /* get argument */
4693 /**********************/
4694 string xname = "";
4695 string yname = "";
4696 string fname_super = ""; // file name for superposed structure
4697 string fname_lign = ""; // file name for user alignment
4698 string fname_matrix= ""; // file name for output matrix
4699 vector<string> sequence; // get value from alignment file
4700 double Lnorm_ass, d0_scale;
4701
4702 bool h_opt = false; // print full help message
4703 bool v_opt = false; // print version
4704 bool m_opt = false; // flag for -m, output rotation matrix
4705 int i_opt = 0; // 1 for -i, 3 for -I
4706 bool o_opt = false; // flag for -o, output superposed structure
4707 int a_opt = 0; // flag for -a, do not normalized by average length
4708 bool u_opt = false; // flag for -u, normalized by user specified length
4709 bool d_opt = false; // flag for -d, user specified d0
4710
4711 double TMcut =-1;
4712 int infmt1_opt=-1; // PDB or PDBx/mmCIF format for chain_1
4713 int infmt2_opt=-1; // PDB or PDBx/mmCIF format for chain_2
4714 int ter_opt =3; // TER, END, or different chainID
4715 int split_opt =0; // do not split chain
4716 int outfmt_opt=0; // set -outfmt to full output
4717 bool fast_opt =false; // flags for -fast, fTM-align algorithm
4718 int cp_opt =0; // do not check circular permutation
4719 int mirror_opt=0; // do not align mirror
4720 int het_opt=0; // do not read HETATM residues
4721 string atom_opt ="auto";// use C alpha atom for protein and C3' for RNA
4722 string mol_opt ="auto";// auto-detect the molecule type as protein/RNA
4723 string suffix_opt=""; // set -suffix to empty
4724 string dir_opt =""; // set -dir to empty
4725 string dir1_opt =""; // set -dir1 to empty
4726 string dir2_opt =""; // set -dir2 to empty
4727 int byresi_opt=0; // set -byresi to 0
4728 vector<string> chain1_list; // only when -dir1 is set
4729 vector<string> chain2_list; // only when -dir2 is set
4730
4731 for(int i = 1; i < argc; i++)
4732 {
4733 if ( !strcmp(argv[i],"-o") && i < (argc-1) )
4734 {
4735 fname_super = argv[i + 1]; o_opt = true; i++;
4736 }
4737 else if ( (!strcmp(argv[i],"-u") ||
4738 !strcmp(argv[i],"-L")) && i < (argc-1) )
4739 {
4740 Lnorm_ass = atof(argv[i + 1]); u_opt = true; i++;
4741 }
4742 else if ( !strcmp(argv[i],"-a") && i < (argc-1) )
4743 {
4744 if (!strcmp(argv[i + 1], "T")) a_opt=true;
4745 else if (!strcmp(argv[i + 1], "F")) a_opt=false;
4746 else
4747 {
4748 a_opt=atoi(argv[i + 1]);
4749 if (a_opt!=-2 && a_opt!=-1 && a_opt!=1)
4750 PrintErrorAndQuit("-a must be -2, -1, 1, T or F");
4751 }
4752 i++;
4753 }
4754 else if ( !strcmp(argv[i],"-d") && i < (argc-1) )
4755 {
4756 d0_scale = atof(argv[i + 1]); d_opt = true; i++;
4757 }
4758 else if ( !strcmp(argv[i],"-v") )
4759 {
4760 v_opt = true;
4761 }
4762 else if ( !strcmp(argv[i],"-h") )
4763 {
4764 h_opt = true;
4765 }
4766 else if ( !strcmp(argv[i],"-i") && i < (argc-1) )
4767 {
4768 if (i_opt==3)
4769 PrintErrorAndQuit("ERROR! -i and -I cannot be used together");
4770 fname_lign = argv[i + 1]; i_opt = 1; i++;
4771 }
4772 else if (!strcmp(argv[i], "-I") && i < (argc-1) )
4773 {
4774 if (i_opt==1)
4775 PrintErrorAndQuit("ERROR! -I and -i cannot be used together");
4776 fname_lign = argv[i + 1]; i_opt = 3; i++;
4777 }
4778 else if (!strcmp(argv[i], "-m") && i < (argc-1) )
4779 {
4780 fname_matrix = argv[i + 1]; m_opt = true; i++;
4781 }// get filename for rotation matrix
4782 else if (!strcmp(argv[i], "-fast"))
4783 {
4784 fast_opt = true;
4785 }
4786 else if ( !strcmp(argv[i],"-infmt1") && i < (argc-1) )
4787 {
4788 infmt1_opt=atoi(argv[i + 1]); i++;
4789 }
4790 else if ( !strcmp(argv[i],"-infmt2") && i < (argc-1) )
4791 {
4792 infmt2_opt=atoi(argv[i + 1]); i++;
4793 }
4794 else if ( !strcmp(argv[i],"-ter") && i < (argc-1) )
4795 {
4796 ter_opt=atoi(argv[i + 1]); i++;
4797 }
4798 else if ( !strcmp(argv[i],"-split") && i < (argc-1) )
4799 {
4800 split_opt=atoi(argv[i + 1]); i++;
4801 }
4802 else if ( !strcmp(argv[i],"-atom") && i < (argc-1) )
4803 {
4804 atom_opt=argv[i + 1]; i++;
4805 }
4806 else if ( !strcmp(argv[i],"-mol") && i < (argc-1) )
4807 {
4808 mol_opt=argv[i + 1]; i++;
4809 }
4810 else if ( !strcmp(argv[i],"-dir") && i < (argc-1) )
4811 {
4812 dir_opt=argv[i + 1]; i++;
4813 }
4814 else if ( !strcmp(argv[i],"-dir1") && i < (argc-1) )
4815 {
4816 dir1_opt=argv[i + 1]; i++;
4817 }
4818 else if ( !strcmp(argv[i],"-dir2") && i < (argc-1) )
4819 {
4820 dir2_opt=argv[i + 1]; i++;
4821 }
4822 else if ( !strcmp(argv[i],"-suffix") && i < (argc-1) )
4823 {
4824 suffix_opt=argv[i + 1]; i++;
4825 }
4826 else if ( !strcmp(argv[i],"-outfmt") && i < (argc-1) )
4827 {
4828 outfmt_opt=atoi(argv[i + 1]); i++;
4829 }
4830 else if ( !strcmp(argv[i],"-TMcut") && i < (argc-1) )
4831 {
4832 TMcut=atof(argv[i + 1]); i++;
4833 }
4834 else if ( !strcmp(argv[i],"-byresi") && i < (argc-1) )
4835 {
4836 byresi_opt=atoi(argv[i + 1]); i++;
4837 }
4838 else if ( !strcmp(argv[i],"-cp") )
4839 {
4840 cp_opt=1;
4841 }
4842 else if ( !strcmp(argv[i],"-mirror") && i < (argc-1) )
4843 {
4844 mirror_opt=atoi(argv[i + 1]); i++;
4845 }
4846 else if ( !strcmp(argv[i],"-het") && i < (argc-1) )
4847 {
4848 het_opt=atoi(argv[i + 1]); i++;
4849 }
4850 else if (xname.size() == 0) xname=argv[i];
4851 else if (yname.size() == 0) yname=argv[i];
4852 else PrintErrorAndQuit(string("ERROR! Undefined option ")+argv[i]);
4853 }
4854
4855 if(xname.size()==0 || (yname.size()==0 && dir_opt.size()==0) ||
4856 (yname.size() && dir_opt.size()))
4857 {
4858 if (h_opt) print_help(h_opt);
4859 if (v_opt)
4860 {
4861 print_version();
4862 exit(EXIT_FAILURE);
4863 }
4864 if (xname.size()==0)
4865 PrintErrorAndQuit("Please provide input structures");
4866 else if (yname.size()==0 && dir_opt.size()==0)
4867 PrintErrorAndQuit("Please provide structure B");
4868 else if (yname.size() && dir_opt.size())
4869 PrintErrorAndQuit("Please provide only one file name if -dir is set");
4870 }
4871
4872 if (suffix_opt.size() && dir_opt.size()+dir1_opt.size()+dir2_opt.size()==0)
4873 PrintErrorAndQuit("-suffix is only valid if -dir, -dir1 or -dir2 is set");
4874 if ((dir_opt.size() || dir1_opt.size() || dir2_opt.size()))
4875 {
4876 if (m_opt || o_opt)
4877 PrintErrorAndQuit("-m or -o cannot be set with -dir, -dir1 or -dir2");
4878 else if (dir_opt.size() && (dir1_opt.size() || dir2_opt.size()))
4879 PrintErrorAndQuit("-dir cannot be set with -dir1 or -dir2");
4880 }
4881 if (atom_opt.size()!=4)
4882 PrintErrorAndQuit("ERROR! atom name must have 4 characters, including space.");
4883 if (mol_opt!="auto" && mol_opt!="protein" && mol_opt!="RNA")
4884 PrintErrorAndQuit("ERROR! molecule type must be either RNA or protein.");
4885 else if (mol_opt=="protein" && atom_opt=="auto")
4886 atom_opt=" CA ";
4887 else if (mol_opt=="RNA" && atom_opt=="auto")
4888 atom_opt=" C3'";
4889
4890 if (u_opt && Lnorm_ass<=0)
4891 PrintErrorAndQuit("Wrong value for option -u! It should be >0");
4892 if (d_opt && d0_scale<=0)
4893 PrintErrorAndQuit("Wrong value for option -d! It should be >0");
4894 if (outfmt_opt>=2 && (a_opt || u_opt || d_opt))
4895 PrintErrorAndQuit("-outfmt 2 cannot be used with -a, -u, -L, -d");
4896 if (byresi_opt!=0)
4897 {
4898 if (i_opt)
4899 PrintErrorAndQuit("-byresi >=1 cannot be used with -i or -I");
4900 if (byresi_opt<0 || byresi_opt>3)
4901 PrintErrorAndQuit("-byresi can only be 0, 1, 2 or 3");
4902 if (byresi_opt>=2 && ter_opt>=2)
4903 PrintErrorAndQuit("-byresi >=2 should be used with -ter <=1");
4904 }
4905 if (split_opt==1 && ter_opt!=0)
4906 PrintErrorAndQuit("-split 1 should be used with -ter 0");
4907 else if (split_opt==2 && ter_opt!=0 && ter_opt!=1)
4908 PrintErrorAndQuit("-split 2 should be used with -ter 0 or 1");
4909 if (split_opt<0 || split_opt>2)
4910 PrintErrorAndQuit("-split can only be 0, 1 or 2");
4911 if (cp_opt!=0 && cp_opt!=1)
4912 PrintErrorAndQuit("-cp can only be 0 or 1");
4913 if (cp_opt && i_opt)
4914 PrintErrorAndQuit("-cp cannot be used with -i or -I");
4915
4916 /* read initial alignment file from 'align.txt' */
4917 if (i_opt) read_user_alignment(sequence, fname_lign, i_opt);
4918
4919 if (byresi_opt) i_opt=3;
4920
4921 if (m_opt && fname_matrix == "") // Output rotation matrix: matrix.txt
4922 PrintErrorAndQuit("ERROR! Please provide a file name for option -m!");
4923
4924 /* parse file list */
4925 if (dir1_opt.size()+dir_opt.size()==0) chain1_list.push_back(xname);
4926 else file2chainlist(chain1_list, xname, dir_opt+dir1_opt, suffix_opt);
4927
4928 if (dir_opt.size())
4929 for (int i=0;i<chain1_list.size();i++)
4930 chain2_list.push_back(chain1_list[i]);
4931 else if (dir2_opt.size()==0) chain2_list.push_back(yname);
4932 else file2chainlist(chain2_list, yname, dir2_opt, suffix_opt);
4933
4934 if (outfmt_opt==2)
4935 cout<<"#PDBchain1\tPDBchain2\tTM1\tTM2\t"
4936 <<"RMSD\tID1\tID2\tIDali\tL1\tL2\tLali"<<endl;
4937
4938 /* declare previously global variables */
4939 vector<vector<string> >PDB_lines1; // text of chain1
4940 vector<vector<string> >PDB_lines2; // text of chain2
4941 vector<int> mol_vec1; // molecule type of chain1, RNA if >0
4942 vector<int> mol_vec2; // molecule type of chain2, RNA if >0
4943 vector<string> chainID_list1; // list of chainID1
4944 vector<string> chainID_list2; // list of chainID2
4945 int i,j; // file index
4946 int chain_i,chain_j; // chain index
4947 int r; // residue index
4948 int xlen, ylen; // chain length
4949 int xchainnum,ychainnum;// number of chains in a PDB file
4950 char *seqx, *seqy; // for the protein sequence
4951 char *secx, *secy; // for the secondary structure
4952 double **xa, **ya; // for input vectors xa[0...xlen-1][0..2] and
4953 // ya[0...ylen-1][0..2], in general,
4954 // ya is regarded as native structure
4955 // --> superpose xa onto ya
4956 vector<string> resi_vec1; // residue index for chain1
4957 vector<string> resi_vec2; // residue index for chain2
4958
4959 /* loop over file names */
4960 for (i=0;i<chain1_list.size();i++)
4961 {
4962 /* parse chain 1 */
4963 xname=chain1_list[i];
4964 xchainnum=get_PDB_lines(xname, PDB_lines1, chainID_list1,
4965 mol_vec1, ter_opt, infmt1_opt, atom_opt, split_opt, het_opt);
4966 if (!xchainnum)
4967 {
4968 cerr<<"Warning! Cannot parse file: "<<xname
4969 <<". Chain number 0."<<endl;
4970 continue;
4971 }
4972 for (chain_i=0;chain_i<xchainnum;chain_i++)
4973 {
4974 xlen=PDB_lines1[chain_i].size();
4975 mol_vec1[chain_i]=-1;
4976 if (!xlen)
4977 {
4978 cerr<<"Warning! Cannot parse file: "<<xname
4979 <<". Chain length 0."<<endl;
4980 continue;
4981 }
4982 else if (xlen<3)
4983 {
4984 cerr<<"Sequence is too short <3!: "<<xname<<endl;
4985 continue;
4986 }
4987 NewArray(&xa, xlen, 3);
4988 seqx = new char[xlen + 1];
4989 secx = new char[xlen + 1];
4990 xlen = read_PDB(PDB_lines1[chain_i], xa, seqx,
4991 resi_vec1, byresi_opt?byresi_opt:o_opt);
4992 if (mirror_opt) for (r=0;r<xlen;r++) xa[r][2]=-xa[r][2];
4993 make_sec(xa, xlen, secx); // secondary structure assignment
4994
4995 for (j=(dir_opt.size()>0)*(i+1);j<chain2_list.size();j++)
4996 {
4997 /* parse chain 2 */
4998 if (PDB_lines2.size()==0)
4999 {
5000 yname=chain2_list[j];
5001 ychainnum=get_PDB_lines(yname, PDB_lines2, chainID_list2,
5002 mol_vec2, ter_opt, infmt2_opt, atom_opt, split_opt,
5003 het_opt);
5004 if (!ychainnum)
5005 {
5006 cerr<<"Warning! Cannot parse file: "<<yname
5007 <<". Chain number 0."<<endl;
5008 continue;
5009 }
5010 }
5011 for (chain_j=0;chain_j<ychainnum;chain_j++)
5012 {
5013 ylen=PDB_lines2[chain_j].size();
5014 mol_vec2[chain_j]=-1;
5015 if (!ylen)
5016 {
5017 cerr<<"Warning! Cannot parse file: "<<yname
5018 <<". Chain length 0."<<endl;
5019 continue;
5020 }
5021 else if (ylen<3)
5022 {
5023 cerr<<"Sequence is too short <3!: "<<yname<<endl;
5024 continue;
5025 }
5026 NewArray(&ya, ylen, 3);
5027 seqy = new char[ylen + 1];
5028 secy = new char[ylen + 1];
5029 ylen = read_PDB(PDB_lines2[chain_j], ya, seqy,
5030 resi_vec2, byresi_opt?byresi_opt:o_opt);
5031 make_sec(ya, ylen, secy);
5032 if (byresi_opt) extract_aln_from_resi(sequence,
5033 seqx,seqy,resi_vec1,resi_vec2,byresi_opt);
5034
5035 /* declare variable specific to this pair of TMalign */
5036 double t0[3], u0[3][3];
5037 double TM1, TM2;
5038 double TM3, TM4, TM5; // for a_opt, u_opt, d_opt
5039 double d0_0, TM_0;
5040 double d0A, d0B, d0u, d0a;
5041 double d0_out=5.0;
5042 string seqM, seqxA, seqyA;// for output alignment
5043 double rmsd0 = 0.0;
5044 int L_ali; // Aligned length in standard_TMscore
5045 double Liden=0;
5046 double TM_ali, rmsd_ali; // TMscore and rmsd in standard_TMscore
5047 int n_ali=0;
5048 int n_ali8=0;
5049
5050 /* entry function for structure alignment */
5051 if (cp_opt) CPalign_main(
5052 xa, ya, seqx, seqy, secx, secy,
5053 t0, u0, TM1, TM2, TM3, TM4, TM5,
5054 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
5055 seqM, seqxA, seqyA,
5056 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
5057 xlen, ylen, sequence, Lnorm_ass, d0_scale,
5058 i_opt, a_opt, u_opt, d_opt, fast_opt,
5059 mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
5060 else TMalign_main(
5061 xa, ya, seqx, seqy, secx, secy,
5062 t0, u0, TM1, TM2, TM3, TM4, TM5,
5063 d0_0, TM_0, d0A, d0B, d0u, d0a, d0_out,
5064 seqM, seqxA, seqyA,
5065 rmsd0, L_ali, Liden, TM_ali, rmsd_ali, n_ali, n_ali8,
5066 xlen, ylen, sequence, Lnorm_ass, d0_scale,
5067 i_opt, a_opt, u_opt, d_opt, fast_opt,
5068 mol_vec1[chain_i]+mol_vec2[chain_j],TMcut);
5069
5070 /* print result */
5071 if (outfmt_opt==0) print_version();
5072 output_results(
5073 xname.substr(dir1_opt.size()),
5074 yname.substr(dir2_opt.size()),
5075 chainID_list1[chain_i].c_str(),
5076 chainID_list2[chain_j].c_str(),
5077 xlen, ylen, t0, u0, TM1, TM2,
5078 TM3, TM4, TM5, rmsd0, d0_out,
5079 seqM.c_str(), seqxA.c_str(), seqyA.c_str(), Liden,
5080 n_ali8, L_ali, TM_ali, rmsd_ali,
5081 TM_0, d0_0, d0A, d0B,
5082 Lnorm_ass, d0_scale, d0a, d0u,
5083 (m_opt?fname_matrix+chainID_list1[chain_i]:"").c_str(),
5084 outfmt_opt, ter_opt,
5085 (o_opt?fname_super+chainID_list1[chain_i]:"").c_str(),
5086 i_opt, a_opt, u_opt, d_opt,mirror_opt,
5087 resi_vec1,resi_vec2);
5088
5089 /* Done! Free memory */
5090 seqM.clear();
5091 seqxA.clear();
5092 seqyA.clear();
5093 DeleteArray(&ya, ylen);
5094 delete [] seqy;
5095 delete [] secy;
5096 resi_vec2.clear();
5097 } // chain_j
5098 if (chain2_list.size()>1)
5099 {
5100 yname.clear();
5101 for (chain_j=0;chain_j<ychainnum;chain_j++)
5102 PDB_lines2[chain_j].clear();
5103 PDB_lines2.clear();
5104 chainID_list2.clear();
5105 mol_vec2.clear();
5106 }
5107 } // j
5108 PDB_lines1[chain_i].clear();
5109 DeleteArray(&xa, xlen);
5110 delete [] seqx;
5111 delete [] secx;
5112 resi_vec1.clear();
5113 } // chain_i
5114 xname.clear();
5115 PDB_lines1.clear();
5116 chainID_list1.clear();
5117 mol_vec1.clear();
5118 } // i
5119 if (chain2_list.size()==1)
5120 {
5121 yname.clear();
5122 for (chain_j=0;chain_j<ychainnum;chain_j++)
5123 PDB_lines2[chain_j].clear();
5124 PDB_lines2.clear();
5125 resi_vec2.clear();
5126 chainID_list2.clear();
5127 mol_vec2.clear();
5128 }
5129 chain1_list.clear();
5130 chain2_list.clear();
5131 sequence.clear();
5132
5133 t2 = clock();
5134 float diff = ((float)t2 - (float)t1)/CLOCKS_PER_SEC;
5135 printf("Total CPU time is %5.2f seconds\n", diff);
5136 return 0;
5137 }