annotate galaxy-tools/tools/rdock/bin/sdrmsd @ 0:4eb3f9cb2a51 draft

Uploaded
author dzesikah
date Fri, 26 Aug 2016 09:53:37 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
1 #! /usr/bin/env python
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
2 #
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
3 # Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
4 # Script distributed under GNU LGPL 3.0 along rDock software.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
5 #
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
6 # This algorithm takes into account molecular automorphism. That is, it identifies
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
7 # molecules which are the same but might have atom orders changed and still be able to
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
8 # match the pairs and correctly calculate the RMSD.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
9 #
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
10 # Author: Daniel Alvarez-Garcia
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
11 # Date: 08-11-2013
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
12
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
13 import math
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
14 import pybel
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
15 import numpy as npy
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
16 import optparse
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
17
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
18 def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
19 """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
20 definition : superpose3D(ref, target, weights,refmask,targetmask)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
21 @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
22 @type 1 : float64 numpy array (nx3)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
23 ---
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
24 @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
25 @type 2 : float 64 numpy array (nx3)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
26 ---
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
27 @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
28 @type 3 : float 64 numpy array (n)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
29 @parameter 4: mask - a numpy boolean mask for designating atoms to include
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
30 Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
31 Returns a set of new coordinates, aligned to the target state as well as the rmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
32 """
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
33 if weights == None :
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
34 weights=1.0
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
35 if refmask == None :
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
36 refmask=npy.ones(len(ref),"bool")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
37 if targetmask == None :
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
38 targetmask=npy.ones(len(target),"bool")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
39 #first get the centroid of both states
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
40 ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
41 #print ref_centroid
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
42 refCenteredCoords=ref-ref_centroid
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
43 #print refCenteredCoords
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
44 target_centroid=npy.mean(target[targetmask]*weights,axis=0)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
45 targetCenteredCoords=target-target_centroid
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
46 #print targetCenteredCoords
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
47 #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
48 # Initial residual, see Kabsch.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
49 E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
50 reftmp=npy.copy(refCenteredCoords[refmask])
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
51 targettmp=npy.copy(targetCenteredCoords[targetmask])
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
52 #print refCenteredCoords[refmask]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
53 #single value decomposition of the dotProduct of both position vectors
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
54 try:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
55 dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
56 V, S, Wt = npy.linalg.svd(dotProd )
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
57 except Exception:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
58 try:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
59 dotProd = npy.dot( npy.transpose(reftmp), targettmp)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
60 V, S, Wt = npy.linalg.svd(dotProd )
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
61 except Exception:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
62 print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
63 return ref, 0
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
64 # we already have our solution, in the results from SVD.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
65 # we just need to check for reflections and then produce
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
66 # the rotation. V and Wt are orthonormal, so their det's
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
67 # are +/-1.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
68 reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
69 if reflect == -1.0:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
70 S[-1] = -S[-1]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
71 V[:,-1] = -V[:,-1]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
72 rmsd = E0 - (2.0 * sum(S))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
73 rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
74 #U is simply V*Wt
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
75 U = npy.dot(V, Wt) #get the rotation matrix
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
76 # rotate and translate the molecule
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
77 new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
78 #new_coords=(refCenteredCoords + target_centroid)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
79 #print U
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
80 if returnRotMat :
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
81 return new_coords,rmsd, U
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
82 return new_coords,rmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
83
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
84
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
85 def squared_distance(coordsA, coordsB):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
86 """Find the squared distance between two 3-tuples"""
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
87 sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
88 return sqrdist
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
89
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
90 def rmsd(allcoordsA, allcoordsB):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
91 """Find the RMSD between two lists of 3-tuples"""
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
92 deviation = sum(squared_distance(atomA, atomB) for
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
93 (atomA, atomB) in zip(allcoordsA, allcoordsB))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
94 return math.sqrt(deviation / float(len(allcoordsA)))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
95
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
96 def mapToCrystal(xtal, pose):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
97 """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
98 this will mess up the rmsd calculation with OpenBabel"""
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
99 query = pybel.ob.CompileMoleculeQuery(xtal.OBMol)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
100 mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
101 mappingpose = pybel.ob.vvpairUIntUInt()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
102 exit=mapper.MapUnique(pose.OBMol,mappingpose)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
103 return mappingpose[0]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
104
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
105 def parseArguments():
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
106 optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
107 epilog = """Args:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
108 reference.sdf SDF file with the reference molecule.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
109 input.sdf SDF file with the molecules to be compared to reference.\n"""
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
110 parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
111 parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False,
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
112 help="Superpose molecules before RMSD calculation")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
113 parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1,
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
114 help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
115 parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False,
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
116 help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
117 (options, args) = parser.parse_args()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
118
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
119 #Check we have two arguments
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
120 if len(args) < 2:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
121 parser.error("Incorrect number of arguments. Use -h or --help options to print help.")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
122
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
123 return options, args
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
124
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
125 def updateCoords(obmol, newcoords):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
126 "Update OBMol coordinates. newcoords is a numpy array"
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
127 for i,atom in enumerate(obmol):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
128 atom.OBAtom.SetVector(*newcoords[i])
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
129
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
130 def getAutomorphRMSD(target, molec, fit=False):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
131 """
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
132 Use Automorphism to reorder target coordinates to match ref coordinates atom order
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
133 for correct RMSD comparison. Only the lowest RMSD will be returned.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
134
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
135 Returns:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
136 If fit=False: bestRMSD (float) RMSD of the best matching mapping.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
137 If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates.
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
138 """
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
139 mappings = pybel.ob.vvpairUIntUInt()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
140 bitvec = pybel.ob.OBBitVec()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
141 lookup = []
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
142 for i, atom in enumerate(target):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
143 lookup.append(i)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
144 success = pybel.ob.FindAutomorphisms(target.OBMol, mappings)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
145 targetcoords = [atom.coords for atom in target]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
146 mappose = npy.array(mapToCrystal(target, molec))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
147 mappose = mappose[npy.argsort(mappose[:,0])][:,1]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
148 posecoords = npy.array([atom.coords for atom in molec])[mappose]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
149 resultrmsd = 999999999999
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
150 for mapping in mappings:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
151 automorph_coords = [None] * len(targetcoords)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
152 for x, y in mapping:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
153 automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
154 mapping_rmsd = rmsd(posecoords, automorph_coords)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
155 if mapping_rmsd < resultrmsd:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
156 resultrmsd = mapping_rmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
157 fitted_result = False
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
158 if fit:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
159 fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
160 if fitted_rmsd < resultrmsd:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
161 resultrmsd = fitted_rmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
162 fitted_result = fitted_pose
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
163
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
164 if fit:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
165 return (resultrmsd, fitted_pose)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
166 else:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
167 return resultrmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
168
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
169 def saveMolecWithRMSD(outsdf, molec, rmsd, population=False):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
170 newData = pybel.ob.OBPairData()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
171 newData.SetAttribute("RMSD")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
172 newData.SetValue('%.3f'%rmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
173
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
174 if population:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
175 popData = pybel.ob.OBPairData()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
176 popData.SetAttribute("Population")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
177 popData.SetValue('%i'%population)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
178 molec.OBMol.CloneData(popData)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
179
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
180 molec.OBMol.CloneData(newData) # Add new data
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
181 outsdf.write(molec)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
182
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
183 if __name__ == "__main__":
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
184 import sys, os
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
185
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
186 (opts, args) = parseArguments()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
187
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
188 xtal = args[0]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
189 poses = args[1]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
190
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
191 if not os.path.exists(xtal) or not os.path.exists(poses):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
192 sys.exit("Input files not found. Please check the path given is correct.")
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
193
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
194 fit = opts.fit
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
195 outfname = opts.outfilename
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
196 threshold = opts.threshold
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
197
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
198 # Read crystal pose
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
199 crystal = next(pybel.readfile("sdf", xtal))
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
200 crystal.removeh()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
201 crystalnumatoms = len(crystal.atoms)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
202
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
203 #If outfname is defined, prepare an output SDF sink to write molecules
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
204 if outfname:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
205 outsdf = pybel.Outputfile('sdf', outfname, overwrite=True)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
206
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
207 # Find the RMSD between the crystal pose and each docked pose
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
208 dockedposes = pybel.readfile("sdf", poses)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
209 if fit: print "POSE\tRMSD_FIT"
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
210 else: print "POSE\tRMSD_NOFIT"
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
211 skipped = []
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
212 moleclist = {} # Save all poses with their dockid
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
213 population = {} # Poses to be written
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
214 outlist = {}
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
215 for docki, dockedpose in enumerate(dockedposes):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
216 dockedpose.removeh()
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
217 natoms = len(dockedpose.atoms)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
218 if natoms != crystalnumatoms:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
219 skipped.append(docki+1)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
220 continue
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
221 if fit:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
222 resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
223 updateCoords(dockedpose, fitted_result)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
224 else:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
225 resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
226
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
227 if threshold:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
228 # Calculate RMSD between all previous poses
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
229 # Discard if rmsd < FILTER threshold
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
230 if moleclist:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
231 match = None
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
232 bestmatchrmsd = 999999
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
233 for did,prevmol in moleclist.iteritems():
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
234 tmprmsd = getAutomorphRMSD(prevmol, dockedpose)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
235 if tmprmsd < threshold:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
236 if tmprmsd < bestmatchrmsd:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
237 bestmatchrmsd = tmprmsd
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
238 match = did
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
239
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
240 if match != None:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
241 # Do not write this one
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
242 # sum one up to the matching previous molecule id
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
243 print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
244 population[match] += 1
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
245 else:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
246 # There's no match. Print info for this one and write to outsdf if needed
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
247 # Save this one!
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
248 if outfname: outlist[docki] = (dockedpose, resultrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
249 print "%d\t%.2f"%((docki+1),resultrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
250 moleclist[docki] = dockedpose
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
251 population[docki] = 1
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
252 else:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
253 # First molecule in list. Append for sure
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
254 moleclist[docki] = dockedpose
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
255 population[docki] = 1
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
256 if outfname: outlist[docki] = (dockedpose, resultrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
257 else:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
258 # Just write best rmsd found and the molecule to outsdf if demanded
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
259 if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
260 print "%d\t%.2f"%((docki+1),resultrmsd)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
261
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
262 if outlist:
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
263 # Threshold applied and outlist need to be written
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
264 for docki in sorted(outlist.iterkeys()):
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
265 molrmsd = outlist[docki]
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
266 # Get number of matchs in thresholding operation
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
267 pop = population.get(docki)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
268 if not pop: pop = 1
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
269 # Save molecule
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
270 saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop)
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
271
4eb3f9cb2a51 Uploaded
dzesikah
parents:
diff changeset
272 if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped