Mercurial > repos > marpiech > norwich_tools_docking
comparison tools/rdock/bin/sdrmsd @ 3:b02d74d22d05 draft default tip
planemo upload
| author | marpiech |
|---|---|
| date | Mon, 29 Aug 2016 08:23:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:bd50f811878f | 3:b02d74d22d05 |
|---|---|
| 1 #! /usr/bin/env python | |
| 2 # | |
| 3 # Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT) | |
| 4 # Script distributed under GNU LGPL 3.0 along rDock software. | |
| 5 # | |
| 6 # This algorithm takes into account molecular automorphism. That is, it identifies | |
| 7 # molecules which are the same but might have atom orders changed and still be able to | |
| 8 # match the pairs and correctly calculate the RMSD. | |
| 9 # | |
| 10 # Author: Daniel Alvarez-Garcia | |
| 11 # Date: 08-11-2013 | |
| 12 | |
| 13 import math | |
| 14 import pybel | |
| 15 import numpy as npy | |
| 16 import optparse | |
| 17 | |
| 18 def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False): | |
| 19 """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654 | |
| 20 definition : superpose3D(ref, target, weights,refmask,targetmask) | |
| 21 @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance) | |
| 22 @type 1 : float64 numpy array (nx3) | |
| 23 --- | |
| 24 @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant. | |
| 25 @type 2 : float 64 numpy array (nx3) | |
| 26 --- | |
| 27 @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1) | |
| 28 @type 3 : float 64 numpy array (n) | |
| 29 @parameter 4: mask - a numpy boolean mask for designating atoms to include | |
| 30 Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms) | |
| 31 Returns a set of new coordinates, aligned to the target state as well as the rmsd | |
| 32 """ | |
| 33 if weights == None : | |
| 34 weights=1.0 | |
| 35 if refmask == None : | |
| 36 refmask=npy.ones(len(ref),"bool") | |
| 37 if targetmask == None : | |
| 38 targetmask=npy.ones(len(target),"bool") | |
| 39 #first get the centroid of both states | |
| 40 ref_centroid = npy.mean(ref[refmask]*weights,axis=0) | |
| 41 #print ref_centroid | |
| 42 refCenteredCoords=ref-ref_centroid | |
| 43 #print refCenteredCoords | |
| 44 target_centroid=npy.mean(target[targetmask]*weights,axis=0) | |
| 45 targetCenteredCoords=target-target_centroid | |
| 46 #print targetCenteredCoords | |
| 47 #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm | |
| 48 # Initial residual, see Kabsch. | |
| 49 E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0) | |
| 50 reftmp=npy.copy(refCenteredCoords[refmask]) | |
| 51 targettmp=npy.copy(targetCenteredCoords[targetmask]) | |
| 52 #print refCenteredCoords[refmask] | |
| 53 #single value decomposition of the dotProduct of both position vectors | |
| 54 try: | |
| 55 dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights) | |
| 56 V, S, Wt = npy.linalg.svd(dotProd ) | |
| 57 except Exception: | |
| 58 try: | |
| 59 dotProd = npy.dot( npy.transpose(reftmp), targettmp) | |
| 60 V, S, Wt = npy.linalg.svd(dotProd ) | |
| 61 except Exception: | |
| 62 print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment" | |
| 63 return ref, 0 | |
| 64 # we already have our solution, in the results from SVD. | |
| 65 # we just need to check for reflections and then produce | |
| 66 # the rotation. V and Wt are orthonormal, so their det's | |
| 67 # are +/-1. | |
| 68 reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt)))) | |
| 69 if reflect == -1.0: | |
| 70 S[-1] = -S[-1] | |
| 71 V[:,-1] = -V[:,-1] | |
| 72 rmsd = E0 - (2.0 * sum(S)) | |
| 73 rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd | |
| 74 #U is simply V*Wt | |
| 75 U = npy.dot(V, Wt) #get the rotation matrix | |
| 76 # rotate and translate the molecule | |
| 77 new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate | |
| 78 #new_coords=(refCenteredCoords + target_centroid) | |
| 79 #print U | |
| 80 if returnRotMat : | |
| 81 return new_coords,rmsd, U | |
| 82 return new_coords,rmsd | |
| 83 | |
| 84 | |
| 85 def squared_distance(coordsA, coordsB): | |
| 86 """Find the squared distance between two 3-tuples""" | |
| 87 sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) ) | |
| 88 return sqrdist | |
| 89 | |
| 90 def rmsd(allcoordsA, allcoordsB): | |
| 91 """Find the RMSD between two lists of 3-tuples""" | |
| 92 deviation = sum(squared_distance(atomA, atomB) for | |
| 93 (atomA, atomB) in zip(allcoordsA, allcoordsB)) | |
| 94 return math.sqrt(deviation / float(len(allcoordsA))) | |
| 95 | |
| 96 def mapToCrystal(xtal, pose): | |
| 97 """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...) | |
| 98 this will mess up the rmsd calculation with OpenBabel""" | |
| 99 query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) | |
| 100 mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query) | |
| 101 mappingpose = pybel.ob.vvpairUIntUInt() | |
| 102 exit=mapper.MapUnique(pose.OBMol,mappingpose) | |
| 103 return mappingpose[0] | |
| 104 | |
| 105 def parseArguments(): | |
| 106 optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog | |
| 107 epilog = """Args: | |
| 108 reference.sdf SDF file with the reference molecule. | |
| 109 input.sdf SDF file with the molecules to be compared to reference.\n""" | |
| 110 parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog) | |
| 111 parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False, | |
| 112 help="Superpose molecules before RMSD calculation") | |
| 113 parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1, | |
| 114 help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float) | |
| 115 parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False, | |
| 116 help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.") | |
| 117 (options, args) = parser.parse_args() | |
| 118 | |
| 119 #Check we have two arguments | |
| 120 if len(args) < 2: | |
| 121 parser.error("Incorrect number of arguments. Use -h or --help options to print help.") | |
| 122 | |
| 123 return options, args | |
| 124 | |
| 125 def updateCoords(obmol, newcoords): | |
| 126 "Update OBMol coordinates. newcoords is a numpy array" | |
| 127 for i,atom in enumerate(obmol): | |
| 128 atom.OBAtom.SetVector(*newcoords[i]) | |
| 129 | |
| 130 def getAutomorphRMSD(target, molec, fit=False): | |
| 131 """ | |
| 132 Use Automorphism to reorder target coordinates to match ref coordinates atom order | |
| 133 for correct RMSD comparison. Only the lowest RMSD will be returned. | |
| 134 | |
| 135 Returns: | |
| 136 If fit=False: bestRMSD (float) RMSD of the best matching mapping. | |
| 137 If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates. | |
| 138 """ | |
| 139 mappings = pybel.ob.vvpairUIntUInt() | |
| 140 bitvec = pybel.ob.OBBitVec() | |
| 141 lookup = [] | |
| 142 for i, atom in enumerate(target): | |
| 143 lookup.append(i) | |
| 144 success = pybel.ob.FindAutomorphisms(target.OBMol, mappings) | |
| 145 targetcoords = [atom.coords for atom in target] | |
| 146 mappose = npy.array(mapToCrystal(target, molec)) | |
| 147 mappose = mappose[npy.argsort(mappose[:,0])][:,1] | |
| 148 posecoords = npy.array([atom.coords for atom in molec])[mappose] | |
| 149 resultrmsd = 999999999999 | |
| 150 for mapping in mappings: | |
| 151 automorph_coords = [None] * len(targetcoords) | |
| 152 for x, y in mapping: | |
| 153 automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)] | |
| 154 mapping_rmsd = rmsd(posecoords, automorph_coords) | |
| 155 if mapping_rmsd < resultrmsd: | |
| 156 resultrmsd = mapping_rmsd | |
| 157 fitted_result = False | |
| 158 if fit: | |
| 159 fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords)) | |
| 160 if fitted_rmsd < resultrmsd: | |
| 161 resultrmsd = fitted_rmsd | |
| 162 fitted_result = fitted_pose | |
| 163 | |
| 164 if fit: | |
| 165 return (resultrmsd, fitted_pose) | |
| 166 else: | |
| 167 return resultrmsd | |
| 168 | |
| 169 def saveMolecWithRMSD(outsdf, molec, rmsd, population=False): | |
| 170 newData = pybel.ob.OBPairData() | |
| 171 newData.SetAttribute("RMSD") | |
| 172 newData.SetValue('%.3f'%rmsd) | |
| 173 | |
| 174 if population: | |
| 175 popData = pybel.ob.OBPairData() | |
| 176 popData.SetAttribute("Population") | |
| 177 popData.SetValue('%i'%population) | |
| 178 molec.OBMol.CloneData(popData) | |
| 179 | |
| 180 molec.OBMol.CloneData(newData) # Add new data | |
| 181 outsdf.write(molec) | |
| 182 | |
| 183 if __name__ == "__main__": | |
| 184 import sys, os | |
| 185 | |
| 186 (opts, args) = parseArguments() | |
| 187 | |
| 188 xtal = args[0] | |
| 189 poses = args[1] | |
| 190 | |
| 191 if not os.path.exists(xtal) or not os.path.exists(poses): | |
| 192 sys.exit("Input files not found. Please check the path given is correct.") | |
| 193 | |
| 194 fit = opts.fit | |
| 195 outfname = opts.outfilename | |
| 196 threshold = opts.threshold | |
| 197 | |
| 198 # Read crystal pose | |
| 199 crystal = next(pybel.readfile("sdf", xtal)) | |
| 200 crystal.removeh() | |
| 201 crystalnumatoms = len(crystal.atoms) | |
| 202 | |
| 203 #If outfname is defined, prepare an output SDF sink to write molecules | |
| 204 if outfname: | |
| 205 outsdf = pybel.Outputfile('sdf', outfname, overwrite=True) | |
| 206 | |
| 207 # Find the RMSD between the crystal pose and each docked pose | |
| 208 dockedposes = pybel.readfile("sdf", poses) | |
| 209 if fit: print "POSE\tRMSD_FIT" | |
| 210 else: print "POSE\tRMSD_NOFIT" | |
| 211 skipped = [] | |
| 212 moleclist = {} # Save all poses with their dockid | |
| 213 population = {} # Poses to be written | |
| 214 outlist = {} | |
| 215 for docki, dockedpose in enumerate(dockedposes): | |
| 216 dockedpose.removeh() | |
| 217 natoms = len(dockedpose.atoms) | |
| 218 if natoms != crystalnumatoms: | |
| 219 skipped.append(docki+1) | |
| 220 continue | |
| 221 if fit: | |
| 222 resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True) | |
| 223 updateCoords(dockedpose, fitted_result) | |
| 224 else: | |
| 225 resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False) | |
| 226 | |
| 227 if threshold: | |
| 228 # Calculate RMSD between all previous poses | |
| 229 # Discard if rmsd < FILTER threshold | |
| 230 if moleclist: | |
| 231 match = None | |
| 232 bestmatchrmsd = 999999 | |
| 233 for did,prevmol in moleclist.iteritems(): | |
| 234 tmprmsd = getAutomorphRMSD(prevmol, dockedpose) | |
| 235 if tmprmsd < threshold: | |
| 236 if tmprmsd < bestmatchrmsd: | |
| 237 bestmatchrmsd = tmprmsd | |
| 238 match = did | |
| 239 | |
| 240 if match != None: | |
| 241 # Do not write this one | |
| 242 # sum one up to the matching previous molecule id | |
| 243 print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd) | |
| 244 population[match] += 1 | |
| 245 else: | |
| 246 # There's no match. Print info for this one and write to outsdf if needed | |
| 247 # Save this one! | |
| 248 if outfname: outlist[docki] = (dockedpose, resultrmsd) | |
| 249 print "%d\t%.2f"%((docki+1),resultrmsd) | |
| 250 moleclist[docki] = dockedpose | |
| 251 population[docki] = 1 | |
| 252 else: | |
| 253 # First molecule in list. Append for sure | |
| 254 moleclist[docki] = dockedpose | |
| 255 population[docki] = 1 | |
| 256 if outfname: outlist[docki] = (dockedpose, resultrmsd) | |
| 257 else: | |
| 258 # Just write best rmsd found and the molecule to outsdf if demanded | |
| 259 if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd) | |
| 260 print "%d\t%.2f"%((docki+1),resultrmsd) | |
| 261 | |
| 262 if outlist: | |
| 263 # Threshold applied and outlist need to be written | |
| 264 for docki in sorted(outlist.iterkeys()): | |
| 265 molrmsd = outlist[docki] | |
| 266 # Get number of matchs in thresholding operation | |
| 267 pop = population.get(docki) | |
| 268 if not pop: pop = 1 | |
| 269 # Save molecule | |
| 270 saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop) | |
| 271 | |
| 272 if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped |
