Mercurial > repos > marpiech > norwich_tools_docking
diff tools/rdock/bin/sdrmsd @ 3:b02d74d22d05 draft default tip
planemo upload
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 08:23:52 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/rdock/bin/sdrmsd Mon Aug 29 08:23:52 2016 -0400 @@ -0,0 +1,272 @@ +#! /usr/bin/env python +# +# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT) +# Script distributed under GNU LGPL 3.0 along rDock software. +# +# This algorithm takes into account molecular automorphism. That is, it identifies +# molecules which are the same but might have atom orders changed and still be able to +# match the pairs and correctly calculate the RMSD. +# +# Author: Daniel Alvarez-Garcia +# Date: 08-11-2013 + +import math +import pybel +import numpy as npy +import optparse + +def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False): + """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654 + definition : superpose3D(ref, target, weights,refmask,targetmask) + @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance) + @type 1 : float64 numpy array (nx3) + --- + @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant. + @type 2 : float 64 numpy array (nx3) + --- + @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1) + @type 3 : float 64 numpy array (n) + @parameter 4: mask - a numpy boolean mask for designating atoms to include + Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms) + Returns a set of new coordinates, aligned to the target state as well as the rmsd + """ + if weights == None : + weights=1.0 + if refmask == None : + refmask=npy.ones(len(ref),"bool") + if targetmask == None : + targetmask=npy.ones(len(target),"bool") + #first get the centroid of both states + ref_centroid = npy.mean(ref[refmask]*weights,axis=0) + #print ref_centroid + refCenteredCoords=ref-ref_centroid + #print refCenteredCoords + target_centroid=npy.mean(target[targetmask]*weights,axis=0) + targetCenteredCoords=target-target_centroid + #print targetCenteredCoords + #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm + # Initial residual, see Kabsch. + E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0) + reftmp=npy.copy(refCenteredCoords[refmask]) + targettmp=npy.copy(targetCenteredCoords[targetmask]) + #print refCenteredCoords[refmask] + #single value decomposition of the dotProduct of both position vectors + try: + dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights) + V, S, Wt = npy.linalg.svd(dotProd ) + except Exception: + try: + dotProd = npy.dot( npy.transpose(reftmp), targettmp) + V, S, Wt = npy.linalg.svd(dotProd ) + except Exception: + print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment" + return ref, 0 + # we already have our solution, in the results from SVD. + # we just need to check for reflections and then produce + # the rotation. V and Wt are orthonormal, so their det's + # are +/-1. + reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt)))) + if reflect == -1.0: + S[-1] = -S[-1] + V[:,-1] = -V[:,-1] + rmsd = E0 - (2.0 * sum(S)) + rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd + #U is simply V*Wt + U = npy.dot(V, Wt) #get the rotation matrix + # rotate and translate the molecule + new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate + #new_coords=(refCenteredCoords + target_centroid) + #print U + if returnRotMat : + return new_coords,rmsd, U + return new_coords,rmsd + + +def squared_distance(coordsA, coordsB): + """Find the squared distance between two 3-tuples""" + sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) ) + return sqrdist + +def rmsd(allcoordsA, allcoordsB): + """Find the RMSD between two lists of 3-tuples""" + deviation = sum(squared_distance(atomA, atomB) for + (atomA, atomB) in zip(allcoordsA, allcoordsB)) + return math.sqrt(deviation / float(len(allcoordsA))) + +def mapToCrystal(xtal, pose): + """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...) + this will mess up the rmsd calculation with OpenBabel""" + query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) + mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query) + mappingpose = pybel.ob.vvpairUIntUInt() + exit=mapper.MapUnique(pose.OBMol,mappingpose) + return mappingpose[0] + +def parseArguments(): + optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog + epilog = """Args: + reference.sdf SDF file with the reference molecule. + input.sdf SDF file with the molecules to be compared to reference.\n""" + parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog) + parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False, + help="Superpose molecules before RMSD calculation") + parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1, + help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float) + parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False, + help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.") + (options, args) = parser.parse_args() + + #Check we have two arguments + if len(args) < 2: + parser.error("Incorrect number of arguments. Use -h or --help options to print help.") + + return options, args + +def updateCoords(obmol, newcoords): + "Update OBMol coordinates. newcoords is a numpy array" + for i,atom in enumerate(obmol): + atom.OBAtom.SetVector(*newcoords[i]) + +def getAutomorphRMSD(target, molec, fit=False): + """ + Use Automorphism to reorder target coordinates to match ref coordinates atom order + for correct RMSD comparison. Only the lowest RMSD will be returned. + + Returns: + If fit=False: bestRMSD (float) RMSD of the best matching mapping. + If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates. + """ + mappings = pybel.ob.vvpairUIntUInt() + bitvec = pybel.ob.OBBitVec() + lookup = [] + for i, atom in enumerate(target): + lookup.append(i) + success = pybel.ob.FindAutomorphisms(target.OBMol, mappings) + targetcoords = [atom.coords for atom in target] + mappose = npy.array(mapToCrystal(target, molec)) + mappose = mappose[npy.argsort(mappose[:,0])][:,1] + posecoords = npy.array([atom.coords for atom in molec])[mappose] + resultrmsd = 999999999999 + for mapping in mappings: + automorph_coords = [None] * len(targetcoords) + for x, y in mapping: + automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)] + mapping_rmsd = rmsd(posecoords, automorph_coords) + if mapping_rmsd < resultrmsd: + resultrmsd = mapping_rmsd + fitted_result = False + if fit: + fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords)) + if fitted_rmsd < resultrmsd: + resultrmsd = fitted_rmsd + fitted_result = fitted_pose + + if fit: + return (resultrmsd, fitted_pose) + else: + return resultrmsd + +def saveMolecWithRMSD(outsdf, molec, rmsd, population=False): + newData = pybel.ob.OBPairData() + newData.SetAttribute("RMSD") + newData.SetValue('%.3f'%rmsd) + + if population: + popData = pybel.ob.OBPairData() + popData.SetAttribute("Population") + popData.SetValue('%i'%population) + molec.OBMol.CloneData(popData) + + molec.OBMol.CloneData(newData) # Add new data + outsdf.write(molec) + +if __name__ == "__main__": + import sys, os + + (opts, args) = parseArguments() + + xtal = args[0] + poses = args[1] + + if not os.path.exists(xtal) or not os.path.exists(poses): + sys.exit("Input files not found. Please check the path given is correct.") + + fit = opts.fit + outfname = opts.outfilename + threshold = opts.threshold + + # Read crystal pose + crystal = next(pybel.readfile("sdf", xtal)) + crystal.removeh() + crystalnumatoms = len(crystal.atoms) + + #If outfname is defined, prepare an output SDF sink to write molecules + if outfname: + outsdf = pybel.Outputfile('sdf', outfname, overwrite=True) + + # Find the RMSD between the crystal pose and each docked pose + dockedposes = pybel.readfile("sdf", poses) + if fit: print "POSE\tRMSD_FIT" + else: print "POSE\tRMSD_NOFIT" + skipped = [] + moleclist = {} # Save all poses with their dockid + population = {} # Poses to be written + outlist = {} + for docki, dockedpose in enumerate(dockedposes): + dockedpose.removeh() + natoms = len(dockedpose.atoms) + if natoms != crystalnumatoms: + skipped.append(docki+1) + continue + if fit: + resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True) + updateCoords(dockedpose, fitted_result) + else: + resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False) + + if threshold: + # Calculate RMSD between all previous poses + # Discard if rmsd < FILTER threshold + if moleclist: + match = None + bestmatchrmsd = 999999 + for did,prevmol in moleclist.iteritems(): + tmprmsd = getAutomorphRMSD(prevmol, dockedpose) + if tmprmsd < threshold: + if tmprmsd < bestmatchrmsd: + bestmatchrmsd = tmprmsd + match = did + + if match != None: + # Do not write this one + # sum one up to the matching previous molecule id + print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd) + population[match] += 1 + else: + # There's no match. Print info for this one and write to outsdf if needed + # Save this one! + if outfname: outlist[docki] = (dockedpose, resultrmsd) + print "%d\t%.2f"%((docki+1),resultrmsd) + moleclist[docki] = dockedpose + population[docki] = 1 + else: + # First molecule in list. Append for sure + moleclist[docki] = dockedpose + population[docki] = 1 + if outfname: outlist[docki] = (dockedpose, resultrmsd) + else: + # Just write best rmsd found and the molecule to outsdf if demanded + if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd) + print "%d\t%.2f"%((docki+1),resultrmsd) + + if outlist: + # Threshold applied and outlist need to be written + for docki in sorted(outlist.iterkeys()): + molrmsd = outlist[docki] + # Get number of matchs in thresholding operation + pop = population.get(docki) + if not pop: pop = 1 + # Save molecule + saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop) + + if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped