Mercurial > repos > marpiech > norwich_tools
view tools/rdock/bin/sdrmsd @ 0:bc03dbb6eb37 draft
planemo upload commit 781926e52355f7805db8d9a4ccafeff397b19aa4-dirty
author | marpiech |
---|---|
date | Mon, 29 Aug 2016 03:38:13 -0400 |
parents | |
children |
line wrap: on
line source
#! /usr/bin/env python # # Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT) # Script distributed under GNU LGPL 3.0 along rDock software. # # This algorithm takes into account molecular automorphism. That is, it identifies # molecules which are the same but might have atom orders changed and still be able to # match the pairs and correctly calculate the RMSD. # # Author: Daniel Alvarez-Garcia # Date: 08-11-2013 import math import pybel import numpy as npy import optparse def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False): """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654 definition : superpose3D(ref, target, weights,refmask,targetmask) @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance) @type 1 : float64 numpy array (nx3) --- @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant. @type 2 : float 64 numpy array (nx3) --- @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1) @type 3 : float 64 numpy array (n) @parameter 4: mask - a numpy boolean mask for designating atoms to include Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms) Returns a set of new coordinates, aligned to the target state as well as the rmsd """ if weights == None : weights=1.0 if refmask == None : refmask=npy.ones(len(ref),"bool") if targetmask == None : targetmask=npy.ones(len(target),"bool") #first get the centroid of both states ref_centroid = npy.mean(ref[refmask]*weights,axis=0) #print ref_centroid refCenteredCoords=ref-ref_centroid #print refCenteredCoords target_centroid=npy.mean(target[targetmask]*weights,axis=0) targetCenteredCoords=target-target_centroid #print targetCenteredCoords #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm # Initial residual, see Kabsch. E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0) reftmp=npy.copy(refCenteredCoords[refmask]) targettmp=npy.copy(targetCenteredCoords[targetmask]) #print refCenteredCoords[refmask] #single value decomposition of the dotProduct of both position vectors try: dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights) V, S, Wt = npy.linalg.svd(dotProd ) except Exception: try: dotProd = npy.dot( npy.transpose(reftmp), targettmp) V, S, Wt = npy.linalg.svd(dotProd ) except Exception: print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment" return ref, 0 # we already have our solution, in the results from SVD. # we just need to check for reflections and then produce # the rotation. V and Wt are orthonormal, so their det's # are +/-1. reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt)))) if reflect == -1.0: S[-1] = -S[-1] V[:,-1] = -V[:,-1] rmsd = E0 - (2.0 * sum(S)) rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd #U is simply V*Wt U = npy.dot(V, Wt) #get the rotation matrix # rotate and translate the molecule new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate #new_coords=(refCenteredCoords + target_centroid) #print U if returnRotMat : return new_coords,rmsd, U return new_coords,rmsd def squared_distance(coordsA, coordsB): """Find the squared distance between two 3-tuples""" sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) ) return sqrdist def rmsd(allcoordsA, allcoordsB): """Find the RMSD between two lists of 3-tuples""" deviation = sum(squared_distance(atomA, atomB) for (atomA, atomB) in zip(allcoordsA, allcoordsB)) return math.sqrt(deviation / float(len(allcoordsA))) def mapToCrystal(xtal, pose): """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...) this will mess up the rmsd calculation with OpenBabel""" query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query) mappingpose = pybel.ob.vvpairUIntUInt() exit=mapper.MapUnique(pose.OBMol,mappingpose) return mappingpose[0] def parseArguments(): optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog epilog = """Args: reference.sdf SDF file with the reference molecule. input.sdf SDF file with the molecules to be compared to reference.\n""" parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog) parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False, help="Superpose molecules before RMSD calculation") parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1, help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float) parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False, help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.") (options, args) = parser.parse_args() #Check we have two arguments if len(args) < 2: parser.error("Incorrect number of arguments. Use -h or --help options to print help.") return options, args def updateCoords(obmol, newcoords): "Update OBMol coordinates. newcoords is a numpy array" for i,atom in enumerate(obmol): atom.OBAtom.SetVector(*newcoords[i]) def getAutomorphRMSD(target, molec, fit=False): """ Use Automorphism to reorder target coordinates to match ref coordinates atom order for correct RMSD comparison. Only the lowest RMSD will be returned. Returns: If fit=False: bestRMSD (float) RMSD of the best matching mapping. If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates. """ mappings = pybel.ob.vvpairUIntUInt() bitvec = pybel.ob.OBBitVec() lookup = [] for i, atom in enumerate(target): lookup.append(i) success = pybel.ob.FindAutomorphisms(target.OBMol, mappings) targetcoords = [atom.coords for atom in target] mappose = npy.array(mapToCrystal(target, molec)) mappose = mappose[npy.argsort(mappose[:,0])][:,1] posecoords = npy.array([atom.coords for atom in molec])[mappose] resultrmsd = 999999999999 for mapping in mappings: automorph_coords = [None] * len(targetcoords) for x, y in mapping: automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)] mapping_rmsd = rmsd(posecoords, automorph_coords) if mapping_rmsd < resultrmsd: resultrmsd = mapping_rmsd fitted_result = False if fit: fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords)) if fitted_rmsd < resultrmsd: resultrmsd = fitted_rmsd fitted_result = fitted_pose if fit: return (resultrmsd, fitted_pose) else: return resultrmsd def saveMolecWithRMSD(outsdf, molec, rmsd, population=False): newData = pybel.ob.OBPairData() newData.SetAttribute("RMSD") newData.SetValue('%.3f'%rmsd) if population: popData = pybel.ob.OBPairData() popData.SetAttribute("Population") popData.SetValue('%i'%population) molec.OBMol.CloneData(popData) molec.OBMol.CloneData(newData) # Add new data outsdf.write(molec) if __name__ == "__main__": import sys, os (opts, args) = parseArguments() xtal = args[0] poses = args[1] if not os.path.exists(xtal) or not os.path.exists(poses): sys.exit("Input files not found. Please check the path given is correct.") fit = opts.fit outfname = opts.outfilename threshold = opts.threshold # Read crystal pose crystal = next(pybel.readfile("sdf", xtal)) crystal.removeh() crystalnumatoms = len(crystal.atoms) #If outfname is defined, prepare an output SDF sink to write molecules if outfname: outsdf = pybel.Outputfile('sdf', outfname, overwrite=True) # Find the RMSD between the crystal pose and each docked pose dockedposes = pybel.readfile("sdf", poses) if fit: print "POSE\tRMSD_FIT" else: print "POSE\tRMSD_NOFIT" skipped = [] moleclist = {} # Save all poses with their dockid population = {} # Poses to be written outlist = {} for docki, dockedpose in enumerate(dockedposes): dockedpose.removeh() natoms = len(dockedpose.atoms) if natoms != crystalnumatoms: skipped.append(docki+1) continue if fit: resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True) updateCoords(dockedpose, fitted_result) else: resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False) if threshold: # Calculate RMSD between all previous poses # Discard if rmsd < FILTER threshold if moleclist: match = None bestmatchrmsd = 999999 for did,prevmol in moleclist.iteritems(): tmprmsd = getAutomorphRMSD(prevmol, dockedpose) if tmprmsd < threshold: if tmprmsd < bestmatchrmsd: bestmatchrmsd = tmprmsd match = did if match != None: # Do not write this one # sum one up to the matching previous molecule id print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd) population[match] += 1 else: # There's no match. Print info for this one and write to outsdf if needed # Save this one! if outfname: outlist[docki] = (dockedpose, resultrmsd) print "%d\t%.2f"%((docki+1),resultrmsd) moleclist[docki] = dockedpose population[docki] = 1 else: # First molecule in list. Append for sure moleclist[docki] = dockedpose population[docki] = 1 if outfname: outlist[docki] = (dockedpose, resultrmsd) else: # Just write best rmsd found and the molecule to outsdf if demanded if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd) print "%d\t%.2f"%((docki+1),resultrmsd) if outlist: # Threshold applied and outlist need to be written for docki in sorted(outlist.iterkeys()): molrmsd = outlist[docki] # Get number of matchs in thresholding operation pop = population.get(docki) if not pop: pop = 1 # Save molecule saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop) if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped