Mercurial > repos > marpiech > norwich_docking_tools
diff galaxy-tools/tools/rdock/bin/sdrmsd @ 2:0faa03a92843 draft default tip
Uploaded
author | dzesikah |
---|---|
date | Fri, 26 Aug 2016 10:19:49 -0400 (2016-08-26) |
parents | ad4bc82457e5 |
children |
line wrap: on
line diff
--- a/galaxy-tools/tools/rdock/bin/sdrmsd Fri Aug 26 09:55:15 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,272 +0,0 @@ -#! /usr/bin/env python -# -# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT) -# Script distributed under GNU LGPL 3.0 along rDock software. -# -# This algorithm takes into account molecular automorphism. That is, it identifies -# molecules which are the same but might have atom orders changed and still be able to -# match the pairs and correctly calculate the RMSD. -# -# Author: Daniel Alvarez-Garcia -# Date: 08-11-2013 - -import math -import pybel -import numpy as npy -import optparse - -def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False): - """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654 - definition : superpose3D(ref, target, weights,refmask,targetmask) - @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance) - @type 1 : float64 numpy array (nx3) - --- - @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant. - @type 2 : float 64 numpy array (nx3) - --- - @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1) - @type 3 : float 64 numpy array (n) - @parameter 4: mask - a numpy boolean mask for designating atoms to include - Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms) - Returns a set of new coordinates, aligned to the target state as well as the rmsd - """ - if weights == None : - weights=1.0 - if refmask == None : - refmask=npy.ones(len(ref),"bool") - if targetmask == None : - targetmask=npy.ones(len(target),"bool") - #first get the centroid of both states - ref_centroid = npy.mean(ref[refmask]*weights,axis=0) - #print ref_centroid - refCenteredCoords=ref-ref_centroid - #print refCenteredCoords - target_centroid=npy.mean(target[targetmask]*weights,axis=0) - targetCenteredCoords=target-target_centroid - #print targetCenteredCoords - #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm - # Initial residual, see Kabsch. - E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0) - reftmp=npy.copy(refCenteredCoords[refmask]) - targettmp=npy.copy(targetCenteredCoords[targetmask]) - #print refCenteredCoords[refmask] - #single value decomposition of the dotProduct of both position vectors - try: - dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights) - V, S, Wt = npy.linalg.svd(dotProd ) - except Exception: - try: - dotProd = npy.dot( npy.transpose(reftmp), targettmp) - V, S, Wt = npy.linalg.svd(dotProd ) - except Exception: - print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment" - return ref, 0 - # we already have our solution, in the results from SVD. - # we just need to check for reflections and then produce - # the rotation. V and Wt are orthonormal, so their det's - # are +/-1. - reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt)))) - if reflect == -1.0: - S[-1] = -S[-1] - V[:,-1] = -V[:,-1] - rmsd = E0 - (2.0 * sum(S)) - rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd - #U is simply V*Wt - U = npy.dot(V, Wt) #get the rotation matrix - # rotate and translate the molecule - new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate - #new_coords=(refCenteredCoords + target_centroid) - #print U - if returnRotMat : - return new_coords,rmsd, U - return new_coords,rmsd - - -def squared_distance(coordsA, coordsB): - """Find the squared distance between two 3-tuples""" - sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) ) - return sqrdist - -def rmsd(allcoordsA, allcoordsB): - """Find the RMSD between two lists of 3-tuples""" - deviation = sum(squared_distance(atomA, atomB) for - (atomA, atomB) in zip(allcoordsA, allcoordsB)) - return math.sqrt(deviation / float(len(allcoordsA))) - -def mapToCrystal(xtal, pose): - """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...) - this will mess up the rmsd calculation with OpenBabel""" - query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) - mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query) - mappingpose = pybel.ob.vvpairUIntUInt() - exit=mapper.MapUnique(pose.OBMol,mappingpose) - return mappingpose[0] - -def parseArguments(): - optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog - epilog = """Args: - reference.sdf SDF file with the reference molecule. - input.sdf SDF file with the molecules to be compared to reference.\n""" - parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog) - parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False, - help="Superpose molecules before RMSD calculation") - parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1, - help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float) - parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False, - help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.") - (options, args) = parser.parse_args() - - #Check we have two arguments - if len(args) < 2: - parser.error("Incorrect number of arguments. Use -h or --help options to print help.") - - return options, args - -def updateCoords(obmol, newcoords): - "Update OBMol coordinates. newcoords is a numpy array" - for i,atom in enumerate(obmol): - atom.OBAtom.SetVector(*newcoords[i]) - -def getAutomorphRMSD(target, molec, fit=False): - """ - Use Automorphism to reorder target coordinates to match ref coordinates atom order - for correct RMSD comparison. Only the lowest RMSD will be returned. - - Returns: - If fit=False: bestRMSD (float) RMSD of the best matching mapping. - If fit=True: (bestRMSD, molecCoordinates) (float, npy.array) RMSD of best match and its molecule fitted coordinates. - """ - mappings = pybel.ob.vvpairUIntUInt() - bitvec = pybel.ob.OBBitVec() - lookup = [] - for i, atom in enumerate(target): - lookup.append(i) - success = pybel.ob.FindAutomorphisms(target.OBMol, mappings) - targetcoords = [atom.coords for atom in target] - mappose = npy.array(mapToCrystal(target, molec)) - mappose = mappose[npy.argsort(mappose[:,0])][:,1] - posecoords = npy.array([atom.coords for atom in molec])[mappose] - resultrmsd = 999999999999 - for mapping in mappings: - automorph_coords = [None] * len(targetcoords) - for x, y in mapping: - automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)] - mapping_rmsd = rmsd(posecoords, automorph_coords) - if mapping_rmsd < resultrmsd: - resultrmsd = mapping_rmsd - fitted_result = False - if fit: - fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords)) - if fitted_rmsd < resultrmsd: - resultrmsd = fitted_rmsd - fitted_result = fitted_pose - - if fit: - return (resultrmsd, fitted_pose) - else: - return resultrmsd - -def saveMolecWithRMSD(outsdf, molec, rmsd, population=False): - newData = pybel.ob.OBPairData() - newData.SetAttribute("RMSD") - newData.SetValue('%.3f'%rmsd) - - if population: - popData = pybel.ob.OBPairData() - popData.SetAttribute("Population") - popData.SetValue('%i'%population) - molec.OBMol.CloneData(popData) - - molec.OBMol.CloneData(newData) # Add new data - outsdf.write(molec) - -if __name__ == "__main__": - import sys, os - - (opts, args) = parseArguments() - - xtal = args[0] - poses = args[1] - - if not os.path.exists(xtal) or not os.path.exists(poses): - sys.exit("Input files not found. Please check the path given is correct.") - - fit = opts.fit - outfname = opts.outfilename - threshold = opts.threshold - - # Read crystal pose - crystal = next(pybel.readfile("sdf", xtal)) - crystal.removeh() - crystalnumatoms = len(crystal.atoms) - - #If outfname is defined, prepare an output SDF sink to write molecules - if outfname: - outsdf = pybel.Outputfile('sdf', outfname, overwrite=True) - - # Find the RMSD between the crystal pose and each docked pose - dockedposes = pybel.readfile("sdf", poses) - if fit: print "POSE\tRMSD_FIT" - else: print "POSE\tRMSD_NOFIT" - skipped = [] - moleclist = {} # Save all poses with their dockid - population = {} # Poses to be written - outlist = {} - for docki, dockedpose in enumerate(dockedposes): - dockedpose.removeh() - natoms = len(dockedpose.atoms) - if natoms != crystalnumatoms: - skipped.append(docki+1) - continue - if fit: - resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True) - updateCoords(dockedpose, fitted_result) - else: - resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False) - - if threshold: - # Calculate RMSD between all previous poses - # Discard if rmsd < FILTER threshold - if moleclist: - match = None - bestmatchrmsd = 999999 - for did,prevmol in moleclist.iteritems(): - tmprmsd = getAutomorphRMSD(prevmol, dockedpose) - if tmprmsd < threshold: - if tmprmsd < bestmatchrmsd: - bestmatchrmsd = tmprmsd - match = did - - if match != None: - # Do not write this one - # sum one up to the matching previous molecule id - print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd) - population[match] += 1 - else: - # There's no match. Print info for this one and write to outsdf if needed - # Save this one! - if outfname: outlist[docki] = (dockedpose, resultrmsd) - print "%d\t%.2f"%((docki+1),resultrmsd) - moleclist[docki] = dockedpose - population[docki] = 1 - else: - # First molecule in list. Append for sure - moleclist[docki] = dockedpose - population[docki] = 1 - if outfname: outlist[docki] = (dockedpose, resultrmsd) - else: - # Just write best rmsd found and the molecule to outsdf if demanded - if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd) - print "%d\t%.2f"%((docki+1),resultrmsd) - - if outlist: - # Threshold applied and outlist need to be written - for docki in sorted(outlist.iterkeys()): - molrmsd = outlist[docki] - # Get number of matchs in thresholding operation - pop = population.get(docki) - if not pop: pop = 1 - # Save molecule - saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop) - - if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped