Mercurial > repos > marpiech > norwich_docking_tools

diff galaxy-tools/tools/rdock/bin/sdrmsd @ 2:0faa03a92843 draft default tip
Uploaded
author: dzesikah
date: Fri, 26 Aug 2016 10:19:49 -0400
parents: ad4bc82457e5
--- a/galaxy-tools/tools/rdock/bin/sdrmsd	Fri Aug 26 09:55:15 2016 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,272 +0,0 @@
-#! /usr/bin/env python
-#
-# Calculate SMART RMSD with or without molecular superposition (FIT or NOFIT) 
-# Script distributed under GNU LGPL 3.0 along rDock software.
-# 
-# This algorithm takes into account molecular automorphism. That is, it identifies
-# molecules which are the same but might have atom orders changed and still be able to 
-# match the pairs and correctly calculate the RMSD.
-#
-# Author: Daniel Alvarez-Garcia
-# Date: 08-11-2013
-
-import math
-import pybel
-import numpy as npy
-import optparse
-
-def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False):
-    """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654
-    definition : superpose3D(ref, target, weights,refmask,targetmask)
-    @parameter 1 :  ref - xyz coordinates of the reference structure (the ligand for instance)
-    @type 1 :       float64 numpy array (nx3)
-    ---
-    @parameter 2 :  target - theoretical target positions to which we should move (does not need to be physically relevant.
-    @type 2 :       float 64 numpy array (nx3)
-    ---
-    @parameter 3:   weights - numpy array of atom weights (usuallly between 0 and 1)
-    @type 3 :       float 64 numpy array (n)
-    @parameter 4:   mask - a numpy boolean mask for designating atoms to include
-    Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms)
-    Returns a set of new coordinates, aligned to the target state as well as the rmsd
-    """
-    if weights == None :
-        weights=1.0
-    if refmask == None :
-        refmask=npy.ones(len(ref),"bool")
-    if targetmask == None :
-        targetmask=npy.ones(len(target),"bool")
-    #first get the centroid of both states
-    ref_centroid = npy.mean(ref[refmask]*weights,axis=0)
-    #print ref_centroid
-    refCenteredCoords=ref-ref_centroid
-    #print refCenteredCoords
-    target_centroid=npy.mean(target[targetmask]*weights,axis=0)
-    targetCenteredCoords=target-target_centroid
-    #print targetCenteredCoords
-    #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm
-    # Initial residual, see Kabsch.
-    E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0)
-    reftmp=npy.copy(refCenteredCoords[refmask])
-    targettmp=npy.copy(targetCenteredCoords[targetmask])
-    #print refCenteredCoords[refmask]
-    #single value decomposition of the dotProduct of both position vectors
-    try:
-        dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights)
-        V, S, Wt = npy.linalg.svd(dotProd )
-    except Exception:
-        try:
-            dotProd = npy.dot( npy.transpose(reftmp), targettmp)
-            V, S, Wt = npy.linalg.svd(dotProd )
-        except Exception:
-            print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment"
-        return ref, 0
-    # we already have our solution, in the results from SVD.
-    # we just need to check for reflections and then produce
-    # the rotation.  V and Wt are orthonormal, so their det's
-    # are +/-1.
-    reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt))))
-    if reflect == -1.0:
-        S[-1] = -S[-1]
-        V[:,-1] = -V[:,-1]
-    rmsd = E0 - (2.0 * sum(S))
-    rmsd = npy.sqrt(abs(rmsd / len(ref[refmask])))   #get the rmsd
-    #U is simply V*Wt
-    U = npy.dot(V, Wt)  #get the rotation matrix
-    # rotate and translate the molecule
-    new_coords = npy.dot((refCenteredCoords), U)+ target_centroid  #translate & rotate
-    #new_coords=(refCenteredCoords + target_centroid)
-    #print U
-    if returnRotMat : 
-        return new_coords,rmsd, U
-    return new_coords,rmsd
-
-
-def squared_distance(coordsA, coordsB):
-    """Find the squared distance between two 3-tuples"""
-    sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) )
-    return sqrdist
-    
-def rmsd(allcoordsA, allcoordsB):
-    """Find the RMSD between two lists of 3-tuples"""
-    deviation = sum(squared_distance(atomA, atomB) for
-                    (atomA, atomB) in zip(allcoordsA, allcoordsB))
-    return math.sqrt(deviation / float(len(allcoordsA)))
-    
-def mapToCrystal(xtal, pose):
-    """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...)
-     this will mess up the rmsd calculation with OpenBabel"""
-    query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) 
-    mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query)
-    mappingpose = pybel.ob.vvpairUIntUInt()
-    exit=mapper.MapUnique(pose.OBMol,mappingpose)
-    return mappingpose[0]
-
-def parseArguments():
-	optparse.OptionParser.format_epilog = lambda self, formatter: self.epilog
-	epilog = """Args:
-	reference.sdf		SDF file with the reference molecule.
-	input.sdf		SDF file with the molecules to be compared to reference.\n"""
-	parser = optparse.OptionParser("usage: %prog [options] reference.sdf input.sdf", epilog=epilog)
-	parser.add_option("-f", "--fit",dest="fit", action="store_true", default=False,
-                  help="Superpose molecules before RMSD calculation")
-	parser.add_option("--threshold","-t",dest="threshold", action="store", nargs=1, 
-                  help="Discard poses with RMSD < THRESHOLD with respect previous poses which where not rejected based on same principle. A Population SDField will be added to output SD with the population number.", type=float)
-	parser.add_option("-o","--out", dest="outfilename", metavar="FILE", default=False,
-                  help="If declared, write an output SDF file with the input molecules with a new sdfield <RMSD>. If molecule was fitted, the fitted molecule coordinates will be saved.")
-	(options, args) =  parser.parse_args()
-	
-	#Check we have two arguments
-	if len(args) < 2:
-		parser.error("Incorrect number of arguments. Use -h or --help options to print help.")
-
-	return options, args
-
-def updateCoords(obmol, newcoords):
-    "Update OBMol coordinates. newcoords is a numpy array"
-    for i,atom in enumerate(obmol):
-        atom.OBAtom.SetVector(*newcoords[i])
-
-def getAutomorphRMSD(target, molec, fit=False):
-    """
-    Use Automorphism to reorder target coordinates to match ref coordinates atom order
-    for correct RMSD comparison. Only the lowest RMSD will be returned.
-    
-    Returns:
-      If fit=False: 	bestRMSD	(float)					RMSD of the best matching mapping.
-      If fit=True:	(bestRMSD, molecCoordinates)	(float, npy.array)	RMSD of best match and its molecule fitted coordinates.	
-    """
-    mappings = pybel.ob.vvpairUIntUInt()
-    bitvec = pybel.ob.OBBitVec()
-    lookup = []
-    for i, atom in enumerate(target):
-        lookup.append(i)
-    success = pybel.ob.FindAutomorphisms(target.OBMol, mappings)
-    targetcoords = [atom.coords for atom in target]
-    mappose = npy.array(mapToCrystal(target, molec))
-    mappose = mappose[npy.argsort(mappose[:,0])][:,1]
-    posecoords = npy.array([atom.coords for atom in molec])[mappose]
-    resultrmsd = 999999999999
-    for mapping in mappings:
-	automorph_coords = [None] * len(targetcoords)
-	for x, y in mapping:
-	    automorph_coords[lookup.index(x)] = targetcoords[lookup.index(y)]
-	mapping_rmsd = rmsd(posecoords, automorph_coords)
-	if mapping_rmsd < resultrmsd:
-	    resultrmsd = mapping_rmsd
-	    fitted_result = False
-	if fit: 
-	    fitted_pose, fitted_rmsd = superpose3D(npy.array(automorph_coords), npy.array(posecoords))
-	    if fitted_rmsd < resultrmsd:
-		resultrmsd = fitted_rmsd
-		fitted_result = fitted_pose
-    
-    if fit:
-      return (resultrmsd, fitted_pose)
-    else:
-      return resultrmsd
-
-def saveMolecWithRMSD(outsdf, molec, rmsd, population=False):
-    newData = pybel.ob.OBPairData()	
-    newData.SetAttribute("RMSD")
-    newData.SetValue('%.3f'%rmsd)
-    
-    if population:
-	popData = pybel.ob.OBPairData()
-	popData.SetAttribute("Population")
-	popData.SetValue('%i'%population)
-	molec.OBMol.CloneData(popData)
-	
-    molec.OBMol.CloneData(newData)           # Add new data
-    outsdf.write(molec)
-    
-if __name__ == "__main__":
-    import sys, os
-   
-    (opts, args) = parseArguments() 
-	
-    xtal = args[0]
-    poses = args[1]
-
-    if not os.path.exists(xtal) or not os.path.exists(poses):
-	sys.exit("Input files not found. Please check the path given is correct.")
-	
-    fit = opts.fit
-    outfname = opts.outfilename
-    threshold = opts.threshold
-
-    # Read crystal pose
-    crystal = next(pybel.readfile("sdf", xtal))
-    crystal.removeh()
-    crystalnumatoms = len(crystal.atoms)
-
-    #If outfname is defined, prepare an output SDF sink to write molecules
-    if outfname:
-	outsdf = pybel.Outputfile('sdf', outfname, overwrite=True)
-
-    # Find the RMSD between the crystal pose and each docked pose
-    dockedposes = pybel.readfile("sdf", poses)
-    if fit: print "POSE\tRMSD_FIT"
-    else: print "POSE\tRMSD_NOFIT"
-    skipped = []
-    moleclist = {}	# Save all poses with their dockid
-    population = {}	# Poses to be written
-    outlist = {}
-    for docki, dockedpose in enumerate(dockedposes):
-        dockedpose.removeh()
-	natoms = len(dockedpose.atoms)
-	if natoms != crystalnumatoms: 
-		skipped.append(docki+1)
-		continue
-	if fit: 
-	    resultrmsd, fitted_result = getAutomorphRMSD(crystal, dockedpose, fit=True)
-	    updateCoords(dockedpose, fitted_result)
-	else:
-	    resultrmsd = getAutomorphRMSD(crystal, dockedpose, fit=False)
-	
-	if threshold:
-	    # Calculate RMSD between all previous poses
-	    # Discard if rmsd < FILTER threshold
-	    if moleclist:
-		match = None
-		bestmatchrmsd = 999999
-		for did,prevmol in moleclist.iteritems():
-		    tmprmsd = getAutomorphRMSD(prevmol, dockedpose)
-		    if tmprmsd < threshold:
-			if tmprmsd < bestmatchrmsd:
-			    bestmatchrmsd = tmprmsd
-			    match = did
-			
-		if match != None:
-		    # Do not write this one
-		    # sum one up to the matching previous molecule id
-		    print >> sys.stderr, "Pose %i matches pose %i with %.3f RMSD"%(docki+1, match+1, bestmatchrmsd)
-		    population[match] += 1
-		else:
-		    # There's no match. Print info for this one and write to outsdf if needed
-		    # Save this one!
-		    if outfname: outlist[docki] = (dockedpose, resultrmsd)
-		    print "%d\t%.2f"%((docki+1),resultrmsd)
-		    moleclist[docki] = dockedpose
-		    population[docki] = 1
-	    else:
-		# First molecule in list. Append for sure
-		moleclist[docki] = dockedpose
-		population[docki] = 1
-		if outfname: outlist[docki] = (dockedpose, resultrmsd)
-	else:
-	    # Just write best rmsd found and the molecule to outsdf if demanded
-	    if outfname: saveMolecWithRMSD(outsdf, dockedpose, resultrmsd)
-	    print "%d\t%.2f"%((docki+1),resultrmsd)
-
-    if outlist:
-	# Threshold applied and outlist need to be written
-	for docki in sorted(outlist.iterkeys()):
-	    molrmsd = outlist[docki]
-	    # Get number of matchs in thresholding operation
-	    pop = population.get(docki)
-	    if not pop: pop = 1
-	    # Save molecule
-	    saveMolecWithRMSD(outsdf, molrmsd[0], molrmsd[1], pop)
-	    
-    if skipped: print >> sys.stderr, "SKIPPED input molecules due to number of atom missmatch: %s"%skipped
author	dzesikah
date	Fri, 26 Aug 2016 10:19:49 -0400
parents	ad4bc82457e5
children