Mercurial > repos > marpiech > norwich_tools_docking
comparison tools/rdock/bin/sdtether @ 3:b02d74d22d05 draft default tip
planemo upload
| author | marpiech |
|---|---|
| date | Mon, 29 Aug 2016 08:23:52 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:bd50f811878f | 3:b02d74d22d05 |
|---|---|
| 1 #! /usr/bin/env python | |
| 2 # | |
| 3 # Substitute for rbtether of rDock. Will align input molecules to a reference fragment defined by a smarts string, | |
| 4 # it will add a TETHERED ATOM property field to the output SDF that is correctly understood by rDock | |
| 5 # rDock will restrain the matching atom positions to the reference molecule coordinates. | |
| 6 # | |
| 7 # Initially implemented with a conformational search algorithm to better match target coordinates. | |
| 8 # But had problems with OBabel FF generating non-sense conformers. So in this version the conformer search is commented out. | |
| 9 # Now if the input molecule do not have a good conformation, might not align well with the target. This effect will be | |
| 10 # dimished or even vanish if the SMARTS string is defined for a rigid region (like a ring). | |
| 11 # I'm still trying to incorporate somehow this conformational search. | |
| 12 # | |
| 13 # Script distributed under GNU LGPL 3.0 along rDock software. | |
| 14 # | |
| 15 # Author: Daniel Alvarez-Garcia | |
| 16 # Date: 08-11-2013 | |
| 17 | |
| 18 import math | |
| 19 import pybel | |
| 20 import numpy as npy | |
| 21 | |
| 22 def superpose3D(ref, target, weights=None,refmask=None,targetmask=None,returnRotMat=False): | |
| 23 """superpose3D performs 3d superposition using a weighted Kabsch algorithm : http://dx.doi.org/10.1107%2FS0567739476001873 & doi: 10.1529/biophysj.105.066654 | |
| 24 definition : superpose3D(ref, target, weights,refmask,targetmask) | |
| 25 @parameter 1 : ref - xyz coordinates of the reference structure (the ligand for instance) | |
| 26 @type 1 : float64 numpy array (nx3) | |
| 27 --- | |
| 28 @parameter 2 : target - theoretical target positions to which we should move (does not need to be physically relevant. | |
| 29 @type 2 : float 64 numpy array (nx3) | |
| 30 --- | |
| 31 @parameter 3: weights - numpy array of atom weights (usuallly between 0 and 1) | |
| 32 @type 3 : float 64 numpy array (n) | |
| 33 @parameter 4: mask - a numpy boolean mask for designating atoms to include | |
| 34 Note ref and target positions must have the same dimensions -> n*3 numpy arrays where n is the number of points (or atoms) | |
| 35 Returns a set of new coordinates, aligned to the target state as well as the rmsd | |
| 36 """ | |
| 37 if weights == None : | |
| 38 weights=1.0 | |
| 39 if refmask == None : | |
| 40 refmask=npy.ones(len(ref),"bool") | |
| 41 if targetmask == None : | |
| 42 targetmask=npy.ones(len(target),"bool") | |
| 43 #first get the centroid of both states | |
| 44 ref_centroid = npy.mean(ref[refmask]*weights,axis=0) | |
| 45 #print ref_centroid | |
| 46 refCenteredCoords=ref-ref_centroid | |
| 47 #print refCenteredCoords | |
| 48 target_centroid=npy.mean(target[targetmask]*weights,axis=0) | |
| 49 targetCenteredCoords=target-target_centroid | |
| 50 #print targetCenteredCoords | |
| 51 #the following steps come from : http://www.pymolwiki.org/index.php/OptAlign#The_Code and http://en.wikipedia.org/wiki/Kabsch_algorithm | |
| 52 # Initial residual, see Kabsch. | |
| 53 E0 = npy.sum( npy.sum(refCenteredCoords[refmask] * refCenteredCoords[refmask]*weights,axis=0),axis=0) + npy.sum( npy.sum(targetCenteredCoords[targetmask] * targetCenteredCoords[targetmask]*weights,axis=0),axis=0) | |
| 54 reftmp=npy.copy(refCenteredCoords[refmask]) | |
| 55 targettmp=npy.copy(targetCenteredCoords[targetmask]) | |
| 56 #print refCenteredCoords[refmask] | |
| 57 #single value decomposition of the dotProduct of both position vectors | |
| 58 try: | |
| 59 dotProd = npy.dot( npy.transpose(reftmp), targettmp* weights) | |
| 60 V, S, Wt = npy.linalg.svd(dotProd ) | |
| 61 except Exception: | |
| 62 try: | |
| 63 dotProd = npy.dot( npy.transpose(reftmp), targettmp) | |
| 64 V, S, Wt = npy.linalg.svd(dotProd ) | |
| 65 except Exception: | |
| 66 print >> sys.stderr,"Couldn't perform the Single Value Decomposition, skipping alignment" | |
| 67 return ref, 0 | |
| 68 # we already have our solution, in the results from SVD. | |
| 69 # we just need to check for reflections and then produce | |
| 70 # the rotation. V and Wt are orthonormal, so their det's | |
| 71 # are +/-1. | |
| 72 reflect = float(str(float(npy.linalg.det(V) * npy.linalg.det(Wt)))) | |
| 73 if reflect == -1.0: | |
| 74 S[-1] = -S[-1] | |
| 75 V[:,-1] = -V[:,-1] | |
| 76 rmsd = E0 - (2.0 * sum(S)) | |
| 77 rmsd = npy.sqrt(abs(rmsd / len(ref[refmask]))) #get the rmsd | |
| 78 #U is simply V*Wt | |
| 79 U = npy.dot(V, Wt) #get the rotation matrix | |
| 80 # rotate and translate the molecule | |
| 81 new_coords = npy.dot((refCenteredCoords), U)+ target_centroid #translate & rotate | |
| 82 #new_coords=(refCenteredCoords + target_centroid) | |
| 83 #print U | |
| 84 if returnRotMat : | |
| 85 return U, ref_centroid, target_centroid, rmsd | |
| 86 return new_coords,rmsd | |
| 87 | |
| 88 | |
| 89 def squared_distance(coordsA, coordsB): | |
| 90 """Find the squared distance between two 3-tuples""" | |
| 91 sqrdist = sum( (a-b)**2 for a, b in zip(coordsA, coordsB) ) | |
| 92 return sqrdist | |
| 93 | |
| 94 def rmsd(allcoordsA, allcoordsB): | |
| 95 """Find the RMSD between two lists of 3-tuples""" | |
| 96 deviation = sum(squared_distance(atomA, atomB) for | |
| 97 (atomA, atomB) in zip(allcoordsA, allcoordsB)) | |
| 98 return math.sqrt(deviation / float(len(allcoordsA))) | |
| 99 | |
| 100 def mapToCrystal(xtal, pose): | |
| 101 """Some docking programs might alter the order of the atoms in the output (like Autodock Vina does...) | |
| 102 this will mess up the rmsd calculation with OpenBabel""" | |
| 103 query = pybel.ob.CompileMoleculeQuery(xtal.OBMol) | |
| 104 mapper=pybel.ob.OBIsomorphismMapper.GetInstance(query) | |
| 105 mappingpose = pybel.ob.vvpairUIntUInt() | |
| 106 exit=mapper.MapUnique(pose.OBMol,mappingpose) | |
| 107 return mappingpose[0] | |
| 108 | |
| 109 def takeCoords(obmol): | |
| 110 """Take coordinates of an OBMol as a npy array""" | |
| 111 return npy.array([atom.coords for atom in obmol]) | |
| 112 | |
| 113 def updateCoords(obmol, newcoords): | |
| 114 "Update OBMol coordinates. newcoords is a numpy array" | |
| 115 for i,atom in enumerate(obmol): | |
| 116 atom.OBAtom.SetVector(*newcoords[i]) | |
| 117 | |
| 118 def prepareAtomString(idlist): | |
| 119 s = "" | |
| 120 n = len(idlist) | |
| 121 for i, id in enumerate(idlist): | |
| 122 s += "%i"%id | |
| 123 if (i+1) == n: s+="\n" | |
| 124 elif (i+1)%35 == 0: s+=",\n" | |
| 125 else: s+="," | |
| 126 return s | |
| 127 | |
| 128 | |
| 129 if __name__ == "__main__": | |
| 130 import sys | |
| 131 | |
| 132 if len(sys.argv) != 5: | |
| 133 sys.exit("USAGE: %s reference.sdf input.sdf output.sdf 'SMARTS'"%sys.argv[0]) | |
| 134 | |
| 135 refsdf = sys.argv[1] | |
| 136 molsdf = sys.argv[2] | |
| 137 outsdf = sys.argv[3] | |
| 138 smarts = pybel.Smarts(sys.argv[4]) | |
| 139 | |
| 140 # Read reference pose and get atom list matching smarts query | |
| 141 # if more than 1 match, take the first one | |
| 142 ref = next(pybel.readfile("sdf", refsdf)) | |
| 143 refMatchIds = smarts.findall(ref) | |
| 144 numRefMatchs = len(refMatchIds) | |
| 145 | |
| 146 if not numRefMatchs: | |
| 147 sys.exit("No match found in the reference structure and the SMARTS string given. Please check it.") | |
| 148 | |
| 149 if numRefMatchs > 1: | |
| 150 print "More than one match in the reference molecule for the SMARTS string given. Will tether each input molecule all possible ways." | |
| 151 | |
| 152 refIndxPerMatch = [npy.array(rmi) - 1 for rmi in refMatchIds] | |
| 153 | |
| 154 # Take coordinates for the reference matched atoms | |
| 155 refCoords = takeCoords(ref) | |
| 156 refMatchCoords = [npy.take(refCoords, refIndx, axis=0) for refIndx in refIndxPerMatch] | |
| 157 | |
| 158 # Do the same for molecule in molsdf | |
| 159 out=pybel.Outputfile('sdf', outsdf, overwrite=True) | |
| 160 molSupp = pybel.readfile("sdf", molsdf) | |
| 161 ff = pybel.ob.OBForceField_FindForceField('MMFF94') | |
| 162 for i,mol in enumerate(molSupp): | |
| 163 print "## Molecule %i"%(i+1), | |
| 164 mol.OBMol.DeleteNonPolarHydrogens() | |
| 165 molMatchAllIds = smarts.findall(mol) | |
| 166 numMatchs = len(molMatchAllIds) | |
| 167 | |
| 168 if numMatchs == 0: | |
| 169 print "No_Match", | |
| 170 continue | |
| 171 elif numMatchs ==1: | |
| 172 print "Match", | |
| 173 elif numMatchs > 1: | |
| 174 print "Multiple_Match SMART Matches for this molecule (%d)"%numMatchs, | |
| 175 | |
| 176 # If more than one match, write an output of the same molecule for each match | |
| 177 # Start a default bestcoord and rmsd for later looping for each pose | |
| 178 bestCoordPerMatch = [[0 for i in range(numMatchs)] for i in range(numRefMatchs)] | |
| 179 bestRMSPerMatch = [[999 for i in range(numMatchs)] for i in range(numRefMatchs)] | |
| 180 | |
| 181 # Will do a randomrotorsearch to find conformer with the lower rmsd when superposing | |
| 182 # At least 20 when possible | |
| 183 #ff.Setup(mol.OBMol) | |
| 184 #numats = mol.OBMol.NumAtoms() | |
| 185 #numrot = mol.OBMol.NumRotors() | |
| 186 #print "Atoms: %i, Rotors: %i"%(numats, numrot) | |
| 187 #geomopt = 300 | |
| 188 #genconf = 100 | |
| 189 # increase iterations if bigger molecule or bigger number of rotatable bonds | |
| 190 # for allowing better sampling | |
| 191 #if numats > 40 and numrot > 5: | |
| 192 # geomopt = 300 | |
| 193 # genconf = 150 | |
| 194 #if numats > 55 and numrot > 7: | |
| 195 # genconf = 100 | |
| 196 # geomopt = 500 | |
| 197 #print "\tDoing conformational search with WeightedRotorSearch (%i, %i)..."%(genconf, geomopt), | |
| 198 #ff.SteepestDescent(500, 1.0e-4) | |
| 199 #ff.WeightedRotorSearch(genconf,geomopt) | |
| 200 #ff.ConjugateGradients(500, 1.0e-6) | |
| 201 #ff.GetConformers(mol.OBMol) | |
| 202 #numconf = mol.OBMol.NumConformers() | |
| 203 numconf = 1 | |
| 204 #print "%i conformers generated"%numconf | |
| 205 if numconf > 1: | |
| 206 # Doing conf search | |
| 207 #for i in range(numconf): | |
| 208 # mol.OBMol.SetConformer(i) | |
| 209 # confCoords = takeCoords(mol) | |
| 210 # print 'coord:',confCoords[0,:] | |
| 211 # | |
| 212 # for imatch, molMatchIds in enumerate(molMatchAllIds): | |
| 213 # molMatchIndx = npy.array(molMatchIds) - 1 | |
| 214 # confMatchCoords = npy.take(confCoords, molMatchIndx, axis=0) | |
| 215 # | |
| 216 # # Align: Get rotation matrix between the two sets of coords | |
| 217 # # Apply rotation to the whole target molecule | |
| 218 # rotMat, targetCentroid, refCentroid, rmsd = superpose3D(confMatchCoords, refMatchCoords, returnRotMat=True) | |
| 219 # if rmsd < bestRMSPerMatch[imatch]: | |
| 220 # newcoords = npy.dot((confCoords - targetCentroid), rotMat) + refCentroid | |
| 221 # bestRMSPerMatch[imatch] = rmsd | |
| 222 # bestCoordPerMatch[imatch] = newcoords | |
| 223 # #if bestrms < 0.01: break | |
| 224 pass | |
| 225 else: | |
| 226 molCoords = takeCoords(mol) | |
| 227 for imatch, molMatchIds in enumerate(molMatchAllIds): | |
| 228 # loop in each matching way for the input molecule | |
| 229 molMatchIndx = npy.array(molMatchIds) - 1 | |
| 230 molMatchCoords = npy.take(molCoords, molMatchIndx, axis=0) | |
| 231 | |
| 232 # Loop over the reference matches | |
| 233 # Align: Get rotation matrix between the two sets of coords | |
| 234 # Apply rotation to the whole target molecule | |
| 235 for ir, refMatchCoord in enumerate(refMatchCoords): | |
| 236 rotMat, targetCentroid, refCentroid, rmsd = superpose3D(molMatchCoords, refMatchCoord, returnRotMat=True) | |
| 237 if rmsd < bestRMSPerMatch[ir][imatch]: | |
| 238 newcoords = npy.dot((molCoords - targetCentroid), rotMat) + refCentroid | |
| 239 bestRMSPerMatch[ir][imatch] = rmsd | |
| 240 bestCoordPerMatch[ir][imatch] = newcoords | |
| 241 | |
| 242 # Finally update molecule coordinates with the best matching coordinates found | |
| 243 # change molecule coordinates, set TETHERED ATOMS property and save | |
| 244 for imatch in range(numMatchs): | |
| 245 for irefmatch in range(numRefMatchs): | |
| 246 bestCoord = bestCoordPerMatch[irefmatch][imatch] | |
| 247 bestRMS = bestRMSPerMatch[irefmatch][imatch] | |
| 248 print "\tBest RMSD reached (match %d, refmatch %d): %s"%(imatch, irefmatch, bestRMS) | |
| 249 molMatchID = molMatchAllIds[imatch] | |
| 250 updateCoords(mol, bestCoord) | |
| 251 newData = pybel.ob.OBPairData() | |
| 252 newData.SetAttribute("TETHERED ATOMS") | |
| 253 newData.SetValue(prepareAtomString(molMatchID)) | |
| 254 | |
| 255 mol.OBMol.DeleteData("TETHERED ATOMS") # Remove Previous DATA | |
| 256 mol.OBMol.CloneData(newData) # Add new data | |
| 257 out.write(mol) | |
| 258 | |
| 259 out.close() | |
| 260 | |
| 261 print "DONE" | |
| 262 sys.stdout.close() | |
| 263 sys.stderr.close() |
