Previous changeset 10:eed751918a20 (2020-03-21) Next changeset 12:5b66a1684f18 (2020-04-09) |
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4" |
added:
distance_finder.py test-data/ligands.sdf |
removed:
__pycache__/cheminfolib.cpython-36.pyc |
b |
diff -r eed751918a20 -r 49d21d05f77c __pycache__/cheminfolib.cpython-36.pyc |
b |
Binary file __pycache__/cheminfolib.cpython-36.pyc has changed |
b |
diff -r eed751918a20 -r 49d21d05f77c distance_finder.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distance_finder.py Wed Mar 25 16:48:55 2020 -0400 |
[ |
@@ -0,0 +1,109 @@ +# Reports distances of ligands to reference points. An example input for the points is: +# +# 5.655 1.497 18.223 +# 1.494 -8.367 18.574 +# 13.034 6.306 25.232 +# +# Data can be space or tab separated but must contain 3 and only 3 numbers for the x, y and z coordinates +# +# That would encode 3 points. +# Each record in the SDF input is read and the closest heavy atom to each of the reference points is recorded as +# a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example +# there would be properties for distance1, distance2 and distance3. + +import argparse, os, sys, math +from openbabel import pybel + + + +def log(*args, **kwargs): + """Log output to STDERR + """ + print(*args, file=sys.stderr, ** kwargs) + + +def execute(ligands_sdf, points_file, outfile): + """ + :param ligands_sdf: A SDF with the 3D molecules to test + :param points_file: A file with the points to consider. + :param outfile: The name of the file for the SDF output + :return: + """ + + + points = [] + + # read the points + with open(points_file, 'r') as f: + for line in f.readlines(): + line.strip() + if line: + p = line.split() + if len(p) == 3: + points.append((float(p[0]), float(p[1]), float(p[2]))) + log("Read points",p) + continue + log("Failed to read line:", line) + log('Found', len(points), 'atom points') + + sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True) + + count = 0 + for mol in pybel.readfile("sdf", ligands_sdf): + count += 1 + if count % 50000 == 0: + log('Processed', count) + + try: + # print("Processing mol", mol.title) + + clone = pybel.Molecule(mol) + clone.removeh() + + coords = [] + for atom in clone.atoms: + coords.append(atom.coords) + + p = 0 + for point in points: + p += 1 + distances = [] + for i in coords: + # calculates distance based on cartesian coordinates + distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2) + distances.append(distance) + # log("distance:", distance) + min_distance = min(distances) + # log('Min:', min_distance) + # log(count, p, min_distance) + + mol.data['distance' + str(p)] = min_distance + + sdf_writer.write(mol) + + except Exception as e: + log('Failed to handle molecule: '+ str(e)) + continue + + sdf_writer.close() + log('Wrote', count, 'molecules') + + +def main(): + global work_dir + + parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points') + + parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)") + parser.add_argument('-p', '--points', help="PDB format file with atoms") + parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") + + + args = parser.parse_args() + log("XChem distances args: ", args) + + execute(args.input, args.points, args.outfile) + + +if __name__ == "__main__": + main() |
b |
diff -r eed751918a20 -r 49d21d05f77c test-data/ligands.sdf --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/ligands.sdf Wed Mar 25 16:48:55 2020 -0400 |
[ |
b'@@ -0,0 +1,348 @@\n+C[C@H](NS(C)(=O)=O)c1ccccn1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 13 13 0 0 0 0 0 0 0 0999 V2000\n+ 13.1106 0.7550 23.2143 C 0 0 0 0 0 0\n+ 12.0087 1.7547 22.8361 C 0 0 0 0 0 0\n+ 11.4071 1.4261 21.5334 N 0 0 0 0 0 0\n+ 9.9388 0.8337 21.6338 S 0 0 0 0 0 0\n+ 9.0411 2.3499 21.9038 C 0 0 0 0 0 0\n+ 9.3678 0.3273 20.3892 O 0 0 0 0 0 0\n+ 9.6367 0.0497 22.8298 O 0 0 0 0 0 0\n+ 12.5588 3.1798 22.7273 C 0 0 0 0 0 0\n+ 12.8091 3.9103 23.8920 C 0 0 0 0 0 0\n+ 13.2507 5.2279 23.7909 C 0 0 0 0 0 0\n+ 13.4398 5.7854 22.5337 C 0 0 0 0 0 0\n+ 13.1881 4.9943 21.4282 C 0 0 0 0 0 0\n+ 12.7627 3.7170 21.4945 N 0 0 0 0 0 0\n+ 2 8 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 1 1 0 0 0\n+ 3 4 1 0 0 0\n+ 4 5 1 0 0 0\n+ 4 6 2 0 0 0\n+ 4 7 2 0 0 0\n+ 8 9 2 0 0 0\n+ 8 13 1 0 0 0\n+ 9 10 1 0 0 0\n+ 10 11 2 0 0 0\n+ 11 12 1 0 0 0\n+ 12 13 2 0 0 0\n+M END\n+$$$$\n+C[C@@H](NS(C)(=O)=O)c1ccccn1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 13 13 0 0 0 0 0 0 0 0999 V2000\n+ 6.8559 1.7506 21.3142 C 0 0 0 0 0 0\n+ 7.4019 0.6872 20.3547 C 0 0 0 0 0 0\n+ 7.4689 1.2291 18.9518 N 0 0 0 0 0 0\n+ 6.4062 0.5634 17.8113 S 0 0 0 0 0 0\n+ 4.8453 0.8765 18.6009 C 0 0 0 0 0 0\n+ 6.4632 1.4005 16.6345 O 0 0 0 0 0 0\n+ 6.5887 -0.8668 17.7602 O 0 0 0 0 0 0\n+ 8.6586 -0.0013 20.8698 C 0 0 0 0 0 0\n+ 9.8290 0.7331 21.0631 C 0 0 0 0 0 0\n+ 10.9643 0.0817 21.5388 C 0 0 0 0 0 0\n+ 10.9072 -1.2804 21.8047 C 0 0 0 0 0 0\n+ 9.7156 -1.9413 21.5709 C 0 0 0 0 0 0\n+ 8.6053 -1.3390 21.0982 N 0 0 0 0 0 0\n+ 2 1 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 8 1 0 0 0\n+ 3 4 1 0 0 0\n+ 4 5 1 0 0 0\n+ 4 6 2 0 0 0\n+ 4 7 2 0 0 0\n+ 8 9 2 0 0 0\n+ 8 13 1 0 0 0\n+ 9 10 1 0 0 0\n+ 10 11 2 0 0 0\n+ 11 12 1 0 0 0\n+ 12 13 2 0 0 0\n+M END\n+$$$$\n+FC(F)(F)c1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 10 10 0 0 0 0 0 0 0 0999 V2000\n+ 9.6749 1.3998 21.0525 F 0 0 0 0 0 0\n+ 10.4122 1.5718 22.1856 C 0 0 0 0 0 0\n+ 10.7385 2.8940 22.2189 F 0 0 0 0 0 0\n+ 9.5494 1.3747 23.2216 F 0 0 0 0 0 0\n+ 11.6206 0.6719 22.2638 C 0 0 0 0 0 0\n+ 12.7782 1.0724 22.9480 C 0 0 0 0 0 0\n+ 13.8994 0.2427 22.9901 C 0 0 0 0 0 0\n+ 13.8750 -0.9970 22.3527 C 0 0 0 0 0 0\n+ 12.7278 -1.4119 21.6796 C 0 0 0 0 0 0\n+ 11.6049 -0.5838 21.6354 C 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 4 1 0 0 0\n+ 2 5 1 0 0 0\n+ 5 6 2 0 0 0\n+ 5 10 1 0 0 0\n+ 6 7 1 0 0 0\n+ 7 8 2 0 0 0\n+ 8 9 1 0 0 0\n+ 9 10 2 0 0 0\n+M END\n+$$$$\n+O=[SH](=O)c1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 9 9 0 0 0 0 0 0 0 0999 V2000\n+ 13.5834 1.0389 23.2971 O 0 0 0 0 0 0\n+ 12.4089 1.7539 22.8402 S 0 0 0 0 0 0\n+ 11.1134 1.5953 23.4682 O 0 0 0 0 0 0\n+ 12.7957 3.4745 22.8588 C 0 0 0 0 0 0\n+ 13.0347 4.1328 21.6517 C 0 0 0 0 0 0\n+ 13.3512 5.4908 21.6741 C 0 0 0 0 0 0\n+ 13.4284 6.1723 22.8914 C 0 0 0 0 0 0\n+ 13.1905 5.5006 24.0928 C 0 0 0 0 0 0\n+ 12.8736 4.1434 24.0815 C 0 0 0 0 0 0\n+ 1 2 2 0 0 0\n+ 2 3 2 0 0 0\n+ 2 4 1 0 0 0\n+ 4 5 2 0 0 0\n+ 4 9 1 0 0 0\n+ 5 6 1 0 0 0\n+ 6 7 2 0 0 0\n+ 7 8 1 0 0 0\n+ 8 9 2 0 0 0\n+M END\n+$$$$\n+CSCCNC(=O)c1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 13 13 0 0 0 0 0 0 0 0999 V2000\n+ 5.5458 -1.4150 18.8612 C'..b'0\n+ 11 12 1 0 0 0\n+M END\n+$$$$\n+C=Cc1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 8 8 0 0 0 0 0 0 0 0999 V2000\n+ 3.0012 2.5994 19.2746 C 0 0 0 0 0 0\n+ 3.4784 1.3846 19.5760 C 0 0 0 0 0 0\n+ 4.7459 0.8376 19.0724 C 0 0 0 0 0 0\n+ 5.0977 -0.4790 19.4008 C 0 0 0 0 0 0\n+ 6.2949 -1.0353 18.9390 C 0 0 0 0 0 0\n+ 7.1469 -0.2825 18.1324 C 0 0 0 0 0 0\n+ 6.7985 1.0178 17.7793 C 0 0 0 0 0 0\n+ 5.6004 1.5702 18.2385 C 0 0 0 0 0 0\n+ 1 2 2 0 0 0\n+ 2 3 1 0 0 0\n+ 3 4 2 0 0 0\n+ 3 8 1 0 0 0\n+ 4 5 1 0 0 0\n+ 5 6 2 0 0 0\n+ 6 7 1 0 0 0\n+ 7 8 2 0 0 0\n+M END\n+$$$$\n+CC(C)(C)c1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 10 10 0 0 0 0 0 0 0 0999 V2000\n+ 13.4848 4.7599 24.0453 C 0 0 0 0 0 0\n+ 12.5126 4.3820 22.9047 C 0 0 0 0 0 0\n+ 11.1564 5.0474 23.2199 C 0 0 0 0 0 0\n+ 13.0811 4.9891 21.6031 C 0 0 0 0 0 0\n+ 12.3310 2.8640 22.7263 C 0 0 0 0 0 0\n+ 13.0824 1.9208 23.4483 C 0 0 0 0 0 0\n+ 12.9272 0.5461 23.2348 C 0 0 0 0 0 0\n+ 12.0121 0.0832 22.2967 C 0 0 0 0 0 0\n+ 11.2426 0.9922 21.5790 C 0 0 0 0 0 0\n+ 11.3986 2.3657 21.7948 C 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 4 1 0 0 0\n+ 2 5 1 0 0 0\n+ 5 6 2 0 0 0\n+ 5 10 1 0 0 0\n+ 6 7 1 0 0 0\n+ 7 8 2 0 0 0\n+ 8 9 1 0 0 0\n+ 9 10 2 0 0 0\n+M END\n+$$$$\n+CCN(C)C(=O)c1ccccc1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 12 12 0 0 0 0 0 0 0 0999 V2000\n+ 10.6306 -0.4318 24.2468 C 0 0 0 0 0 0\n+ 11.2738 -0.2591 22.8777 C 0 0 0 0 0 0\n+ 11.5027 1.1477 22.5523 N 0 0 0 0 0 0\n+ 10.6408 1.7231 21.5282 C 0 0 0 0 0 0\n+ 12.6021 1.7847 23.1411 C 0 0 0 0 0 0\n+ 13.4087 1.1762 23.8483 O 0 0 0 0 0 0\n+ 12.8518 3.2389 22.9240 C 0 0 0 0 0 0\n+ 12.7404 3.8481 21.6696 C 0 0 0 0 0 0\n+ 12.9990 5.2139 21.5196 C 0 0 0 0 0 0\n+ 13.3123 5.9914 22.6330 C 0 0 0 0 0 0\n+ 13.3606 5.4074 23.8976 C 0 0 0 0 0 0\n+ 13.1331 4.0370 24.0436 C 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 3 4 1 0 0 0\n+ 3 5 1 0 0 0\n+ 5 6 2 0 0 0\n+ 5 7 1 0 0 0\n+ 7 8 2 0 0 0\n+ 7 12 1 0 0 0\n+ 8 9 1 0 0 0\n+ 9 10 2 0 0 0\n+ 10 11 1 0 0 0\n+ 11 12 2 0 0 0\n+M END\n+$$$$\n+CNC(=O)c1cccc(CCNS(C)(=O)=O)c1\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 17 17 0 0 0 0 0 0 0 0999 V2000\n+ 10.0019 1.6672 21.8593 C 0 0 0 0 0 0\n+ 8.7487 1.5971 21.1244 N 0 0 0 0 0 0\n+ 8.6290 0.5212 20.3389 C 0 0 0 0 0 0\n+ 9.4567 -0.3639 20.1329 O 0 0 0 0 0 0\n+ 7.3235 0.4378 19.6088 C 0 0 0 0 0 0\n+ 7.1514 -0.5242 18.6092 C 0 0 0 0 0 0\n+ 5.9627 -0.5677 17.8790 C 0 0 0 0 0 0\n+ 4.9453 0.3559 18.1379 C 0 0 0 0 0 0\n+ 5.0962 1.3237 19.1419 C 0 0 0 0 0 0\n+ 3.9963 2.3284 19.4060 C 0 0 0 0 0 0\n+ 3.7241 2.6438 20.8872 C 0 0 0 0 0 0\n+ 2.6087 3.6063 21.0810 N 0 0 0 0 0 0\n+ 1.3263 3.1043 22.0447 S 0 0 0 0 0 0\n+ 0.5403 1.9194 20.9788 C 0 0 0 0 0 0\n+ 1.8577 2.3935 23.1858 O 0 0 0 0 0 0\n+ 0.4357 4.2308 22.2210 O 0 0 0 0 0 0\n+ 6.2789 1.3264 19.8924 C 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 3 4 2 0 0 0\n+ 3 5 1 0 0 0\n+ 5 6 2 0 0 0\n+ 5 17 1 0 0 0\n+ 6 7 1 0 0 0\n+ 7 8 2 0 0 0\n+ 8 9 1 0 0 0\n+ 9 10 1 0 0 0\n+ 9 17 2 0 0 0\n+ 10 11 1 0 0 0\n+ 11 12 1 0 0 0\n+ 12 13 1 0 0 0\n+ 13 14 1 0 0 0\n+ 13 15 2 0 0 0\n+ 13 16 2 0 0 0\n+M END\n+$$$$\n' |