comparison distance_finder.py @ 10:c427987b54fd draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
author bgruening
date Wed, 25 Mar 2020 16:42:06 -0400
parents
children a72ae2711a97
comparison
equal deleted inserted replaced
9:a26f96c1f061 10:c427987b54fd
1 # Reports distances of ligands to reference points. An example input for the points is:
2 #
3 # 5.655 1.497 18.223
4 # 1.494 -8.367 18.574
5 # 13.034 6.306 25.232
6 #
7 # Data can be space or tab separated but must contain 3 and only 3 numbers for the x, y and z coordinates
8 #
9 # That would encode 3 points.
10 # Each record in the SDF input is read and the closest heavy atom to each of the reference points is recorded as
11 # a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example
12 # there would be properties for distance1, distance2 and distance3.
13
14 import argparse, os, sys, math
15 from openbabel import pybel
16
17
18
19 def log(*args, **kwargs):
20 """Log output to STDERR
21 """
22 print(*args, file=sys.stderr, ** kwargs)
23
24
25 def execute(ligands_sdf, points_file, outfile):
26 """
27 :param ligands_sdf: A SDF with the 3D molecules to test
28 :param points_file: A file with the points to consider.
29 :param outfile: The name of the file for the SDF output
30 :return:
31 """
32
33
34 points = []
35
36 # read the points
37 with open(points_file, 'r') as f:
38 for line in f.readlines():
39 line.strip()
40 if line:
41 p = line.split()
42 if len(p) == 3:
43 points.append((float(p[0]), float(p[1]), float(p[2])))
44 log("Read points",p)
45 continue
46 log("Failed to read line:", line)
47 log('Found', len(points), 'atom points')
48
49 sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True)
50
51 count = 0
52 for mol in pybel.readfile("sdf", ligands_sdf):
53 count += 1
54 if count % 50000 == 0:
55 log('Processed', count)
56
57 try:
58 # print("Processing mol", mol.title)
59
60 clone = pybel.Molecule(mol)
61 clone.removeh()
62
63 coords = []
64 for atom in clone.atoms:
65 coords.append(atom.coords)
66
67 p = 0
68 for point in points:
69 p += 1
70 distances = []
71 for i in coords:
72 # calculates distance based on cartesian coordinates
73 distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2)
74 distances.append(distance)
75 # log("distance:", distance)
76 min_distance = min(distances)
77 # log('Min:', min_distance)
78 # log(count, p, min_distance)
79
80 mol.data['distance' + str(p)] = min_distance
81
82 sdf_writer.write(mol)
83
84 except Exception as e:
85 log('Failed to handle molecule: '+ str(e))
86 continue
87
88 sdf_writer.close()
89 log('Wrote', count, 'molecules')
90
91
92 def main():
93 global work_dir
94
95 parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points')
96
97 parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)")
98 parser.add_argument('-p', '--points', help="PDB format file with atoms")
99 parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results")
100
101
102 args = parser.parse_args()
103 log("XChem distances args: ", args)
104
105 execute(args.input, args.points, args.outfile)
106
107
108 if __name__ == "__main__":
109 main()