Mercurial > repos > bgruening > openbabel_remduplicates
comparison distance_finder.py @ 10:1dd562ae055d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
author | bgruening |
---|---|
date | Wed, 25 Mar 2020 16:47:58 -0400 |
parents | |
children | 50ca8845e7f5 |
comparison
equal
deleted
inserted
replaced
9:39895aff0dd7 | 10:1dd562ae055d |
---|---|
1 # Reports distances of ligands to reference points. An example input for the points is: | |
2 # | |
3 # 5.655 1.497 18.223 | |
4 # 1.494 -8.367 18.574 | |
5 # 13.034 6.306 25.232 | |
6 # | |
7 # Data can be space or tab separated but must contain 3 and only 3 numbers for the x, y and z coordinates | |
8 # | |
9 # That would encode 3 points. | |
10 # Each record in the SDF input is read and the closest heavy atom to each of the reference points is recorded as | |
11 # a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example | |
12 # there would be properties for distance1, distance2 and distance3. | |
13 | |
14 import argparse, os, sys, math | |
15 from openbabel import pybel | |
16 | |
17 | |
18 | |
19 def log(*args, **kwargs): | |
20 """Log output to STDERR | |
21 """ | |
22 print(*args, file=sys.stderr, ** kwargs) | |
23 | |
24 | |
25 def execute(ligands_sdf, points_file, outfile): | |
26 """ | |
27 :param ligands_sdf: A SDF with the 3D molecules to test | |
28 :param points_file: A file with the points to consider. | |
29 :param outfile: The name of the file for the SDF output | |
30 :return: | |
31 """ | |
32 | |
33 | |
34 points = [] | |
35 | |
36 # read the points | |
37 with open(points_file, 'r') as f: | |
38 for line in f.readlines(): | |
39 line.strip() | |
40 if line: | |
41 p = line.split() | |
42 if len(p) == 3: | |
43 points.append((float(p[0]), float(p[1]), float(p[2]))) | |
44 log("Read points",p) | |
45 continue | |
46 log("Failed to read line:", line) | |
47 log('Found', len(points), 'atom points') | |
48 | |
49 sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True) | |
50 | |
51 count = 0 | |
52 for mol in pybel.readfile("sdf", ligands_sdf): | |
53 count += 1 | |
54 if count % 50000 == 0: | |
55 log('Processed', count) | |
56 | |
57 try: | |
58 # print("Processing mol", mol.title) | |
59 | |
60 clone = pybel.Molecule(mol) | |
61 clone.removeh() | |
62 | |
63 coords = [] | |
64 for atom in clone.atoms: | |
65 coords.append(atom.coords) | |
66 | |
67 p = 0 | |
68 for point in points: | |
69 p += 1 | |
70 distances = [] | |
71 for i in coords: | |
72 # calculates distance based on cartesian coordinates | |
73 distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2) | |
74 distances.append(distance) | |
75 # log("distance:", distance) | |
76 min_distance = min(distances) | |
77 # log('Min:', min_distance) | |
78 # log(count, p, min_distance) | |
79 | |
80 mol.data['distance' + str(p)] = min_distance | |
81 | |
82 sdf_writer.write(mol) | |
83 | |
84 except Exception as e: | |
85 log('Failed to handle molecule: '+ str(e)) | |
86 continue | |
87 | |
88 sdf_writer.close() | |
89 log('Wrote', count, 'molecules') | |
90 | |
91 | |
92 def main(): | |
93 global work_dir | |
94 | |
95 parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points') | |
96 | |
97 parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)") | |
98 parser.add_argument('-p', '--points', help="PDB format file with atoms") | |
99 parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") | |
100 | |
101 | |
102 args = parser.parse_args() | |
103 log("XChem distances args: ", args) | |
104 | |
105 execute(args.input, args.points, args.outfile) | |
106 | |
107 | |
108 if __name__ == "__main__": | |
109 main() |