Mercurial > repos > bgruening > openbabel_remduplicates
diff distance_finder.py @ 10:1dd562ae055d draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/openbabel commit 6c84abdd07f292048bf2194073e2e938e94158c4"
author | bgruening |
---|---|
date | Wed, 25 Mar 2020 16:47:58 -0400 |
parents | |
children | 50ca8845e7f5 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/distance_finder.py Wed Mar 25 16:47:58 2020 -0400 @@ -0,0 +1,109 @@ +# Reports distances of ligands to reference points. An example input for the points is: +# +# 5.655 1.497 18.223 +# 1.494 -8.367 18.574 +# 13.034 6.306 25.232 +# +# Data can be space or tab separated but must contain 3 and only 3 numbers for the x, y and z coordinates +# +# That would encode 3 points. +# Each record in the SDF input is read and the closest heavy atom to each of the reference points is recorded as +# a property named distance1 where the numeric part is the index (starting from 1) of the points (in that example +# there would be properties for distance1, distance2 and distance3. + +import argparse, os, sys, math +from openbabel import pybel + + + +def log(*args, **kwargs): + """Log output to STDERR + """ + print(*args, file=sys.stderr, ** kwargs) + + +def execute(ligands_sdf, points_file, outfile): + """ + :param ligands_sdf: A SDF with the 3D molecules to test + :param points_file: A file with the points to consider. + :param outfile: The name of the file for the SDF output + :return: + """ + + + points = [] + + # read the points + with open(points_file, 'r') as f: + for line in f.readlines(): + line.strip() + if line: + p = line.split() + if len(p) == 3: + points.append((float(p[0]), float(p[1]), float(p[2]))) + log("Read points",p) + continue + log("Failed to read line:", line) + log('Found', len(points), 'atom points') + + sdf_writer = pybel.Outputfile("sdf", outfile, overwrite=True) + + count = 0 + for mol in pybel.readfile("sdf", ligands_sdf): + count += 1 + if count % 50000 == 0: + log('Processed', count) + + try: + # print("Processing mol", mol.title) + + clone = pybel.Molecule(mol) + clone.removeh() + + coords = [] + for atom in clone.atoms: + coords.append(atom.coords) + + p = 0 + for point in points: + p += 1 + distances = [] + for i in coords: + # calculates distance based on cartesian coordinates + distance = math.sqrt((point[0] - i[0])**2 + (point[1] - i[1])**2 + (point[2] - i[2])**2) + distances.append(distance) + # log("distance:", distance) + min_distance = min(distances) + # log('Min:', min_distance) + # log(count, p, min_distance) + + mol.data['distance' + str(p)] = min_distance + + sdf_writer.write(mol) + + except Exception as e: + log('Failed to handle molecule: '+ str(e)) + continue + + sdf_writer.close() + log('Wrote', count, 'molecules') + + +def main(): + global work_dir + + parser = argparse.ArgumentParser(description='XChem distances - measure distances to particular points') + + parser.add_argument('-i', '--input', help="SDF containing the 3D molecules to score)") + parser.add_argument('-p', '--points', help="PDB format file with atoms") + parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") + + + args = parser.parse_args() + log("XChem distances args: ", args) + + execute(args.input, args.points, args.outfile) + + +if __name__ == "__main__": + main()