# HG changeset patch # User bgruening # Date 1586789648 14400 # Node ID f6f9b47d02b6108366b1f63216965f46f1341229 # Parent 8d9c8ba2ec869d436090d968f5356cb648aaedba "planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/transfs commit 9955c29ac23ce80b2fbfd4082845a7809c8f2121" diff -r 8d9c8ba2ec86 -r f6f9b47d02b6 transfs.py --- a/transfs.py Wed Apr 08 07:28:49 2020 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,322 +0,0 @@ -# Create dir containing ligands.sdf and protein.pdb -# Enter docker container like this: -# docker run -it --rm --gpus all -v $PWD:/root/train/fragalysis_test_files/work:Z informaticsmatters/deep-app-ubuntu-1604:latest bash -# -# Now inside the container run like this: -# mkdir /tmp/work -# rm -rf /tmp/work/* && python3 work/transfs.py -i work/test-data/ligands.sdf -r work/test-data/receptor.pdb -d 2 -w /tmp/work -# -# If testing with no GPU you can use the --mock option to generate random scores -# -# Start container for testing like this: -# docker run -it --rm -v $PWD:$PWD:Z -w $PWD informaticsmatters/deep-app-ubuntu-1604:latest bash -# Inside container test like this: -# mkdir /tmp/work -# cd chemicaltoolbox/xchem-deep -# rm -rf /tmp/work/* && python3 transfs.py -i test-data/ligands.sdf -r test-data/receptor.pdb -d 2 -w /tmp/work --mock -# - -import argparse, os, sys, math -import subprocess -import random -from openbabel import pybel - -types_file_name = 'inputs.types' -types_file_name = 'inputs.types' -predict_file_name = 'predictions.txt' -work_dir = '.' -paths = None -inputs_protein = [] -inputs_ligands = [] - - -def log(*args, **kwargs): - """Log output to STDERR - """ - print(*args, file=sys.stderr, ** kwargs) - -def write_raw_inputs(receptor_pdb, ligands_sdf, distance): - """ - Analyses the PDB file for waters that clash with each ligand in the SDF and writes out: - 1. a PDB file named like receptor-123-543.pdb where the numeric parts are the waters that have been omitted - 2. a corresponding directory named like receptor-123-543 - 3. an SDF named like receptor-123-543/ligands.sdf containing those ligands that correspond to that receptor. - :param receptor_pdb: A PDB file without the ligand but with the crystallographic waters - :param ligands_sdf: A SDF with the docked poses - :param distance: The distance to consider when removing waters. Only heavy atoms in the ligand are considered. - :return: - """ - - global work_dir - global inputs_protein - global inputs_ligands - global paths - - - log("Writing data to", work_dir) - if not os.path.isdir(work_dir): - os.mkdir(work_dir) - - receptor_file = os.path.basename(receptor_pdb) - - sdf_writers = {} - paths = [] - - # read the receptor once as we'll need to process it many times - with open(receptor_pdb, 'r') as f: - lines = f.readlines() - - count = 0 - for mol in pybel.readfile("sdf", ligands_sdf): - count += 1 - if count % 50000 == 0: - log('Processed', count) - - try: - # print("Processing mol", mol.title) - - clone = pybel.Molecule(mol) - clone.removeh() - - coords = [] - for atom in clone.atoms: - coords.append(atom.coords) - - watnumcode = '' - - # getting receptor without waters that will clash with ligand - new_receptor_pdb = [] - for line in lines: - if line[17:20] == 'HOH': - x, y, z = float(line[30:39]), float(line[39:46]), float(line[46:55]) - distances = [] - for i in coords: - distances.append(math.sqrt((x-i[0])**2 + (y-i[1])**2 + (z-i[2])**2)) # calculates distance based on cartesian coordinates - if min(distances) > distance: # if all distances are larger than 2.0A, then molecule makes it to new file - new_receptor_pdb.append(line) - else: - watnum = line[23:28].strip() - # print("Skipped water " + watnum) - watnumcode += '-' + watnum - if line[17:20] != 'LIG' and line[17:20] != 'HOH': # ligand lines are also removed - new_receptor_pdb.append(line) - - - name = receptor_file[0:-4] + watnumcode - # print('CODE:', name) - mol.data['TransFSReceptor'] = name - - - if watnumcode not in sdf_writers: - # we've not yet encountered this combination of waters so need to write the PDB file - - dir = os.path.sep.join([work_dir, name]) - log('WRITING to :', dir) - - os.mkdir(dir) - paths.append(dir) - - sdf_writers[watnumcode] = pybel.Outputfile("sdf", os.path.sep.join([dir, 'ligands.sdf'])) - - # writing into new pdb file - receptor_writer = open(os.path.sep.join([work_dir, name + '.pdb']), "w+") - for line in new_receptor_pdb: - receptor_writer.write(str(line)) - receptor_writer.close() - - # write the molecule to the corresponding SDF file - sdf_out = sdf_writers[watnumcode] - sdf_out.write(mol) - - except Exception as e: - log('Failed to handle molecule: '+ str(e)) - continue - - for writer in sdf_writers.values(): - writer.close() - - log('Wrote', count, 'molecules and', len(sdf_writers), 'proteins') - -def write_inputs(protein_file, ligands_file, distance): - """ - Runs gninatyper on the proteins and ligands and generates the input.types file that will tell the predictor - what ligands correspond to what proteins. - - :param protein_file: - :param ligands_file: - :param distance: - :return: - """ - - global types_file_name - global work_dir - global inputs_protein - global inputs_ligands - global prepared_ligands - - write_raw_inputs(protein_file, ligands_file, distance) - - types_path = os.path.sep.join([work_dir, types_file_name]) - log("Writing types to", types_path) - with open(types_path, 'w') as types_file: - - for path in paths: - - log("Gninatyping ligands in", path) - - ligands_dir = os.path.sep.join([path, 'ligands']) - os.mkdir(ligands_dir) - - cmd1 = ['gninatyper', os.path.sep.join([path, 'ligands.sdf']), os.path.sep.join([ligands_dir, 'ligand'])] - log('CMD:', cmd1) - exit_code = subprocess.call(cmd1) - log("Status:", exit_code) - if exit_code: - raise Exception("Failed to write ligands") - ligand_gninatypes = os.listdir(os.path.sep.join([path, 'ligands'])) - - log("Gninatyping proteins in", path) - - proteins_dir = os.path.sep.join([path, 'proteins']) - os.mkdir(proteins_dir) - - cmd2 = ['gninatyper', path + '.pdb', os.path.sep.join([proteins_dir, 'protein'])] - log('CMD:', cmd2) - exit_code = subprocess.call(cmd2) - log("Status:", exit_code) - if exit_code: - raise Exception("Failed to write proteins") - protein_gninatypes = os.listdir(os.path.sep.join([path, 'proteins'])) - - num_proteins = 0 - num_ligands = 0 - - for protein in protein_gninatypes: - num_proteins += 1 - num_ligands = 0 - inputs_protein.append(protein) - inputs_protein.append(os.path.sep.join([path, 'proteins', protein])) - for ligand in ligand_gninatypes: - num_ligands += 1 - log("Handling", protein, ligand) - inputs_ligands.append(os.path.sep.join([path, 'ligands', ligand])) - line = "0 {0}{3}proteins{3}{1} {0}{3}ligands{3}{2}\n".format(path, protein, ligand, os.path.sep) - types_file.write(line) - - return num_proteins, num_ligands - - -def generate_predictions_filename(work_dir, predict_file_name): - return "{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name) - - -def run_predictions(model): - global types_file_name - global predict_file_name - global work_dir - # python3 scripts/predict.py -m resources/dense.prototxt -w resources/weights.caffemodel -i work_0/test_set.types >> work_0/caffe_output/predictions.txt - cmd1 = ['python3', '/train/fragalysis_test_files/scripts/predict.py', - '-m', '/train/fragalysis_test_files/resources/dense.prototxt', - '-w', os.path.sep.join(['/train/fragalysis_test_files/resources', model]), - '-i', os.path.sep.join([work_dir, types_file_name]), - '-o', os.path.sep.join([work_dir, predict_file_name])] - log("CMD:", cmd1) - subprocess.call(cmd1) - - -def mock_predictions(): - global work_dir - global predict_file_name - - log("WARNING: generating mock results instead of running on GPU") - outfile = generate_predictions_filename(work_dir, predict_file_name) - count = 0 - with open(outfile, 'w') as predictions: - for path in paths: - log("Reading", path) - protein_gninatypes = os.listdir(os.path.sep.join([path, 'proteins'])) - ligand_gninatypes = os.listdir(os.path.sep.join([path, 'ligands'])) - for protein in protein_gninatypes: - for ligand in ligand_gninatypes: - count += 1 - score = random.random() - line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\n".format(score, path, protein, ligand, - os.path.sep) - # log("Writing", line) - predictions.write(line) - - log('Wrote', count, 'mock predictions') - - -def read_predictions(): - global predict_file_name - global work_dir - scores = {} - with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name), 'r') as input: - for line in input: - # log(line) - tokens = line.split() - if len(tokens) == 5 and tokens[1] == '|': - # log(len(tokens), tokens[0], tokens[3], tokens[4]) - record_no = inputs_ligands.index(tokens[4]) - if record_no is not None: - # log(record_no, tokens[0]) - scores[record_no] = tokens[0] - log("Found", len(scores), "scores") - return scores - - -def patch_scores_sdf(outfile, scores): - - counter = 0 - sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile) - log("Writing results to {0}".format(sdf_path)) - sdf_file = pybel.Outputfile("sdf", sdf_path) - - for path in paths: - for mol in pybel.readfile("sdf", os.path.sep.join([path, 'ligands.sdf'])): - if counter in scores: - score = scores[counter] - # og("Score for record {0} is {1}".format(counter, score)) - mol.data['TransFSScore'] = score - sdf_file.write(mol) - else: - log("No score found for record", counter) - counter += 1 - sdf_file.close() - - -def execute(ligands_sdf, protein, outfile, distance, model='weights.caffemodel', mock=False): - - write_inputs(protein, ligands_sdf, distance) - if mock: - mock_predictions() - else: - run_predictions(model) - scores = read_predictions() - patch_scores_sdf(outfile, scores) - - -def main(): - global work_dir - - parser = argparse.ArgumentParser(description='XChem deep - pose scoring') - - parser.add_argument('-i', '--input', help="SDF containing the poses to score)") - parser.add_argument('-r', '--receptor', help="Receptor file for scoring (PDB format)") - parser.add_argument('-d', '--distance', type=float, default=2.0, help="Cuttoff for removing waters") - parser.add_argument('-o', '--outfile', default='output.sdf', help="File name for results") - parser.add_argument('-w', '--work-dir', default=".", help="Working directory") - parser.add_argument('-m', '--model', default="weights.caffemodel", help="Model to use for predictions") - parser.add_argument('--mock', action='store_true', help='Generate mock scores rather than run on GPU') - - args = parser.parse_args() - log("XChem deep args: ", args) - - work_dir = args.work_dir - - execute(args.input, args.receptor, args.outfile, args.distance, model=args.model, mock=args.mock) - - -if __name__ == "__main__": - main() diff -r 8d9c8ba2ec86 -r f6f9b47d02b6 transfs.xml --- a/transfs.xml Wed Apr 08 07:28:49 2020 -0400 +++ b/transfs.xml Mon Apr 13 10:54:08 2020 -0400 @@ -1,49 +1,42 @@ - + using deep learning - informaticsmatters/deep-app-ubuntu-1604:1.2 + informaticsmatters/transfs:1.3 - + - + @@ -63,7 +56,7 @@ - + @@ -73,8 +66,21 @@ + --> + + + + + + + + + + + + - +