Galaxy |

Changeset 0:de29b4f35536 (2020-03-27)

Next changeset 1:8d9c8ba2ec86 (2020-04-08)

Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/transfs commit d9a9e2f0e12fe9d2c37f632d99f2164df577b4af"

added:
README.txt
server/transfs.py
server/transfs.xml
test-data/ligands.sdf
test-data/receptor.pdb
transfs.py
transfs.xml

diff -r 000000000000 -r de29b4f35536 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Fri Mar 27 09:18:53 2020 -0400

@@ -0,0 +1,27 @@
+THIS TOOL WILL NOT RUN AT PRESENT.
+
+The tool is 'work in progress' and needs at least the following sorting out:
+
+1. Execution environment
+
+Currently the xchem_deep_score.py code can be run in the informaticsmatters/deep-app-ubuntu-1604:latest
+container (see the instructions at the top of the Python file for doing so). The Galaxy execution environment needs
+to be configured to run the tool in this Docker container.
+Alternatively a conda environment could potentially be created but the dependencies are very complex and
+some components need to be built from source.
+Details for the dependencies are mostly described in the GitHub repo for the docker image:
+https://github.com/InformaticsMatters/dls-deep/tree/ubuntu
+
+2. GPU availability
+
+The code must run in an environment with a GPU and with the CUDA drivers.
+The docker image mentioned above has everything that is needed and will run on a GPU enabled environment
+(a special version of Docker on the host machine is needed that supports GPUs).
+
+Only the predictions need a GPU. The preceding and subsequent steps run on the CPU. Without a GPU you can specify the --mock
+option, which uses random numbers for the predicted scores.
+
+3. Associated Python scripts.
+
+The docker image contains additional python scripts (primarily /train/fragalysis_test_files/predict.py)
+that are needed. If not running in a container these will need to be made available.
\ No newline at end of file

diff -r 000000000000 -r de29b4f35536 server/transfs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/transfs.py Fri Mar 27 09:18:53 2020 -0400

[

b'@@ -0,0 +1,322 @@\n+# Create dir containing ligands.sdf and protein.pdb\n+# Enter docker container like this:\n+# docker run -it --rm --gpus all -v $PWD:/root/train/fragalysis_test_files/work:Z informaticsmatters/deep-app-ubuntu-1604:latest bash\n+#\n+# Now inside the container run like this:\n+# mkdir /tmp/work\n+# rm -rf /tmp/work/* && python3 work/transfs.py -i work/test-data/ligands.sdf -r work/test-data/receptor.pdb -d 2 -w /tmp/work\n+#\n+# If testing with no GPU you can use the --mock option to generate random scores\n+#\n+# Start container for testing like this:\n+# docker run -it --rm -v $PWD:$PWD:Z -w $PWD informaticsmatters/deep-app-ubuntu-1604:latest bash\n+# Inside container test like this:\n+# mkdir /tmp/work\n+# cd chemicaltoolbox/xchem-deep\n+# rm -rf /tmp/work/* && python3 transfs.py -i test-data/ligands.sdf -r test-data/receptor.pdb -d 2 -w /tmp/work --mock\n+#\n+\n+import argparse, os, sys, math\n+import subprocess\n+import random\n+from openbabel import pybel\n+\n+types_file_name = \'inputs.types\'\n+types_file_name = \'inputs.types\'\n+predict_file_name = \'predictions.txt\'\n+work_dir = \'.\'\n+paths = None\n+inputs_protein = []\n+inputs_ligands = []\n+\n+\n+def log(*args, **kwargs):\n+ """Log output to STDERR\n+ """\n+ print(*args, file=sys.stderr, ** kwargs)\n+\n+def write_raw_inputs(receptor_pdb, ligands_sdf, distance):\n+ """\n+ Analyses the PDB file for waters that clash with each ligand in the SDF and writes out:\n+ 1. a PDB file named like receptor-123-543.pdb where the numeric parts are the waters that have been omitted\n+ 2. a corresponding directory named like receptor-123-543\n+ 3. an SDF named like receptor-123-543/ligands.sdf containing those ligands that correspond to that receptor.\n+ :param receptor_pdb: A PDB file without the ligand but with the crystallographic waters\n+ :param ligands_sdf: A SDF with the docked poses\n+ :param distance: The distance to consider when removing waters. 
Only heavy atoms in the ligand are considered.\n+ :return:\n+ """\n+\n+ global work_dir\n+ global inputs_protein\n+ global inputs_ligands\n+ global paths\n+\n+\n+ log("Writing data to", work_dir)\n+ if not os.path.isdir(work_dir):\n+ os.mkdir(work_dir)\n+\n+ receptor_file = os.path.basename(receptor_pdb)\n+\n+ sdf_writers = {}\n+ paths = []\n+\n+ # read the receptor once as we\'ll need to process it many times\n+ with open(receptor_pdb, \'r\') as f:\n+ lines = f.readlines()\n+\n+ count = 0\n+ for mol in pybel.readfile("sdf", ligands_sdf):\n+ count += 1\n+ if count % 50000 == 0:\n+ log(\'Processed\', count)\n+\n+ try:\n+ # print("Processing mol", mol.title)\n+\n+ clone = pybel.Molecule(mol)\n+ clone.removeh()\n+\n+ coords = []\n+ for atom in clone.atoms:\n+ coords.append(atom.coords)\n+\n+ watnumcode = \'\'\n+\n+ # getting receptor without waters that will clash with ligand\n+ new_receptor_pdb = []\n+ for line in lines:\n+ if line[17:20] == \'HOH\':\n+ x, y, z = float(line[30:39]), float(line[39:46]), float(line[46:55])\n+ distances = []\n+ for i in coords:\n+ distances.append(math.sqrt((x-i[0])**2 + (y-i[1])**2 + (z-i[2])**2)) # calculates distance based on cartesian coordinates\n+ if min(distances) > distance: # if all distances are larger than 2.0A, then molecule makes it to new file\n+ new_receptor_pdb.append(line)\n+ else:\n+ watnum = line[23:28].strip()\n+ # print("Skipped water " + watnum)\n+ watnumcode += \'-\' + watnum\n+ if line[17:20] != \'LIG\' and line[17:20] != \'HOH\': # ligand lines are also removed\n+ new_receptor_pdb.append(line)\n+\n+\n+ name = receptor_file[0:-4] + watnumcode\n+ # print(\''..b'ragalysis_test_files/scripts/predict.py\',\n+ \'-m\', \'/train/fragalysis_test_files/resources/dense.prototxt\',\n+ \'-w\', \'/train/fragalysis_test_files/resources/weights.caffemodel\',\n+ \'-i\', os.path.sep.join([work_dir, types_file_name]),\n+ \'-o\', os.path.sep.join([work_dir, predict_file_name])]\n+ log("CMD:", cmd1)\n+ 
subprocess.call(cmd1)\n+\n+\n+def mock_predictions():\n+ global work_dir\n+ global predict_file_name\n+\n+ log("WARNING: generating mock results instead of running on GPU")\n+ outfile = generate_predictions_filename(work_dir, predict_file_name)\n+ count = 0\n+ with open(outfile, \'w\') as predictions:\n+ for path in paths:\n+ log("Reading", path)\n+ protein_gninatypes = os.listdir(os.path.sep.join([path, \'proteins\']))\n+ ligand_gninatypes = os.listdir(os.path.sep.join([path, \'ligands\']))\n+ for protein in protein_gninatypes:\n+ for ligand in ligand_gninatypes:\n+ count += 1\n+ score = random.random()\n+ line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\\n".format(score, path, protein, ligand,\n+ os.path.sep)\n+ # log("Writing", line)\n+ predictions.write(line)\n+\n+ log(\'Wrote\', count, \'mock predictions\')\n+\n+\n+def read_predictions():\n+ global predict_file_name\n+ global work_dir\n+ scores = {}\n+ with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name), \'r\') as input:\n+ for line in input:\n+ # log(line)\n+ tokens = line.split()\n+ if len(tokens) == 5 and tokens[1] == \'|\':\n+ # log(len(tokens), tokens[0], tokens[3], tokens[4])\n+ record_no = inputs_ligands.index(tokens[4])\n+ if record_no is not None:\n+ # log(record_no, tokens[0])\n+ scores[record_no] = tokens[0]\n+ log("Found", len(scores), "scores")\n+ return scores\n+\n+\n+def patch_scores_sdf(outfile, scores):\n+\n+ counter = 0\n+ sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile)\n+ log("Writing results to {0}".format(sdf_path))\n+ sdf_file = pybel.Outputfile("sdf", sdf_path)\n+\n+ for path in paths:\n+ for mol in pybel.readfile("sdf", os.path.sep.join([path, \'ligands.sdf\'])):\n+ if counter in scores:\n+ score = scores[counter]\n+ # og("Score for record {0} is {1}".format(counter, score))\n+ mol.data[\'TransFSScore\'] = score\n+ sdf_file.write(mol)\n+ else:\n+ log("No score found for record", counter)\n+ counter += 1\n+ sdf_file.close()\n+\n+\n+def 
execute(ligands_sdf, protein, outfile, distance, mock=False):\n+\n+ write_inputs(protein, ligands_sdf, distance)\n+ if mock:\n+ mock_predictions()\n+ else:\n+ run_predictions()\n+ scores = read_predictions()\n+ patch_scores_sdf(outfile, scores)\n+\n+\n+def main():\n+ global work_dir\n+\n+ parser = argparse.ArgumentParser(description=\'XChem deep - pose scoring\')\n+\n+ parser.add_argument(\'-i\', \'--input\', help="SDF containing the poses to score)")\n+ parser.add_argument(\'-r\', \'--receptor\', help="Receptor file for scoring (PDB format)")\n+ parser.add_argument(\'-d\', \'--distance\', type=float, default=2.0, help="Cuttoff for removing waters")\n+ parser.add_argument(\'-o\', \'--outfile\', default=\'output.sdf\', help="File name for results")\n+ parser.add_argument(\'-w\', \'--work-dir\', default=".", help="Working directory")\n+ parser.add_argument(\'--mock\', action=\'store_true\', help=\'Generate mock scores rather than run on GPU\')\n+\n+ args = parser.parse_args()\n+ log("XChem deep args: ", args)\n+\n+ work_dir = args.work_dir\n+\n+ execute(args.input, args.receptor, args.outfile, args.distance, mock=args.mock)\n+\n+\n+if __name__ == "__main__":\n+ main()\n+\n'

diff -r 000000000000 -r de29b4f35536 server/transfs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/transfs.xml Fri Mar 27 09:18:53 2020 -0400

[

@@ -0,0 +1,108 @@
+<tool id="xchem_transfs_scoring" name="XChem TransFS pose scoring" version="0.2.0">
+    <description>using deep learning</description>
+
+    <requirements>
+        
+        
+        
+        <container type="docker">informaticsmatters/deep-app-ubuntu-1604:latest</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    cd /train/fragalysis_test_files/ &&
+    mkdir workdir &&
+    cd workdir &&
+
+    cp '$ligands' ligands.sdf &&
+    cp '$receptor' receptor.pdb &&
+
+    ##mkdir -p /root/train/ &&
+    ##ln -s /train/fragalysis_test_files/ /root/train/ &&
+
+    ##adduser centos --uid 1000 --quiet --no-create-home --system &&
+    ##apt install sudo -y &&
+
+    ## mkdir -p ligands &&
+    cd ../ &&
+    python '$__tool_directory__/transfs.py' -i ./workdir/ligands.sdf -r ./workdir/receptor.pdb -d $distance -w /train/fragalysis_test_files/workdir &&
+    ls -l &&
+    ls -l workdir &&
+    sudo -u ubuntu cp ./workdir/output.sdf '$output' &&
+    head -n 10000 ./workdir/output.sdf &&
+
+    mkdir -p ./pdb &&
+    cp -r ./workdir/receptor*.pdb ./pdb &&
+    tar -cvhf archiv.tar ./pdb &&
+    sudo -u ubuntu cp archiv.tar '$output_receptors' &&
+
+    sudo -u ubuntu cp ./workdir/predictions.txt '$predictions'
+
+
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="receptor" format="pdb" label="Receptor" help="Select a receptor (pdb format)."/>
+        <param type="data" name="ligands" format="sdf,mol" label="Ligands" help="Ligands (docked poses) in SDF format)"/>
+        <param name="distance" type="float" value="2.0" min="1.0" max="5.0" label="Distance to waters" help="Remove waters closer than this distance to any ligand heavy atom"/>
+        <param type="hidden" name="mock" value="" label="Mock calculations" help="Use random numbers instead of running on GPU"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="XChem pose scoring on ${on_string}"/>
+        <data name="predictions" format="txt" label="Predictions on ${on_string}"/>
+        <data name="output_receptors" format="tar" label="Receptors ${on_string}"/>
+
+        
+    </outputs>
+
+    <tests>
+ <test>
+            <param name="receptor" value="receptor.pdb"/>
+            <param name="ligands" value="ligands.sdf"/>
+            
+            <param name="distance" value="4.0"/>
+            <output name="output" ftype="sdf">
+                <assert_contents>
+                    <has_text text="TransFSReceptor"/>
+                    <has_text text="TransFSScore"/>
+                </assert_contents>
+            </output>
+            
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+This tool performs scoring of docked ligand poses using deep learning.
+It uses the gnina and libmolgrid toolkits to perform the scoring to generate
+a prediction for how good the pose is.
+
+
+-----
+
+.. class:: infomark
+
+**Inputs**
+
+1. The protein receptor to dock into as a file in PDB format. This should have the ligand removed but retain the waters.
+2. A set of ligand poses to score in SDF format.
+
+-----
+
+.. class:: infomark
+
+**Outputs**
+
+An SDF file is produced as output. The binding affinity scores are contained within the SDF file
+as the TransFSScore property and the PDB file (with the waters that clash with the ligand removed)
+that was used for the scoring as the TransFSReceptor property.
+Values for the score range from 0 (poor binding) to 1 (good binding).
+
+A set of PDB files is also output, each one with different crystallographic waters removed. Each ligand is
+examined against the input PDB structure, and the waters that clash with it (those closer to any heavy atom of the
+ligand than the 'distance' parameter) are removed. The filenames encode the numbers of the waters that were removed.
+
+    ]]></help>
+</tool>

diff -r 000000000000 -r de29b4f35536 test-data/ligands.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ligands.sdf Fri Mar 27 09:18:53 2020 -0400

b'@@ -0,0 +1,1510 @@\n+MolPort-002-851-943\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20 0 0 0 0 0 0 0 0999 V2000\n+ 28.6216 -44.1866 73.3638 C 0 0 0 0 0 0\n+ 30.1353 -44.0328 73.4362 C 0 0 0 0 0 0\n+ 30.7597 -45.2722 73.8147 O 0 0 0 0 0 0\n+ 32.1148 -45.2860 73.9853 C 0 0 0 0 0 0\n+ 32.8758 -44.2937 74.5886 C 0 0 0 0 0 0\n+ 34.2640 -44.4447 74.6917 C 0 0 0 0 0 0\n+ 34.9396 -43.4382 75.2691 F 0 0 0 0 0 0\n+ 34.9421 -45.5768 74.2048 C 0 0 0 0 0 0\n+ 34.1425 -46.5638 73.5990 C 0 0 0 0 0 0\n+ 32.7560 -46.4133 73.4768 C 0 0 0 0 0 0\n+ 36.4169 -45.7333 74.2945 C 0 0 0 0 0 0\n+ 37.3030 -44.7299 73.8615 C 0 0 0 0 0 0\n+ 38.6916 -44.8971 73.9261 C 0 0 0 0 0 0\n+ 39.2314 -46.0682 74.4460 C 0 0 0 0 0 0\n+ 38.3836 -47.0720 74.8981 C 0 0 0 0 0 0\n+ 36.9964 -46.9076 74.8159 C 0 0 0 0 0 0\n+ 30.6590 -43.6789 72.0407 C 0 0 0 0 0 0\n+ 31.1658 -44.4817 71.2685 O 0 0 0 0 0 0\n+ 30.5050 -42.3687 71.7235 O 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 17 1 0 0 0\n+ 3 4 1 0 0 0\n+ 4 5 2 0 0 0\n+ 4 10 1 0 0 0\n+ 5 6 1 0 0 0\n+ 6 7 1 0 0 0\n+ 6 8 2 0 0 0\n+ 8 9 1 0 0 0\n+ 8 11 1 0 0 0\n+ 9 10 2 0 0 0\n+ 11 12 2 0 0 0\n+ 11 16 1 0 0 0\n+ 12 13 1 0 0 0\n+ 13 14 2 0 0 0\n+ 14 15 1 0 0 0\n+ 15 16 2 0 0 0\n+ 17 18 2 0 0 0\n+ 17 19 1 0 0 0\n+M END\n+> <CHROM.1>\n+-177.01127901,-99.92003744,40.28785274,-50.56487888,33.82101420,-44.94803558\n+73.75196667,2.86183962,-0.16145766,-0.25378100\n+\n+> <Name>\n+MolPort-002-851-943\n+\n+> <RI>\n+0\n+\n+> <Rbt.Current_Directory>\n+/home/timbo/github/im/docking-validation/targets/nudt7/expts/vscreening/NUDT7A-x0129/work/94/3883032ea0e243c22b1a7b7b149ac4\n+\n+> <Rbt.Executable>\n+rbdock ($Id: //depot/dev/client3/rdock/2013.1/src/exe/rbdock.cxx#4 $)\n+\n+> <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+> <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+> <Rbt.Receptor>\n+docking.prm\n+\n+> <SCORE>\n+-6.92904\n+\n+> <SCORE.INTER>\n+-6.0525\n+\n+> <SCORE.INTER.CONST>\n+1\n+\n+> <SCORE.INTER.POLAR>\n+0\n+\n+> 
<SCORE.INTER.REPUL>\n+0\n+\n+> <SCORE.INTER.ROT>\n+4\n+\n+> <SCORE.INTER.VDW>\n+-15.4525\n+\n+> <SCORE.INTER.norm>\n+-0.318553\n+\n+> <SCORE.INTRA>\n+-0.876534\n+\n+> <SCORE.INTRA.DIHEDRAL>\n+2.31595\n+\n+> <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+> <SCORE.INTRA.POLAR>\n+0\n+\n+> <SCORE.INTRA.POLAR.0>\n+0\n+\n+> <SCORE.INTRA.REPUL>\n+0\n+\n+> <SCORE.INTRA.REPUL.0>\n+0\n+\n+> <SCORE.INTRA.VDW>\n+-2.03451\n+\n+> <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+> <SCORE.INTRA.norm>\n+-0.0461334\n+\n+> <SCORE.RESTR>\n+0\n+\n+> <SCORE.RESTR.CAVITY>\n+0\n+\n+> <SCORE.RESTR.norm>\n+0\n+\n+> <SCORE.SYSTEM>\n+0\n+\n+> <SCORE.SYSTEM.CONST>\n+0\n+\n+> <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+> <SCORE.SYSTEM.norm>\n+0\n+\n+> <SCORE.heavy>\n+19\n+\n+> <SCORE.norm>\n+-0.364686\n+\n+$$$$\n+MolPort-002-851-943\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20 0 0 0 0 0 0 0 0999 V2000\n+ 28.7887 -43.9849 73.4350 C 0 0 0 0 0 0\n+ 30.3065 -43.8574 73.4561 C 0 0 0 0 0 0\n+ 30.9156 -45.0596 73.9590 O 0 0 0 0 0 0\n+ 32.2669 -45.0638 74.1571 C 0 0 0 0 0 0\n+ 32.9869 -44.1547 74.9208 C 0 0 0 0 0 0\n+ 34.3766 -44.2833 75.0323 C 0 0 0 0 0 0\n+ 35.0112 -43.3585 75.7706 F 0 0 0 0 0 0\n+ 35.0968 -45.3106 74.3963 C 0 0 0 0 0 0\n+ 34.3382 -46.2150 73.6302 C 0 0 0 0 0 0\n+ 32.9505 -46.0840 73.4996 C 0 0 0 0 0 0\n+ 36.5736 -45.4404 74.4959 C 0 0 0 0 0 0\n+ 37.4235 -44.3192 74.5083 C 0 0 0 0 0 0\n+ 38.8156 -44.4525 74.5763 C 0 0 0 0 0 0\n+ 39.3943 -45.7144 74.6543 C 0 0 0 0 0 0\n+ 38.5819 -46.8416 74.6613 C 0 0 0 0 0 0\n+ 37.1920 -46.7044 74.5761 C 0 0 0 0 0 0\n+ 30.8027 -43.6753 72.0182 C 0 0 0 0 0 '..b'e/rbdock.cxx#4 $)\n+\n+> <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+> <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+> <Rbt.Receptor>\n+docking.prm\n+\n+> <SCORE>\n+-6.29997\n+\n+> <SCORE.INTER>\n+-5.91247\n+\n+> <SCORE.INTER.CONST>\n+1\n+\n+> <SCORE.INTER.POLAR>\n+0\n+\n+> <SCORE.INTER.REPUL>\n+0\n+\n+> <SCORE.INTER.ROT>\n+4\n+\n+> <SCORE.INTER.VDW>\n+-15.3125\n+\n+> 
<SCORE.INTER.norm>\n+-0.311182\n+\n+> <SCORE.INTRA>\n+-0.387505\n+\n+> <SCORE.INTRA.DIHEDRAL>\n+1.04805\n+\n+> <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+> <SCORE.INTRA.POLAR>\n+0\n+\n+> <SCORE.INTRA.POLAR.0>\n+0\n+\n+> <SCORE.INTRA.REPUL>\n+0\n+\n+> <SCORE.INTRA.REPUL.0>\n+0\n+\n+> <SCORE.INTRA.VDW>\n+-0.911532\n+\n+> <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+> <SCORE.INTRA.norm>\n+-0.020395\n+\n+> <SCORE.RESTR>\n+0\n+\n+> <SCORE.RESTR.CAVITY>\n+0\n+\n+> <SCORE.RESTR.norm>\n+0\n+\n+> <SCORE.SYSTEM>\n+0\n+\n+> <SCORE.SYSTEM.CONST>\n+0\n+\n+> <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+> <SCORE.SYSTEM.norm>\n+0\n+\n+> <SCORE.heavy>\n+19\n+\n+> <SCORE.norm>\n+-0.331577\n+\n+$$$$\n+MolPort-002-851-943\n+ rDOCK(R) 3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20 0 0 0 0 0 0 0 0999 V2000\n+ 28.1301 -46.9340 70.0718 C 0 0 0 0 0 0\n+ 28.9931 -46.5530 71.2679 C 0 0 0 0 0 0\n+ 30.2845 -46.0850 70.8408 O 0 0 0 0 0 0\n+ 31.2330 -45.8494 71.7948 C 0 0 0 0 0 0\n+ 31.2492 -46.3704 73.0818 C 0 0 0 0 0 0\n+ 32.2909 -46.0377 73.9562 C 0 0 0 0 0 0\n+ 32.2332 -46.5616 75.1912 F 0 0 0 0 0 0\n+ 33.3464 -45.1837 73.5884 C 0 0 0 0 0 0\n+ 33.3007 -44.6726 72.2780 C 0 0 0 0 0 0\n+ 32.2541 -44.9867 71.4028 C 0 0 0 0 0 0\n+ 34.4439 -44.8123 74.5185 C 0 0 0 0 0 0\n+ 34.2182 -44.5848 75.8885 C 0 0 0 0 0 0\n+ 35.2544 -44.2099 76.7522 C 0 0 0 0 0 0\n+ 36.5513 -44.0721 76.2701 C 0 0 0 0 0 0\n+ 36.8109 -44.3061 74.9252 C 0 0 0 0 0 0\n+ 35.7693 -44.6642 74.0622 C 0 0 0 0 0 0\n+ 28.3209 -45.3915 72.0067 C 0 0 0 0 0 0\n+ 27.7039 -45.5051 73.0574 O 0 0 0 0 0 0\n+ 28.4756 -44.1955 71.3848 O 0 0 0 0 0 0\n+ 1 2 1 0 0 0\n+ 2 3 1 0 0 0\n+ 2 17 1 0 0 0\n+ 3 4 1 0 0 0\n+ 4 5 2 0 0 0\n+ 4 10 1 0 0 0\n+ 5 6 1 0 0 0\n+ 6 7 1 0 0 0\n+ 6 8 2 0 0 0\n+ 8 9 1 0 0 0\n+ 8 11 1 0 0 0\n+ 9 10 2 0 0 0\n+ 11 12 2 0 0 0\n+ 11 16 1 0 0 0\n+ 12 13 1 0 0 0\n+ 13 14 2 0 0 0\n+ 14 15 1 0 0 0\n+ 15 16 2 0 0 0\n+ 17 18 2 0 0 0\n+ 17 19 1 0 0 0\n+M END\n+> 
<CHROM.1>\n+-172.56786008,103.66611308,20.57483725,-37.44652631,32.01085503,-45.35131371\n+73.24978757,-2.61232414,0.19661218,1.57826506\n+\n+> <Name>\n+MolPort-002-851-943\n+\n+> <RI>\n+0\n+\n+> <Rbt.Current_Directory>\n+/home/timbo/github/im/docking-validation/targets/nudt7/expts/vscreening/NUDT7A-x0129/work/94/3883032ea0e243c22b1a7b7b149ac4\n+\n+> <Rbt.Executable>\n+rbdock ($Id: //depot/dev/client3/rdock/2013.1/src/exe/rbdock.cxx#4 $)\n+\n+> <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+> <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+> <Rbt.Receptor>\n+docking.prm\n+\n+> <SCORE>\n+-6.14591\n+\n+> <SCORE.INTER>\n+-4.70666\n+\n+> <SCORE.INTER.CONST>\n+1\n+\n+> <SCORE.INTER.POLAR>\n+0\n+\n+> <SCORE.INTER.REPUL>\n+0\n+\n+> <SCORE.INTER.ROT>\n+4\n+\n+> <SCORE.INTER.VDW>\n+-14.1067\n+\n+> <SCORE.INTER.norm>\n+-0.247719\n+\n+> <SCORE.INTRA>\n+-1.43925\n+\n+> <SCORE.INTRA.DIHEDRAL>\n+0.01514\n+\n+> <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+> <SCORE.INTRA.POLAR>\n+0\n+\n+> <SCORE.INTRA.POLAR.0>\n+0\n+\n+> <SCORE.INTRA.REPUL>\n+0\n+\n+> <SCORE.INTRA.REPUL.0>\n+0\n+\n+> <SCORE.INTRA.VDW>\n+-1.44682\n+\n+> <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+> <SCORE.INTRA.norm>\n+-0.0757498\n+\n+> <SCORE.RESTR>\n+0\n+\n+> <SCORE.RESTR.CAVITY>\n+0\n+\n+> <SCORE.RESTR.norm>\n+0\n+\n+> <SCORE.SYSTEM>\n+0\n+\n+> <SCORE.SYSTEM.CONST>\n+0\n+\n+> <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+> <SCORE.SYSTEM.norm>\n+0\n+\n+> <SCORE.heavy>\n+19\n+\n+> <SCORE.norm>\n+-0.323469\n+\n+$$$$\n\\ No newline at end of file\n'

diff -r 000000000000 -r de29b4f35536 test-data/receptor.pdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/receptor.pdb Fri Mar 27 09:18:53 2020 -0400

b'@@ -0,0 +1,1640 @@\n+HEADER ---- 26-MAY-17 1umyp \n+TITLE NUDT7A-x0129 \n+EXPDTA X-RAY DIFFRACTION \n+REMARK 2 \n+REMARK 2 RESOLUTION. 2.45 ANGSTROMS \n+REMARK 3 \n+REMARK 3 R VALUE 0.174 \n+REMARK 50 DEPOSITOR \n+REMARK 50 admin \n+REMARK 50 PROJECT \n+REMARK 50 NUDT7 \n+REMARK 50 STRUCFILE \n+REMARK 50 /dls/science/groups/proasis/Data/PPWEB/oxc1umyp.pdb.gz \n+REMARK 50 STRUCSOURCE \n+REMARK 50 oxc \n+HET ACT D 1 4 \n+HET ACT D 2 4 \n+HET DMS C 1 4 \n+HET DMS C 2 4 \n+CRYST1 125.992 125.992 41.625 90.00 90.00 120.00 P 3 2 1 0 \n+ATOM 1 N SER A 15 59.869 -59.274 94.881 1.00 76.84 N \n+ATOM 2 CA SER A 15 59.985 -57.783 94.828 1.00 71.13 C \n+ATOM 3 C SER A 15 58.610 -57.164 94.540 1.00 67.52 C \n+ATOM 4 O SER A 15 57.643 -57.893 94.314 1.00 65.98 O \n+ATOM 5 CB SER A 15 61.002 -57.390 93.761 1.00 66.24 C \n+ATOM 6 OG SER A 15 60.570 -57.811 92.472 1.00 67.23 O \n+ATOM 7 N MET A 16 58.539 -55.834 94.563 1.00 58.08 N \n+ATOM 8 CA MET A 16 57.293 -55.088 94.326 1.00 59.05 C \n+ATOM 9 C MET A 16 56.683 -55.346 92.933 1.00 60.19 C \n+ATOM 10 O MET A 16 55.452 -55.432 92.802 1.00 57.04 O \n+ATOM 11 CB MET A 16 57.515 -53.574 94.563 1.00 55.78 C \n+ATOM 12 CG MET A 16 56.673 -52.616 93.735 1.00 58.54 C \n+ATOM 13 SD MET A 16 56.941 -50.885 94.202 1.00 67.90 S \n+ATOM 14 CE MET A 16 58.554 -50.592 93.508 1.00 63.96 C \n+ATOM 15 N LEU A 17 57.526 -55.468 91.912 1.00 53.59 N \n+ATOM 16 CA LEU A 17 57.027 -55.566 90.550 1.00 55.31 C \n+ATOM 17 C LEU A 17 56.583 -56.984 90.257 1.00 55.34 C \n+ATOM 18 O LEU A 17 55.576 -57.195 89.583 1.00 50.16 O \n+ATOM 19 CB LEU A 17 58.079 -55.116 89.534 1.00 53.94 C \n+ATOM 20 CG LEU A 17 58.257 -53.602 89.470 1.00 56.21 C \n+ATOM 21 CD1 LEU A 17 59.299 -53.272 88.406 1.00 54.07 C \n+ATOM 22 CD2 LEU A 17 56.947 -52.872 89.190 1.00 55.51 C \n+ATOM 23 N ASP A 18 57.343 -57.937 90.783 1.00 53.67 N \n+ATOM 24 CA ASP A 18 57.063 -59.355 90.610 1.00 55.96 C \n+ATOM 25 C ASP A 18 55.817 -59.777 91.346 1.00 54.31 C \n+ATOM 26 O ASP A 18 55.071 
-60.644 90.851 1.00 51.31 O \n+ATOM 27 CB ASP A 18 58.244 -60.213 91.111 1.00 63.00 C \n+ATOM 28 CG ASP A 18 59.374 -60.344 90.084 1.00 68.30 C \n+ATOM 29 OD1 ASP A 18 59.406 -59.'..b' 100.506 1.00 57.57 O \n+HETATM 1573 O HOH B 153 52.191 -61.314 94.961 1.00 58.41 O \n+HETATM 1574 O HOH B 154 43.934 -26.726 90.863 1.00 59.30 O \n+HETATM 1575 O HOH B 155 53.659 -36.279 85.569 1.00 52.56 O \n+HETATM 1576 O HOH B 158 42.200 -33.494 96.714 1.00 56.16 O \n+HETATM 1577 O HOH B 159 55.504 -39.201 82.113 1.00 59.28 O \n+HETATM 1578 O HOH B 160 30.376 -55.240 82.444 1.00 63.54 O \n+HETATM 1579 O HOH B 161 51.795 -59.031 96.828 1.00 67.98 O \n+HETATM 1580 O HOH B 162 22.526 -46.703 74.926 1.00 57.40 O \n+HETATM 1581 O HOH B 164 50.733 -57.148 73.024 1.00 56.47 O \n+HETATM 1582 O HOH B 165 48.104 -47.994 59.942 1.00 45.95 O \n+HETATM 1583 O HOH B 166 54.973 -56.153 97.297 1.00 64.13 O \n+HETATM 1584 O HOH B 167 39.253 -43.161 103.576 1.00 54.76 O \n+HETATM 1585 O HOH B 170 51.981 -48.990 95.987 1.00 43.41 O \n+HETATM 1586 O HOH B 171 51.448 -48.354 56.783 1.00 56.00 O \n+HETATM 1587 O HOH B 172 60.277 -51.573 96.315 1.00 57.45 O \n+HETATM 1588 O HOH B 174 51.577 -54.024 69.020 1.00 60.28 O \n+HETATM 1589 O HOH B 175 35.389 -59.160 66.962 1.00 73.86 O \n+HETATM 1590 O HOH B 176 54.325 -40.404 94.788 1.00 71.31 O \n+HETATM 1591 O HOH B 178 30.495 -40.155 85.802 1.00 63.37 O \n+HETATM 1592 O HOH B 179 50.958 -37.034 91.471 1.00 53.17 O \n+HETATM 1593 O HOH B 180 53.453 -52.474 96.288 1.00 57.62 O \n+HETATM 1594 O HOH B 182 54.933 -49.346 66.605 1.00 65.59 O \n+HETATM 1595 O HOH B 184 52.755 -38.706 96.230 1.00 60.20 O \n+HETATM 1596 O HOH B 188 48.197 -39.939 97.526 1.00 38.94 O \n+HETATM 1597 O HOH B 190 47.525 -59.982 77.805 1.00 58.12 O \n+HETATM 1598 O HOH B 191 51.832 -50.237 67.773 1.00 58.72 O \n+HETATM 1599 O HOH B 192 49.087 -42.082 98.262 1.00 50.34 O \n+HETATM 1600 O HOH B 193 60.481 -55.025 91.931 1.00 49.74 O \n+HETATM 1601 O HOH B 194 26.866 -55.431 67.876 1.00 
55.14 O \n+HETATM 1602 O HOH B 196 42.244 -39.591 71.487 1.00 60.09 O \n+HETATM 1603 O HOH B 199 61.339 -56.770 90.212 1.00 59.65 O \n+HETATM 1604 C ACT D 1 39.632 -32.467 85.009 1.00 92.90 C \n+HETATM 1605 O ACT D 1 39.508 -32.058 86.187 1.00 93.19 O \n+HETATM 1606 CH3 ACT D 1 38.386 -32.806 84.223 1.00 84.58 C \n+HETATM 1607 OXT ACT D 1 40.773 -32.594 84.493 1.00 90.66 O \n+HETATM 1608 C ACT D 2 58.453 -56.204 76.997 1.00 88.70 C \n+HETATM 1609 O ACT D 2 57.709 -56.233 75.983 1.00 85.04 O \n+HETATM 1610 CH3 ACT D 2 58.693 -57.480 77.774 1.00 83.03 C \n+HETATM 1611 OXT ACT D 2 58.996 -55.131 77.372 1.00 85.86 O \n+HETATM 1612 O DMS C 1 60.177 -42.366 76.499 1.00 68.99 O \n+HETATM 1613 C1 DMS C 1 60.547 -42.644 73.915 1.00 91.27 C \n+HETATM 1614 C2 DMS C 1 58.286 -41.770 74.686 1.00 93.07 C \n+HETATM 1615 S DMS C 1 59.919 -41.707 75.193 1.00 96.08 S \n+HETATM 1616 O DMS C 2 62.131 -50.369 84.252 1.00100.77 O \n+HETATM 1617 C1 DMS C 2 63.293 -52.714 84.253 1.00108.09 C \n+HETATM 1618 C2 DMS C 2 60.654 -52.426 84.716 1.00115.63 C \n+HETATM 1619 S DMS C 2 61.947 -51.761 83.804 1.00116.45 S \n+END \n\\ No newline at end of file\n'

diff -r 000000000000 -r de29b4f35536 transfs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transfs.py Fri Mar 27 09:18:53 2020 -0400

[

b'@@ -0,0 +1,321 @@\n+# Create dir containing ligands.sdf and protein.pdb\n+# Enter docker container like this:\n+# docker run -it --rm --gpus all -v $PWD:/root/train/fragalysis_test_files/work:Z informaticsmatters/deep-app-ubuntu-1604:latest bash\n+#\n+# Now inside the container run like this:\n+# mkdir /tmp/work\n+# rm -rf /tmp/work/* && python3 work/transfs.py -i work/test-data/ligands.sdf -r work/test-data/receptor.pdb -d 2 -w /tmp/work\n+#\n+# If testing with no GPU you can use the --mock option to generate random scores\n+#\n+# Start container for testing like this:\n+# docker run -it --rm -v $PWD:$PWD:Z -w $PWD informaticsmatters/deep-app-ubuntu-1604:latest bash\n+# Inside container test like this:\n+# mkdir /tmp/work\n+# cd chemicaltoolbox/xchem-deep\n+# rm -rf /tmp/work/* && python3 transfs.py -i test-data/ligands.sdf -r test-data/receptor.pdb -d 2 -w /tmp/work --mock\n+#\n+\n+import argparse, os, sys, math\n+import subprocess\n+import random\n+from openbabel import pybel\n+\n+types_file_name = \'inputs.types\'\n+types_file_name = \'inputs.types\'\n+predict_file_name = \'predictions.txt\'\n+work_dir = \'.\'\n+paths = None\n+inputs_protein = []\n+inputs_ligands = []\n+\n+\n+def log(*args, **kwargs):\n+ """Log output to STDERR\n+ """\n+ print(*args, file=sys.stderr, ** kwargs)\n+\n+def write_raw_inputs(receptor_pdb, ligands_sdf, distance):\n+ """\n+ Analyses the PDB file for waters that clash with each ligand in the SDF and writes out:\n+ 1. a PDB file named like receptor-123-543.pdb where the numeric parts are the waters that have been omitted\n+ 2. a corresponding directory named like receptor-123-543\n+ 3. an SDF named like receptor-123-543/ligands.sdf containing those ligands that correspond to that receptor.\n+ :param receptor_pdb: A PDB file without the ligand but with the crystallographic waters\n+ :param ligands_sdf: A SDF with the docked poses\n+ :param distance: The distance to consider when removing waters. 
Only heavy atoms in the ligand are considered.\n+ :return:\n+ """\n+\n+ global work_dir\n+ global inputs_protein\n+ global inputs_ligands\n+ global paths\n+\n+\n+ log("Writing data to", work_dir)\n+ if not os.path.isdir(work_dir):\n+ os.mkdir(work_dir)\n+\n+ receptor_file = os.path.basename(receptor_pdb)\n+\n+ sdf_writers = {}\n+ paths = []\n+\n+ # read the receptor once as we\'ll need to process it many times\n+ with open(receptor_pdb, \'r\') as f:\n+ lines = f.readlines()\n+\n+ count = 0\n+ for mol in pybel.readfile("sdf", ligands_sdf):\n+ count += 1\n+ if count % 50000 == 0:\n+ log(\'Processed\', count)\n+\n+ try:\n+ # print("Processing mol", mol.title)\n+\n+ clone = pybel.Molecule(mol)\n+ clone.removeh()\n+\n+ coords = []\n+ for atom in clone.atoms:\n+ coords.append(atom.coords)\n+\n+ watnumcode = \'\'\n+\n+ # getting receptor without waters that will clash with ligand\n+ new_receptor_pdb = []\n+ for line in lines:\n+ if line[17:20] == \'HOH\':\n+ x, y, z = float(line[30:39]), float(line[39:46]), float(line[46:55])\n+ distances = []\n+ for i in coords:\n+ distances.append(math.sqrt((x-i[0])**2 + (y-i[1])**2 + (z-i[2])**2)) # calculates distance based on cartesian coordinates\n+ if min(distances) > distance: # if all distances are larger than 2.0A, then molecule makes it to new file\n+ new_receptor_pdb.append(line)\n+ else:\n+ watnum = line[23:28].strip()\n+ # print("Skipped water " + watnum)\n+ watnumcode += \'-\' + watnum\n+ if line[17:20] != \'LIG\' and line[17:20] != \'HOH\': # ligand lines are also removed\n+ new_receptor_pdb.append(line)\n+\n+\n+ name = receptor_file[0:-4] + watnumcode\n+ # print(\''..b'/fragalysis_test_files/scripts/predict.py\',\n+ \'-m\', \'/train/fragalysis_test_files/resources/dense.prototxt\',\n+ \'-w\', \'/train/fragalysis_test_files/resources/weights.caffemodel\',\n+ \'-i\', os.path.sep.join([work_dir, types_file_name]),\n+ \'-o\', os.path.sep.join([work_dir, predict_file_name])]\n+ log("CMD:", cmd1)\n+ 
subprocess.call(cmd1)\n+\n+\n+def mock_predictions():\n+ global work_dir\n+ global predict_file_name\n+\n+ log("WARNING: generating mock results instead of running on GPU")\n+ outfile = generate_predictions_filename(work_dir, predict_file_name)\n+ count = 0\n+ with open(outfile, \'w\') as predictions:\n+ for path in paths:\n+ log("Reading", path)\n+ protein_gninatypes = os.listdir(os.path.sep.join([path, \'proteins\']))\n+ ligand_gninatypes = os.listdir(os.path.sep.join([path, \'ligands\']))\n+ for protein in protein_gninatypes:\n+ for ligand in ligand_gninatypes:\n+ count += 1\n+ score = random.random()\n+ line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\\n".format(score, path, protein, ligand,\n+ os.path.sep)\n+ # log("Writing", line)\n+ predictions.write(line)\n+\n+ log(\'Wrote\', count, \'mock predictions\')\n+\n+\n+def read_predictions():\n+ global predict_file_name\n+ global work_dir\n+ scores = {}\n+ with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name), \'r\') as input:\n+ for line in input:\n+ # log(line)\n+ tokens = line.split()\n+ if len(tokens) == 5 and tokens[1] == \'|\':\n+ # log(len(tokens), tokens[0], tokens[3], tokens[4])\n+ record_no = inputs_ligands.index(tokens[4])\n+ if record_no is not None:\n+ # log(record_no, tokens[0])\n+ scores[record_no] = tokens[0]\n+ log("Found", len(scores), "scores")\n+ return scores\n+\n+\n+def patch_scores_sdf(outfile, scores):\n+\n+ counter = 0\n+ sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile)\n+ log("Writing results to {0}".format(sdf_path))\n+ sdf_file = pybel.Outputfile("sdf", sdf_path)\n+\n+ for path in paths:\n+ for mol in pybel.readfile("sdf", os.path.sep.join([path, \'ligands.sdf\'])):\n+ if counter in scores:\n+ score = scores[counter]\n+ # og("Score for record {0} is {1}".format(counter, score))\n+ mol.data[\'TransFSScore\'] = score\n+ sdf_file.write(mol)\n+ else:\n+ log("No score found for record", counter)\n+ counter += 1\n+ sdf_file.close()\n+\n+\n+def 
execute(ligands_sdf, protein, outfile, distance, mock=False):\n+\n+ write_inputs(protein, ligands_sdf, distance)\n+ if mock:\n+ mock_predictions()\n+ else:\n+ run_predictions()\n+ scores = read_predictions()\n+ patch_scores_sdf(outfile, scores)\n+\n+\n+def main():\n+ global work_dir\n+\n+ parser = argparse.ArgumentParser(description=\'XChem deep - pose scoring\')\n+\n+ parser.add_argument(\'-i\', \'--input\', help="SDF containing the poses to score)")\n+ parser.add_argument(\'-r\', \'--receptor\', help="Receptor file for scoring (PDB format)")\n+ parser.add_argument(\'-d\', \'--distance\', type=float, default=2.0, help="Cuttoff for removing waters")\n+ parser.add_argument(\'-o\', \'--outfile\', default=\'output.sdf\', help="File name for results")\n+ parser.add_argument(\'-w\', \'--work-dir\', default=".", help="Working directory")\n+ parser.add_argument(\'--mock\', action=\'store_true\', help=\'Generate mock scores rather than run on GPU\')\n+\n+ args = parser.parse_args()\n+ log("XChem deep args: ", args)\n+\n+ work_dir = args.work_dir\n+\n+ execute(args.input, args.receptor, args.outfile, args.distance, mock=args.mock)\n+\n+\n+if __name__ == "__main__":\n+ main()\n'

diff -r 000000000000 -r de29b4f35536 transfs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transfs.xml Fri Mar 27 09:18:53 2020 -0400

[

@@ -0,0 +1,108 @@
+<tool id="xchem_transfs_scoring" name="XChem TransFS pose scoring" version="0.2.0">
+    <description>using deep learning</description>
+
+    <requirements>
+        <!-- The tool runs inside the Docker image below; no other requirements are declared. -->
+
+
+        <container type="docker">informaticsmatters/deep-app-ubuntu-1604:0.9</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    cd /train/fragalysis_test_files/ &&
+    mkdir workdir &&
+    cd workdir &&
+
+    cp '$ligands' ligands.sdf &&
+    cp '$receptor' receptor.pdb &&
+
+    ##mkdir -p /root/train/ &&
+    ##ln -s /train/fragalysis_test_files/ /root/train/ &&
+
+    ##adduser centos --uid 1000 --quiet --no-create-home --system &&
+    ##apt install sudo -y &&
+
+    ## mkdir -p ligands &&
+    cd ../ &&
+    ## The hidden 'mock' parameter is empty by default; the functional test sets it to --mock
+    ## so that random scores are generated instead of running the GPU prediction.
+    python '$__tool_directory__/transfs.py' -i ./workdir/ligands.sdf -r ./workdir/receptor.pdb -d $distance -w /train/fragalysis_test_files/workdir $mock &&
+    ls -l &&
+    ls -l workdir &&
+    sudo -u ubuntu cp ./workdir/output.sdf '$output' &&
+    head -n 10000 ./workdir/output.sdf &&
+
+    mkdir -p ./pdb &&
+    cp -r ./workdir/receptor*.pdb ./pdb &&
+    tar -cvhf archiv.tar ./pdb &&
+    sudo -u ubuntu cp archiv.tar '$output_receptors' &&
+
+    sudo -u ubuntu cp ./workdir/predictions.txt '$predictions'
+
+
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="receptor" format="pdb" label="Receptor" help="Select a receptor (pdb format)."/>
+        <param type="data" name="ligands" format="sdf,mol" label="Ligands" help="Ligands (docked poses) in SDF format"/>
+        <param name="distance" type="float" value="2.0" min="1.0" max="5.0" label="Distance to waters" help="Remove waters closer than this distance to any ligand heavy atom"/>
+        <!-- Hidden switch: stays empty for normal runs; the test below sets it to "-mock" prefixed with a second dash. -->
+        <param type="hidden" name="mock" value="" label="Mock calculations" help="Use random numbers instead of running on GPU"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="XChem pose scoring on ${on_string}"/>
+        <data name="predictions" format="txt" label="Predictions on ${on_string}"/>
+        <data name="output_receptors" format="tar" label="Receptors ${on_string}"/>
+
+
+    </outputs>
+
+    <tests>
+     <test>
+            <param name="receptor" value="receptor.pdb"/>
+            <param name="ligands" value="ligands.sdf"/>
+            <param name="mock" value="--mock" />
+            <param name="distance" value="4.0"/>
+            <output name="output" ftype="sdf">
+                <assert_contents>
+                    <has_text text="TransFSReceptor"/>
+                    <has_text text="TransFSScore"/>
+                </assert_contents>
+            </output>
+
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+This tool performs scoring of docked ligand poses using deep learning.
+It uses the gnina and libmolgrid toolkits to perform the scoring to generate
+a prediction for how good the pose is.
+
+
+-----
+
+.. class:: infomark
+
+**Inputs**
+
+1. The protein receptor to dock into as a file in PDB format. This should have the ligand removed but retain the waters.
+2. A set of ligand poses to score in SDF format.
+
+-----
+
+.. class:: infomark
+
+**Outputs**
+
+An SDF file is produced as output. The binding affinity scores are contained within the SDF file
+as the TransFSScore property and the PDB file (with the waters that clash with the ligand removed)
+that was used for the scoring as the TransFSReceptor property.
+Values for the score range from 0 (poor binding) to 1 (good binding).
+
+A set of PDB files is also output, each one with different crystallographic waters removed. Each ligand is
+examined against the input PDB structure, and the waters that clash with it (any water closer than
+the 'distance' parameter to a ligand heavy atom) are removed. The filenames are encoded with the water numbers that are removed.
+
+    ]]></help>
+</tool>