Repository 'xchem_transfs_scoring'
hg clone https://toolshed.g2.bx.psu.edu/repos/bgruening/xchem_transfs_scoring

Changeset 0:de29b4f35536 (2020-03-27)
Next changeset 1:8d9c8ba2ec86 (2020-04-08)
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/transfs commit d9a9e2f0e12fe9d2c37f632d99f2164df577b4af"
added:
README.txt
server/transfs.py
server/transfs.xml
test-data/ligands.sdf
test-data/receptor.pdb
transfs.py
transfs.xml
b
diff -r 000000000000 -r de29b4f35536 README.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/README.txt Fri Mar 27 09:18:53 2020 -0400
b
@@ -0,0 +1,27 @@
+THIS TOOL WILL NOT RUN AT PRESENT.
+
+The tool is 'work in progress' and needs at least the following sorting out:
+
+1. Execution environment
+
+Currently the xchem_deep_score.py code can be run in the informaticsmatters/deep-app-ubuntu-1604:latest
+container (see instructions at the top of the python file for doing so). The Galaxy execution environment needs
+to be defined so that the tool runs in this docker container.
+Alternatively a conda environment could potentially be created but the dependencies are very complex and
+some components need to be built from source.
+Details for the dependencies are mostly described in the GitHub repo for the docker image:
+https://github.com/InformaticsMatters/dls-deep/tree/ubuntu
+
+2. GPU availability
+
+The code must run in an environment with a GPU and with the CUDA drivers.
+The docker image mentioned above has everything that is needed and will run on a GPU enabled environment
+(a special version of Docker on the host machine is needed that supports GPUs).
+
+Only the prediction step needs a GPU. The steps before and after it run on CPU. Without a GPU you can specify the --mock
+option which uses random numbers for the predicted scores.
+
+3. Associated Python scripts.
+
+The docker image contains additional python scripts (primarily /train/fragalysis_test_files/predict.py)
+that are needed. If not running in a container these will need to be made available.
\ No newline at end of file
b
diff -r 000000000000 -r de29b4f35536 server/transfs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/transfs.py Fri Mar 27 09:18:53 2020 -0400
[
b'@@ -0,0 +1,322 @@\n+# Create dir containing ligands.sdf and protein.pdb\n+# Enter docker container like this:\n+#   docker run -it --rm --gpus all -v $PWD:/root/train/fragalysis_test_files/work:Z informaticsmatters/deep-app-ubuntu-1604:latest bash\n+#\n+# Now inside the container run like this:\n+#   mkdir /tmp/work\n+#   rm -rf /tmp/work/* && python3 work/transfs.py -i work/test-data/ligands.sdf -r work/test-data/receptor.pdb -d 2 -w /tmp/work\n+#\n+# If testing with no GPU you can use the --mock option to generate random scores\n+#\n+# Start container for testing like this:\n+#    docker run -it --rm -v $PWD:$PWD:Z -w $PWD informaticsmatters/deep-app-ubuntu-1604:latest bash\n+# Inside container test like this:\n+#   mkdir /tmp/work\n+#   cd chemicaltoolbox/xchem-deep\n+#   rm -rf /tmp/work/* && python3 transfs.py -i test-data/ligands.sdf -r test-data/receptor.pdb -d 2 -w /tmp/work --mock\n+#\n+\n+import argparse, os, sys, math\n+import subprocess\n+import random\n+from openbabel import pybel\n+\n+types_file_name = \'inputs.types\'\n+types_file_name = \'inputs.types\'\n+predict_file_name = \'predictions.txt\'\n+work_dir = \'.\'\n+paths = None\n+inputs_protein = []\n+inputs_ligands = []\n+\n+\n+def log(*args, **kwargs):\n+    """Log output to STDERR\n+    """\n+    print(*args, file=sys.stderr, ** kwargs)\n+\n+def write_raw_inputs(receptor_pdb, ligands_sdf, distance):\n+    """\n+    Analyses the PDB file for waters that clash with each ligand in the SDF and writes out:\n+    1. a PDB file named like receptor-123-543.pdb where the numeric parts are the waters that have been omitted\n+    2. a corresponding directory named like receptor-123-543\n+    3. 
an SDF named like receptor-123-543/ligands.sdf containing those ligands that correspond to that receptor.\n+    :param receptor_pdb: A PDB file without the ligand but with the crystallographic waters\n+    :param ligands_sdf: A SDF with the docked poses\n+    :param distance: The distance to consider when removing waters. Only heavy atoms in the ligand are considered.\n+    :return:\n+    """\n+\n+    global work_dir\n+    global inputs_protein\n+    global inputs_ligands\n+    global paths\n+\n+\n+    log("Writing data to", work_dir)\n+    if not os.path.isdir(work_dir):\n+        os.mkdir(work_dir)\n+\n+    receptor_file = os.path.basename(receptor_pdb)\n+\n+    sdf_writers = {}\n+    paths = []\n+\n+    # read the receptor once as we\'ll need to process it many times\n+    with open(receptor_pdb, \'r\') as f:\n+        lines = f.readlines()\n+\n+    count = 0\n+    for mol in pybel.readfile("sdf", ligands_sdf):\n+        count += 1\n+        if count % 50000 == 0:\n+            log(\'Processed\', count)\n+\n+        try:\n+            # print("Processing mol", mol.title)\n+\n+            clone = pybel.Molecule(mol)\n+            clone.removeh()\n+\n+            coords = []\n+            for atom in clone.atoms:\n+                coords.append(atom.coords)\n+\n+            watnumcode = \'\'\n+\n+            # getting receptor without waters that will clash with ligand\n+            new_receptor_pdb = []\n+            for line in lines:\n+                if line[17:20] == \'HOH\':\n+                    x, y, z = float(line[30:39]),  float(line[39:46]), float(line[46:55])\n+                    distances = []\n+                    for i in coords:\n+                        distances.append(math.sqrt((x-i[0])**2 + (y-i[1])**2 + (z-i[2])**2))  # calculates distance based on cartesian coordinates\n+                    if min(distances) > distance: # if all distances are larger than 2.0A, then molecule makes it to new file\n+                        
new_receptor_pdb.append(line)\n+                    else:\n+                        watnum = line[23:28].strip()\n+                        # print("Skipped water " + watnum)\n+                        watnumcode += \'-\' + watnum\n+                if line[17:20] != \'LIG\' and line[17:20] != \'HOH\':  # ligand lines are also removed\n+                    new_receptor_pdb.append(line)\n+\n+\n+            name = receptor_file[0:-4] + watnumcode\n+            # print(\''..b'ragalysis_test_files/scripts/predict.py\',\n+            \'-m\', \'/train/fragalysis_test_files/resources/dense.prototxt\',\n+            \'-w\', \'/train/fragalysis_test_files/resources/weights.caffemodel\',\n+            \'-i\', os.path.sep.join([work_dir, types_file_name]),\n+            \'-o\', os.path.sep.join([work_dir, predict_file_name])]\n+    log("CMD:", cmd1)\n+    subprocess.call(cmd1)\n+\n+\n+def mock_predictions():\n+    global work_dir\n+    global predict_file_name\n+\n+    log("WARNING: generating mock results instead of running on GPU")\n+    outfile = generate_predictions_filename(work_dir, predict_file_name)\n+    count = 0\n+    with open(outfile, \'w\') as predictions:\n+        for path in paths:\n+            log("Reading", path)\n+            protein_gninatypes = os.listdir(os.path.sep.join([path, \'proteins\']))\n+            ligand_gninatypes = os.listdir(os.path.sep.join([path, \'ligands\']))\n+            for protein in protein_gninatypes:\n+                for ligand in ligand_gninatypes:\n+                    count += 1\n+                    score = random.random()\n+                    line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\\n".format(score, path, protein, ligand,\n+                                                                                       os.path.sep)\n+                    # log("Writing", line)\n+                    predictions.write(line)\n+\n+    log(\'Wrote\', count, \'mock predictions\')\n+\n+\n+def read_predictions():\n+    global 
predict_file_name\n+    global work_dir\n+    scores = {}\n+    with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name), \'r\') as input:\n+        for line in input:\n+            # log(line)\n+            tokens = line.split()\n+            if len(tokens) == 5 and tokens[1] == \'|\':\n+                # log(len(tokens), tokens[0], tokens[3], tokens[4])\n+                record_no = inputs_ligands.index(tokens[4])\n+                if record_no is not None:\n+                    # log(record_no, tokens[0])\n+                    scores[record_no] = tokens[0]\n+    log("Found", len(scores), "scores")\n+    return scores\n+\n+\n+def patch_scores_sdf(outfile, scores):\n+\n+    counter = 0\n+    sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile)\n+    log("Writing results to {0}".format(sdf_path))\n+    sdf_file = pybel.Outputfile("sdf", sdf_path)\n+\n+    for path in paths:\n+        for mol in pybel.readfile("sdf", os.path.sep.join([path, \'ligands.sdf\'])):\n+            if counter in scores:\n+                score = scores[counter]\n+                # og("Score for record {0} is {1}".format(counter, score))\n+                mol.data[\'TransFSScore\'] = score\n+                sdf_file.write(mol)\n+            else:\n+                log("No score found for record", counter)\n+            counter += 1\n+    sdf_file.close()\n+\n+\n+def execute(ligands_sdf, protein, outfile, distance, mock=False):\n+\n+    write_inputs(protein, ligands_sdf, distance)\n+    if mock:\n+        mock_predictions()\n+    else:\n+        run_predictions()\n+    scores = read_predictions()\n+    patch_scores_sdf(outfile, scores)\n+\n+\n+def main():\n+    global work_dir\n+\n+    parser = argparse.ArgumentParser(description=\'XChem deep - pose scoring\')\n+\n+    parser.add_argument(\'-i\', \'--input\', help="SDF containing the poses to score)")\n+    parser.add_argument(\'-r\', \'--receptor\', help="Receptor file for scoring (PDB format)")\n+    
parser.add_argument(\'-d\', \'--distance\', type=float, default=2.0, help="Cuttoff for removing waters")\n+    parser.add_argument(\'-o\', \'--outfile\', default=\'output.sdf\', help="File name for results")\n+    parser.add_argument(\'-w\', \'--work-dir\', default=".", help="Working directory")\n+    parser.add_argument(\'--mock\', action=\'store_true\', help=\'Generate mock scores rather than run on GPU\')\n+\n+    args = parser.parse_args()\n+    log("XChem deep args: ", args)\n+\n+    work_dir = args.work_dir\n+\n+    execute(args.input, args.receptor, args.outfile, args.distance, mock=args.mock)\n+\n+\n+if __name__ == "__main__":\n+    main()\n+\n'
b
diff -r 000000000000 -r de29b4f35536 server/transfs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/server/transfs.xml Fri Mar 27 09:18:53 2020 -0400
[
@@ -0,0 +1,108 @@
+<tool id="xchem_transfs_scoring" name="XChem TransFS pose scoring" version="0.2.0">
+    <description>using deep learning</description>
+
+    <requirements>
+        <!--requirement type="package" version="3.0.0">openbabel</requirement-->
+        <!--requirement type="package" version="3.7">python</requirement-->
+        <!-- many other requirements are needed -->
+        <container type="docker">informaticsmatters/deep-app-ubuntu-1604:latest</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    cd /train/fragalysis_test_files/ &&
+    mkdir workdir &&
+    cd workdir &&
+
+    cp '$ligands' ligands.sdf &&
+    cp '$receptor' receptor.pdb &&
+
+    ##mkdir -p /root/train/ &&
+    ##ln -s /train/fragalysis_test_files/ /root/train/ &&
+
+    ##adduser centos --uid 1000 --quiet --no-create-home --system &&
+    ##apt install sudo -y &&
+
+    ## mkdir -p ligands &&
+    cd ../ &&
+    python '$__tool_directory__/transfs.py' -i ./workdir/ligands.sdf -r ./workdir/receptor.pdb -d $distance -w /train/fragalysis_test_files/workdir &&
+    ls -l &&
+    ls -l workdir &&
+    sudo -u ubuntu cp ./workdir/output.sdf '$output' &&
+    head -n 10000 ./workdir/output.sdf &&
+
+    mkdir -p ./pdb &&
+    cp -r ./workdir/receptor*.pdb ./pdb &&
+    tar -cvhf archiv.tar ./pdb &&
+    sudo -u ubuntu cp archiv.tar '$output_receptors' &&
+
+    sudo -u ubuntu cp ./workdir/predictions.txt '$predictions'
+
+
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="receptor" format="pdb" label="Receptor" help="Select a receptor (pdb format)."/>
+        <param type="data" name="ligands" format="sdf,mol" label="Ligands" help="Ligands (docked poses) in SDF format)"/>
+        <param name="distance" type="float" value="2.0" min="1.0" max="5.0" label="Distance to waters" help="Remove waters closer than this distance to any ligand heavy atom"/>
+        <param type="hidden" name="mock" value="" label="Mock calculations" help="Use random numbers instead of running on GPU"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="XChem pose scoring on ${on_string}"/>
+        <data name="predictions" format="txt" label="Predictions on ${on_string}"/>
+        <data name="output_receptors" format="tar" label="Receptors ${on_string}"/>
+
+        <!--collection name="pdb_files" type="list" label="PDB files with variable number of waters">
+            <discover_datasets pattern="__name_and_ext__" directory="pdb" />
+        </collection-->
+    </outputs>
+
+    <tests>
+ <test>
+            <param name="receptor" value="receptor.pdb"/>
+            <param name="ligands" value="ligands.sdf"/>
+            <!--param name="mock" value="- -mock"/-->
+            <param name="distance" value="4.0"/>
+            <output name="output" ftype="sdf">
+                <assert_contents>
+                    <has_text text="TransFSReceptor"/>
+                    <has_text text="TransFSScore"/>
+                </assert_contents>
+            </output>
+            <!--output_collection name="pdb_files" type="list" count="2" /-->
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+This tool performs scoring of docked ligand poses using deep learning.
+It uses the gnina and libmolgrid toolkits to perform the scoring to generate
+a prediction for how good the pose is.
+
+
+-----
+
+.. class:: infomark
+
+**Inputs**
+
+1. The protein receptor to dock into as a file in PDB format. This should have the ligand removed but retain the waters.
+2. A set of ligand poses to score in SDF format.
+
+-----
+
+.. class:: infomark
+
+**Outputs**
+
+An SDF file is produced as output. The binding affinity scores are contained within the SDF file
+as the TransFSScore property, and the PDB file (with the waters that clash with the ligand removed)
+that was used for the scoring is recorded as the TransFSReceptor property.
+Values for the score range from 0 (poor binding) to 1 (good binding).
+
+A set of PDB files is also output, each one with different crystallographic waters removed. Each ligand is
+examined against the input PDB structure, with the waters that clash (those closer to any heavy atom of the
+ligand than the 'distance' parameter) being removed. The filenames encode the numbers of the waters that were removed.
+
+    ]]></help>
+</tool>
b
diff -r 000000000000 -r de29b4f35536 test-data/ligands.sdf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ligands.sdf Fri Mar 27 09:18:53 2020 -0400
b
b'@@ -0,0 +1,1510 @@\n+MolPort-002-851-943\n+  rDOCK(R)          3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20  0  0  0  0  0  0  0  0999 V2000\n+   28.6216  -44.1866   73.3638 C   0  0  0  0  0  0\n+   30.1353  -44.0328   73.4362 C   0  0  0  0  0  0\n+   30.7597  -45.2722   73.8147 O   0  0  0  0  0  0\n+   32.1148  -45.2860   73.9853 C   0  0  0  0  0  0\n+   32.8758  -44.2937   74.5886 C   0  0  0  0  0  0\n+   34.2640  -44.4447   74.6917 C   0  0  0  0  0  0\n+   34.9396  -43.4382   75.2691 F   0  0  0  0  0  0\n+   34.9421  -45.5768   74.2048 C   0  0  0  0  0  0\n+   34.1425  -46.5638   73.5990 C   0  0  0  0  0  0\n+   32.7560  -46.4133   73.4768 C   0  0  0  0  0  0\n+   36.4169  -45.7333   74.2945 C   0  0  0  0  0  0\n+   37.3030  -44.7299   73.8615 C   0  0  0  0  0  0\n+   38.6916  -44.8971   73.9261 C   0  0  0  0  0  0\n+   39.2314  -46.0682   74.4460 C   0  0  0  0  0  0\n+   38.3836  -47.0720   74.8981 C   0  0  0  0  0  0\n+   36.9964  -46.9076   74.8159 C   0  0  0  0  0  0\n+   30.6590  -43.6789   72.0407 C   0  0  0  0  0  0\n+   31.1658  -44.4817   71.2685 O   0  0  0  0  0  0\n+   30.5050  -42.3687   71.7235 O   0  0  0  0  0  0\n+  1  2  1  0  0  0\n+  2  3  1  0  0  0\n+  2 17  1  0  0  0\n+  3  4  1  0  0  0\n+  4  5  2  0  0  0\n+  4 10  1  0  0  0\n+  5  6  1  0  0  0\n+  6  7  1  0  0  0\n+  6  8  2  0  0  0\n+  8  9  1  0  0  0\n+  8 11  1  0  0  0\n+  9 10  2  0  0  0\n+ 11 12  2  0  0  0\n+ 11 16  1  0  0  0\n+ 12 13  1  0  0  0\n+ 13 14  2  0  0  0\n+ 14 15  1  0  0  0\n+ 15 16  2  0  0  0\n+ 17 18  2  0  0  0\n+ 17 19  1  0  0  0\n+M  END\n+>  <CHROM.1>\n+-177.01127901,-99.92003744,40.28785274,-50.56487888,33.82101420,-44.94803558\n+73.75196667,2.86183962,-0.16145766,-0.25378100\n+\n+>  <Name>\n+MolPort-002-851-943\n+\n+>  <RI>\n+0\n+\n+>  <Rbt.Current_Directory>\n+/home/timbo/github/im/docking-validation/targets/nudt7/expts/vscreening/NUDT7A-x0129/work/94/3883032ea0e243c22b1a7b7b149ac4\n+\n+>  <Rbt.Executable>\n+rbdock ($Id: 
//depot/dev/client3/rdock/2013.1/src/exe/rbdock.cxx#4 $)\n+\n+>  <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+>  <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+>  <Rbt.Receptor>\n+docking.prm\n+\n+>  <SCORE>\n+-6.92904\n+\n+>  <SCORE.INTER>\n+-6.0525\n+\n+>  <SCORE.INTER.CONST>\n+1\n+\n+>  <SCORE.INTER.POLAR>\n+0\n+\n+>  <SCORE.INTER.REPUL>\n+0\n+\n+>  <SCORE.INTER.ROT>\n+4\n+\n+>  <SCORE.INTER.VDW>\n+-15.4525\n+\n+>  <SCORE.INTER.norm>\n+-0.318553\n+\n+>  <SCORE.INTRA>\n+-0.876534\n+\n+>  <SCORE.INTRA.DIHEDRAL>\n+2.31595\n+\n+>  <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+>  <SCORE.INTRA.POLAR>\n+0\n+\n+>  <SCORE.INTRA.POLAR.0>\n+0\n+\n+>  <SCORE.INTRA.REPUL>\n+0\n+\n+>  <SCORE.INTRA.REPUL.0>\n+0\n+\n+>  <SCORE.INTRA.VDW>\n+-2.03451\n+\n+>  <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+>  <SCORE.INTRA.norm>\n+-0.0461334\n+\n+>  <SCORE.RESTR>\n+0\n+\n+>  <SCORE.RESTR.CAVITY>\n+0\n+\n+>  <SCORE.RESTR.norm>\n+0\n+\n+>  <SCORE.SYSTEM>\n+0\n+\n+>  <SCORE.SYSTEM.CONST>\n+0\n+\n+>  <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+>  <SCORE.SYSTEM.norm>\n+0\n+\n+>  <SCORE.heavy>\n+19\n+\n+>  <SCORE.norm>\n+-0.364686\n+\n+$$$$\n+MolPort-002-851-943\n+  rDOCK(R)          3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20  0  0  0  0  0  0  0  0999 V2000\n+   28.7887  -43.9849   73.4350 C   0  0  0  0  0  0\n+   30.3065  -43.8574   73.4561 C   0  0  0  0  0  0\n+   30.9156  -45.0596   73.9590 O   0  0  0  0  0  0\n+   32.2669  -45.0638   74.1571 C   0  0  0  0  0  0\n+   32.9869  -44.1547   74.9208 C   0  0  0  0  0  0\n+   34.3766  -44.2833   75.0323 C   0  0  0  0  0  0\n+   35.0112  -43.3585   75.7706 F   0  0  0  0  0  0\n+   35.0968  -45.3106   74.3963 C   0  0  0  0  0  0\n+   34.3382  -46.2150   73.6302 C   0  0  0  0  0  0\n+   32.9505  -46.0840   73.4996 C   0  0  0  0  0  0\n+   36.5736  -45.4404   74.4959 C   0  0  0  0  0  0\n+   37.4235  -44.3192   74.5083 C   0  0  0  0  0  0\n+   38.8156  -44.4525   74.5763 C   0  0  0  0  0  0\n+   39.3943  -45.7144   74.6543 C 
  0  0  0  0  0  0\n+   38.5819  -46.8416   74.6613 C   0  0  0  0  0  0\n+   37.1920  -46.7044   74.5761 C   0  0  0  0  0  0\n+   30.8027  -43.6753   72.0182 C   0  0  0  0  0  '..b'e/rbdock.cxx#4 $)\n+\n+>  <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+>  <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+>  <Rbt.Receptor>\n+docking.prm\n+\n+>  <SCORE>\n+-6.29997\n+\n+>  <SCORE.INTER>\n+-5.91247\n+\n+>  <SCORE.INTER.CONST>\n+1\n+\n+>  <SCORE.INTER.POLAR>\n+0\n+\n+>  <SCORE.INTER.REPUL>\n+0\n+\n+>  <SCORE.INTER.ROT>\n+4\n+\n+>  <SCORE.INTER.VDW>\n+-15.3125\n+\n+>  <SCORE.INTER.norm>\n+-0.311182\n+\n+>  <SCORE.INTRA>\n+-0.387505\n+\n+>  <SCORE.INTRA.DIHEDRAL>\n+1.04805\n+\n+>  <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+>  <SCORE.INTRA.POLAR>\n+0\n+\n+>  <SCORE.INTRA.POLAR.0>\n+0\n+\n+>  <SCORE.INTRA.REPUL>\n+0\n+\n+>  <SCORE.INTRA.REPUL.0>\n+0\n+\n+>  <SCORE.INTRA.VDW>\n+-0.911532\n+\n+>  <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+>  <SCORE.INTRA.norm>\n+-0.020395\n+\n+>  <SCORE.RESTR>\n+0\n+\n+>  <SCORE.RESTR.CAVITY>\n+0\n+\n+>  <SCORE.RESTR.norm>\n+0\n+\n+>  <SCORE.SYSTEM>\n+0\n+\n+>  <SCORE.SYSTEM.CONST>\n+0\n+\n+>  <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+>  <SCORE.SYSTEM.norm>\n+0\n+\n+>  <SCORE.heavy>\n+19\n+\n+>  <SCORE.norm>\n+-0.331577\n+\n+$$$$\n+MolPort-002-851-943\n+  rDOCK(R)          3D\n+libRbt.so/2013.1/901 2013/11/27\n+ 19 20  0  0  0  0  0  0  0  0999 V2000\n+   28.1301  -46.9340   70.0718 C   0  0  0  0  0  0\n+   28.9931  -46.5530   71.2679 C   0  0  0  0  0  0\n+   30.2845  -46.0850   70.8408 O   0  0  0  0  0  0\n+   31.2330  -45.8494   71.7948 C   0  0  0  0  0  0\n+   31.2492  -46.3704   73.0818 C   0  0  0  0  0  0\n+   32.2909  -46.0377   73.9562 C   0  0  0  0  0  0\n+   32.2332  -46.5616   75.1912 F   0  0  0  0  0  0\n+   33.3464  -45.1837   73.5884 C   0  0  0  0  0  0\n+   33.3007  -44.6726   72.2780 C   0  0  0  0  0  0\n+   32.2541  -44.9867   71.4028 C   0  0  0  0  0  0\n+   34.4439  -44.8123   74.5185 C   0  0  0  0  0 
 0\n+   34.2182  -44.5848   75.8885 C   0  0  0  0  0  0\n+   35.2544  -44.2099   76.7522 C   0  0  0  0  0  0\n+   36.5513  -44.0721   76.2701 C   0  0  0  0  0  0\n+   36.8109  -44.3061   74.9252 C   0  0  0  0  0  0\n+   35.7693  -44.6642   74.0622 C   0  0  0  0  0  0\n+   28.3209  -45.3915   72.0067 C   0  0  0  0  0  0\n+   27.7039  -45.5051   73.0574 O   0  0  0  0  0  0\n+   28.4756  -44.1955   71.3848 O   0  0  0  0  0  0\n+  1  2  1  0  0  0\n+  2  3  1  0  0  0\n+  2 17  1  0  0  0\n+  3  4  1  0  0  0\n+  4  5  2  0  0  0\n+  4 10  1  0  0  0\n+  5  6  1  0  0  0\n+  6  7  1  0  0  0\n+  6  8  2  0  0  0\n+  8  9  1  0  0  0\n+  8 11  1  0  0  0\n+  9 10  2  0  0  0\n+ 11 12  2  0  0  0\n+ 11 16  1  0  0  0\n+ 12 13  1  0  0  0\n+ 13 14  2  0  0  0\n+ 14 15  1  0  0  0\n+ 15 16  2  0  0  0\n+ 17 18  2  0  0  0\n+ 17 19  1  0  0  0\n+M  END\n+>  <CHROM.1>\n+-172.56786008,103.66611308,20.57483725,-37.44652631,32.01085503,-45.35131371\n+73.24978757,-2.61232414,0.19661218,1.57826506\n+\n+>  <Name>\n+MolPort-002-851-943\n+\n+>  <RI>\n+0\n+\n+>  <Rbt.Current_Directory>\n+/home/timbo/github/im/docking-validation/targets/nudt7/expts/vscreening/NUDT7A-x0129/work/94/3883032ea0e243c22b1a7b7b149ac4\n+\n+>  <Rbt.Executable>\n+rbdock ($Id: //depot/dev/client3/rdock/2013.1/src/exe/rbdock.cxx#4 $)\n+\n+>  <Rbt.Library>\n+libRbt.so (2013.1, Build901 2013/11/27)\n+\n+>  <Rbt.Parameter_File>\n+/rDock_2013.1/data/scripts/dock.prm\n+\n+>  <Rbt.Receptor>\n+docking.prm\n+\n+>  <SCORE>\n+-6.14591\n+\n+>  <SCORE.INTER>\n+-4.70666\n+\n+>  <SCORE.INTER.CONST>\n+1\n+\n+>  <SCORE.INTER.POLAR>\n+0\n+\n+>  <SCORE.INTER.REPUL>\n+0\n+\n+>  <SCORE.INTER.ROT>\n+4\n+\n+>  <SCORE.INTER.VDW>\n+-14.1067\n+\n+>  <SCORE.INTER.norm>\n+-0.247719\n+\n+>  <SCORE.INTRA>\n+-1.43925\n+\n+>  <SCORE.INTRA.DIHEDRAL>\n+0.01514\n+\n+>  <SCORE.INTRA.DIHEDRAL.0>\n+4.86231\n+\n+>  <SCORE.INTRA.POLAR>\n+0\n+\n+>  <SCORE.INTRA.POLAR.0>\n+0\n+\n+>  <SCORE.INTRA.REPUL>\n+0\n+\n+>  
<SCORE.INTRA.REPUL.0>\n+0\n+\n+>  <SCORE.INTRA.VDW>\n+-1.44682\n+\n+>  <SCORE.INTRA.VDW.0>\n+0.564556\n+\n+>  <SCORE.INTRA.norm>\n+-0.0757498\n+\n+>  <SCORE.RESTR>\n+0\n+\n+>  <SCORE.RESTR.CAVITY>\n+0\n+\n+>  <SCORE.RESTR.norm>\n+0\n+\n+>  <SCORE.SYSTEM>\n+0\n+\n+>  <SCORE.SYSTEM.CONST>\n+0\n+\n+>  <SCORE.SYSTEM.DIHEDRAL>\n+0\n+\n+>  <SCORE.SYSTEM.norm>\n+0\n+\n+>  <SCORE.heavy>\n+19\n+\n+>  <SCORE.norm>\n+-0.323469\n+\n+$$$$\n\\ No newline at end of file\n'
b
diff -r 000000000000 -r de29b4f35536 test-data/receptor.pdb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/receptor.pdb Fri Mar 27 09:18:53 2020 -0400
b
b'@@ -0,0 +1,1640 @@\n+HEADER    ----                                    26-MAY-17   1umyp             \n+TITLE     NUDT7A-x0129                                                          \n+EXPDTA    X-RAY DIFFRACTION                                                     \n+REMARK   2                                                                      \n+REMARK   2 RESOLUTION. 2.45 ANGSTROMS                                           \n+REMARK   3                                                                      \n+REMARK   3   R VALUE                    0.174                                   \n+REMARK  50 DEPOSITOR                                                            \n+REMARK  50   admin                                                              \n+REMARK  50 PROJECT                                                              \n+REMARK  50   NUDT7                                                              \n+REMARK  50 STRUCFILE                                                            \n+REMARK  50   /dls/science/groups/proasis/Data/PPWEB/oxc1umyp.pdb.gz             \n+REMARK  50 STRUCSOURCE                                                          \n+REMARK  50   oxc                                                                \n+HET    ACT  D   1       4                                                       \n+HET    ACT  D   2       4                                                       \n+HET    DMS  C   1       4                                                       \n+HET    DMS  C   2       4                                                       \n+CRYST1  125.992  125.992   41.625  90.00  90.00 120.00 P 3 2 1       0          \n+ATOM      1  N   SER A  15      59.869 -59.274  94.881  1.00 76.84           N  \n+ATOM      2  CA  SER A  15      59.985 -57.783  94.828  1.00 71.13           C  \n+ATOM      3  C   SER A  15      58.610 -57.164  94.540  1.00 67.52           C  \n+ATOM      4  O   SER A  15      57.643 -57.893  94.314  1.00 65.98  
         O  \n+ATOM      5  CB  SER A  15      61.002 -57.390  93.761  1.00 66.24           C  \n+ATOM      6  OG  SER A  15      60.570 -57.811  92.472  1.00 67.23           O  \n+ATOM      7  N   MET A  16      58.539 -55.834  94.563  1.00 58.08           N  \n+ATOM      8  CA  MET A  16      57.293 -55.088  94.326  1.00 59.05           C  \n+ATOM      9  C   MET A  16      56.683 -55.346  92.933  1.00 60.19           C  \n+ATOM     10  O   MET A  16      55.452 -55.432  92.802  1.00 57.04           O  \n+ATOM     11  CB  MET A  16      57.515 -53.574  94.563  1.00 55.78           C  \n+ATOM     12  CG  MET A  16      56.673 -52.616  93.735  1.00 58.54           C  \n+ATOM     13  SD  MET A  16      56.941 -50.885  94.202  1.00 67.90           S  \n+ATOM     14  CE  MET A  16      58.554 -50.592  93.508  1.00 63.96           C  \n+ATOM     15  N   LEU A  17      57.526 -55.468  91.912  1.00 53.59           N  \n+ATOM     16  CA  LEU A  17      57.027 -55.566  90.550  1.00 55.31           C  \n+ATOM     17  C   LEU A  17      56.583 -56.984  90.257  1.00 55.34           C  \n+ATOM     18  O   LEU A  17      55.576 -57.195  89.583  1.00 50.16           O  \n+ATOM     19  CB  LEU A  17      58.079 -55.116  89.534  1.00 53.94           C  \n+ATOM     20  CG  LEU A  17      58.257 -53.602  89.470  1.00 56.21           C  \n+ATOM     21  CD1 LEU A  17      59.299 -53.272  88.406  1.00 54.07           C  \n+ATOM     22  CD2 LEU A  17      56.947 -52.872  89.190  1.00 55.51           C  \n+ATOM     23  N   ASP A  18      57.343 -57.937  90.783  1.00 53.67           N  \n+ATOM     24  CA  ASP A  18      57.063 -59.355  90.610  1.00 55.96           C  \n+ATOM     25  C   ASP A  18      55.817 -59.777  91.346  1.00 54.31           C  \n+ATOM     26  O   ASP A  18      55.071 -60.644  90.851  1.00 51.31           O  \n+ATOM     27  CB  ASP A  18      58.244 -60.213  91.111  1.00 63.00           C  \n+ATOM     28  CG  ASP A  18      59.374 -60.344  90.084  1.00 68.30          
 C  \n+ATOM     29  OD1 ASP A  18      59.406 -59.'..b' 100.506  1.00 57.57           O  \n+HETATM 1573  O   HOH B 153      52.191 -61.314  94.961  1.00 58.41           O  \n+HETATM 1574  O   HOH B 154      43.934 -26.726  90.863  1.00 59.30           O  \n+HETATM 1575  O   HOH B 155      53.659 -36.279  85.569  1.00 52.56           O  \n+HETATM 1576  O   HOH B 158      42.200 -33.494  96.714  1.00 56.16           O  \n+HETATM 1577  O   HOH B 159      55.504 -39.201  82.113  1.00 59.28           O  \n+HETATM 1578  O   HOH B 160      30.376 -55.240  82.444  1.00 63.54           O  \n+HETATM 1579  O   HOH B 161      51.795 -59.031  96.828  1.00 67.98           O  \n+HETATM 1580  O   HOH B 162      22.526 -46.703  74.926  1.00 57.40           O  \n+HETATM 1581  O   HOH B 164      50.733 -57.148  73.024  1.00 56.47           O  \n+HETATM 1582  O   HOH B 165      48.104 -47.994  59.942  1.00 45.95           O  \n+HETATM 1583  O   HOH B 166      54.973 -56.153  97.297  1.00 64.13           O  \n+HETATM 1584  O   HOH B 167      39.253 -43.161 103.576  1.00 54.76           O  \n+HETATM 1585  O   HOH B 170      51.981 -48.990  95.987  1.00 43.41           O  \n+HETATM 1586  O   HOH B 171      51.448 -48.354  56.783  1.00 56.00           O  \n+HETATM 1587  O   HOH B 172      60.277 -51.573  96.315  1.00 57.45           O  \n+HETATM 1588  O   HOH B 174      51.577 -54.024  69.020  1.00 60.28           O  \n+HETATM 1589  O   HOH B 175      35.389 -59.160  66.962  1.00 73.86           O  \n+HETATM 1590  O   HOH B 176      54.325 -40.404  94.788  1.00 71.31           O  \n+HETATM 1591  O   HOH B 178      30.495 -40.155  85.802  1.00 63.37           O  \n+HETATM 1592  O   HOH B 179      50.958 -37.034  91.471  1.00 53.17           O  \n+HETATM 1593  O   HOH B 180      53.453 -52.474  96.288  1.00 57.62           O  \n+HETATM 1594  O   HOH B 182      54.933 -49.346  66.605  1.00 65.59           O  \n+HETATM 1595  O   HOH B 184      52.755 -38.706  96.230  1.00 60.20           O  
\n+HETATM 1596  O   HOH B 188      48.197 -39.939  97.526  1.00 38.94           O  \n+HETATM 1597  O   HOH B 190      47.525 -59.982  77.805  1.00 58.12           O  \n+HETATM 1598  O   HOH B 191      51.832 -50.237  67.773  1.00 58.72           O  \n+HETATM 1599  O   HOH B 192      49.087 -42.082  98.262  1.00 50.34           O  \n+HETATM 1600  O   HOH B 193      60.481 -55.025  91.931  1.00 49.74           O  \n+HETATM 1601  O   HOH B 194      26.866 -55.431  67.876  1.00 55.14           O  \n+HETATM 1602  O   HOH B 196      42.244 -39.591  71.487  1.00 60.09           O  \n+HETATM 1603  O   HOH B 199      61.339 -56.770  90.212  1.00 59.65           O  \n+HETATM 1604  C   ACT D   1      39.632 -32.467  85.009  1.00 92.90           C  \n+HETATM 1605  O   ACT D   1      39.508 -32.058  86.187  1.00 93.19           O  \n+HETATM 1606  CH3 ACT D   1      38.386 -32.806  84.223  1.00 84.58           C  \n+HETATM 1607  OXT ACT D   1      40.773 -32.594  84.493  1.00 90.66           O  \n+HETATM 1608  C   ACT D   2      58.453 -56.204  76.997  1.00 88.70           C  \n+HETATM 1609  O   ACT D   2      57.709 -56.233  75.983  1.00 85.04           O  \n+HETATM 1610  CH3 ACT D   2      58.693 -57.480  77.774  1.00 83.03           C  \n+HETATM 1611  OXT ACT D   2      58.996 -55.131  77.372  1.00 85.86           O  \n+HETATM 1612  O   DMS C   1      60.177 -42.366  76.499  1.00 68.99           O  \n+HETATM 1613  C1  DMS C   1      60.547 -42.644  73.915  1.00 91.27           C  \n+HETATM 1614  C2  DMS C   1      58.286 -41.770  74.686  1.00 93.07           C  \n+HETATM 1615  S   DMS C   1      59.919 -41.707  75.193  1.00 96.08           S  \n+HETATM 1616  O   DMS C   2      62.131 -50.369  84.252  1.00100.77           O  \n+HETATM 1617  C1  DMS C   2      63.293 -52.714  84.253  1.00108.09           C  \n+HETATM 1618  C2  DMS C   2      60.654 -52.426  84.716  1.00115.63           C  \n+HETATM 1619  S   DMS C   2      61.947 -51.761  83.804  1.00116.45           S  \n+END  
                                                                           \n\\ No newline at end of file\n'
b
diff -r 000000000000 -r de29b4f35536 transfs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transfs.py Fri Mar 27 09:18:53 2020 -0400
[
b'@@ -0,0 +1,321 @@\n+# Create dir containing ligands.sdf and protein.pdb\n+# Enter docker container like this:\n+#   docker run -it --rm --gpus all -v $PWD:/root/train/fragalysis_test_files/work:Z informaticsmatters/deep-app-ubuntu-1604:latest bash\n+#\n+# Now inside the container run like this:\n+#   mkdir /tmp/work\n+#   rm -rf /tmp/work/* && python3 work/transfs.py -i work/test-data/ligands.sdf -r work/test-data/receptor.pdb -d 2 -w /tmp/work\n+#\n+# If testing with no GPU you can use the --mock option to generate random scores\n+#\n+# Start container for testing like this:\n+#    docker run -it --rm -v $PWD:$PWD:Z -w $PWD informaticsmatters/deep-app-ubuntu-1604:latest bash\n+# Inside container test like this:\n+#   mkdir /tmp/work\n+#   cd chemicaltoolbox/xchem-deep\n+#   rm -rf /tmp/work/* && python3 transfs.py -i test-data/ligands.sdf -r test-data/receptor.pdb -d 2 -w /tmp/work --mock\n+#\n+\n+import argparse, os, sys, math\n+import subprocess\n+import random\n+from openbabel import pybel\n+\n+types_file_name = \'inputs.types\'\n+types_file_name = \'inputs.types\'\n+predict_file_name = \'predictions.txt\'\n+work_dir = \'.\'\n+paths = None\n+inputs_protein = []\n+inputs_ligands = []\n+\n+\n+def log(*args, **kwargs):\n+    """Log output to STDERR\n+    """\n+    print(*args, file=sys.stderr, ** kwargs)\n+\n+def write_raw_inputs(receptor_pdb, ligands_sdf, distance):\n+    """\n+    Analyses the PDB file for waters that clash with each ligand in the SDF and writes out:\n+    1. a PDB file named like receptor-123-543.pdb where the numeric parts are the waters that have been omitted\n+    2. a corresponding directory named like receptor-123-543\n+    3. 
an SDF named like receptor-123-543/ligands.sdf containing those ligands that correspond to that receptor.\n+    :param receptor_pdb: A PDB file without the ligand but with the crystallographic waters\n+    :param ligands_sdf: A SDF with the docked poses\n+    :param distance: The distance to consider when removing waters. Only heavy atoms in the ligand are considered.\n+    :return:\n+    """\n+\n+    global work_dir\n+    global inputs_protein\n+    global inputs_ligands\n+    global paths\n+\n+\n+    log("Writing data to", work_dir)\n+    if not os.path.isdir(work_dir):\n+        os.mkdir(work_dir)\n+\n+    receptor_file = os.path.basename(receptor_pdb)\n+\n+    sdf_writers = {}\n+    paths = []\n+\n+    # read the receptor once as we\'ll need to process it many times\n+    with open(receptor_pdb, \'r\') as f:\n+        lines = f.readlines()\n+\n+    count = 0\n+    for mol in pybel.readfile("sdf", ligands_sdf):\n+        count += 1\n+        if count % 50000 == 0:\n+            log(\'Processed\', count)\n+\n+        try:\n+            # print("Processing mol", mol.title)\n+\n+            clone = pybel.Molecule(mol)\n+            clone.removeh()\n+\n+            coords = []\n+            for atom in clone.atoms:\n+                coords.append(atom.coords)\n+\n+            watnumcode = \'\'\n+\n+            # getting receptor without waters that will clash with ligand\n+            new_receptor_pdb = []\n+            for line in lines:\n+                if line[17:20] == \'HOH\':\n+                    x, y, z = float(line[30:39]),  float(line[39:46]), float(line[46:55])\n+                    distances = []\n+                    for i in coords:\n+                        distances.append(math.sqrt((x-i[0])**2 + (y-i[1])**2 + (z-i[2])**2))  # calculates distance based on cartesian coordinates\n+                    if min(distances) > distance: # if all distances are larger than 2.0A, then molecule makes it to new file\n+                        
new_receptor_pdb.append(line)\n+                    else:\n+                        watnum = line[23:28].strip()\n+                        # print("Skipped water " + watnum)\n+                        watnumcode += \'-\' + watnum\n+                if line[17:20] != \'LIG\' and line[17:20] != \'HOH\':  # ligand lines are also removed\n+                    new_receptor_pdb.append(line)\n+\n+\n+            name = receptor_file[0:-4] + watnumcode\n+            # print(\''..b'/fragalysis_test_files/scripts/predict.py\',\n+            \'-m\', \'/train/fragalysis_test_files/resources/dense.prototxt\',\n+            \'-w\', \'/train/fragalysis_test_files/resources/weights.caffemodel\',\n+            \'-i\', os.path.sep.join([work_dir, types_file_name]),\n+            \'-o\', os.path.sep.join([work_dir, predict_file_name])]\n+    log("CMD:", cmd1)\n+    subprocess.call(cmd1)\n+\n+\n+def mock_predictions():\n+    global work_dir\n+    global predict_file_name\n+\n+    log("WARNING: generating mock results instead of running on GPU")\n+    outfile = generate_predictions_filename(work_dir, predict_file_name)\n+    count = 0\n+    with open(outfile, \'w\') as predictions:\n+        for path in paths:\n+            log("Reading", path)\n+            protein_gninatypes = os.listdir(os.path.sep.join([path, \'proteins\']))\n+            ligand_gninatypes = os.listdir(os.path.sep.join([path, \'ligands\']))\n+            for protein in protein_gninatypes:\n+                for ligand in ligand_gninatypes:\n+                    count += 1\n+                    score = random.random()\n+                    line = "{0} | 0 {1}{4}proteins{4}{2} {1}{4}ligands{4}{3}\\n".format(score, path, protein, ligand,\n+                                                                                       os.path.sep)\n+                    # log("Writing", line)\n+                    predictions.write(line)\n+\n+    log(\'Wrote\', count, \'mock predictions\')\n+\n+\n+def read_predictions():\n+    
global predict_file_name\n+    global work_dir\n+    scores = {}\n+    with open("{0}{1}{2}".format(work_dir, os.path.sep, predict_file_name), \'r\') as input:\n+        for line in input:\n+            # log(line)\n+            tokens = line.split()\n+            if len(tokens) == 5 and tokens[1] == \'|\':\n+                # log(len(tokens), tokens[0], tokens[3], tokens[4])\n+                record_no = inputs_ligands.index(tokens[4])\n+                if record_no is not None:\n+                    # log(record_no, tokens[0])\n+                    scores[record_no] = tokens[0]\n+    log("Found", len(scores), "scores")\n+    return scores\n+\n+\n+def patch_scores_sdf(outfile, scores):\n+\n+    counter = 0\n+    sdf_path = "{0}{1}{2}".format(work_dir, os.path.sep, outfile)\n+    log("Writing results to {0}".format(sdf_path))\n+    sdf_file = pybel.Outputfile("sdf", sdf_path)\n+\n+    for path in paths:\n+        for mol in pybel.readfile("sdf", os.path.sep.join([path, \'ligands.sdf\'])):\n+            if counter in scores:\n+                score = scores[counter]\n+                # og("Score for record {0} is {1}".format(counter, score))\n+                mol.data[\'TransFSScore\'] = score\n+                sdf_file.write(mol)\n+            else:\n+                log("No score found for record", counter)\n+            counter += 1\n+    sdf_file.close()\n+\n+\n+def execute(ligands_sdf, protein, outfile, distance, mock=False):\n+\n+    write_inputs(protein, ligands_sdf, distance)\n+    if mock:\n+        mock_predictions()\n+    else:\n+        run_predictions()\n+    scores = read_predictions()\n+    patch_scores_sdf(outfile, scores)\n+\n+\n+def main():\n+    global work_dir\n+\n+    parser = argparse.ArgumentParser(description=\'XChem deep - pose scoring\')\n+\n+    parser.add_argument(\'-i\', \'--input\', help="SDF containing the poses to score)")\n+    parser.add_argument(\'-r\', \'--receptor\', help="Receptor file for scoring (PDB format)")\n+    
parser.add_argument(\'-d\', \'--distance\', type=float, default=2.0, help="Cuttoff for removing waters")\n+    parser.add_argument(\'-o\', \'--outfile\', default=\'output.sdf\', help="File name for results")\n+    parser.add_argument(\'-w\', \'--work-dir\', default=".", help="Working directory")\n+    parser.add_argument(\'--mock\', action=\'store_true\', help=\'Generate mock scores rather than run on GPU\')\n+\n+    args = parser.parse_args()\n+    log("XChem deep args: ", args)\n+\n+    work_dir = args.work_dir\n+\n+    execute(args.input, args.receptor, args.outfile, args.distance, mock=args.mock)\n+\n+\n+if __name__ == "__main__":\n+    main()\n'
b
diff -r 000000000000 -r de29b4f35536 transfs.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/transfs.xml Fri Mar 27 09:18:53 2020 -0400
[
@@ -0,0 +1,108 @@
+<tool id="xchem_transfs_scoring" name="XChem TransFS pose scoring" version="0.2.0">
+    <description>using deep learning</description>
+
+    <requirements>
+        <!--requirement type="package" version="3.0.0">openbabel</requirement-->
+        <!--requirement type="package" version="3.7">python</requirement-->
+        <!-- many other requirements are needed -->
+        <container type="docker">informaticsmatters/deep-app-ubuntu-1604:0.9</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+
+    cd /train/fragalysis_test_files/ &&
+    mkdir workdir &&
+    cd workdir &&
+
+    cp '$ligands' ligands.sdf &&
+    cp '$receptor' receptor.pdb &&
+
+    ##mkdir -p /root/train/ &&
+    ##ln -s /train/fragalysis_test_files/ /root/train/ &&
+
+    ##adduser centos --uid 1000 --quiet --no-create-home --system &&
+    ##apt install sudo -y &&
+
+    ## mkdir -p ligands &&
+    cd ../ &&
+    python '$__tool_directory__/transfs.py' -i ./workdir/ligands.sdf -r ./workdir/receptor.pdb -d $distance -w /train/fragalysis_test_files/workdir &&
+    ls -l &&
+    ls -l workdir &&
+    sudo -u ubuntu cp ./workdir/output.sdf '$output' &&
+    head -n 10000 ./workdir/output.sdf &&
+
+    mkdir -p ./pdb &&
+    cp -r ./workdir/receptor*.pdb ./pdb &&
+    tar -cvhf archiv.tar ./pdb &&
+    sudo -u ubuntu cp archiv.tar '$output_receptors' &&
+
+    sudo -u ubuntu cp ./workdir/predictions.txt '$predictions'
+
+
+    ]]></command>
+
+    <inputs>
+        <param type="data" name="receptor" format="pdb" label="Receptor" help="Select a receptor (pdb format)."/>
+        <param type="data" name="ligands" format="sdf,mol" label="Ligands" help="Ligands (docked poses) in SDF format"/>
+        <param name="distance" type="float" value="2.0" min="1.0" max="5.0" label="Distance to waters" help="Remove waters closer than this distance to any ligand heavy atom"/>
+        <param type="hidden" name="mock" value="" label="Mock calculations" help="Use random numbers instead of running on GPU"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="XChem pose scoring on ${on_string}"/>
+        <data name="predictions" format="txt" label="Predictions on ${on_string}"/>
+        <data name="output_receptors" format="tar" label="Receptors ${on_string}"/>
+
+        <!--collection name="pdb_files" type="list" label="PDB files with variable number of waters">
+            <discover_datasets pattern="__name_and_ext__" directory="pdb" />
+        </collection-->
+    </outputs>
+
+    <tests>
+     <test>
+            <param name="receptor" value="receptor.pdb"/>
+            <param name="ligands" value="ligands.sdf"/>
+            <param name="mock" value="--mock" />
+            <param name="distance" value="4.0"/>
+            <output name="output" ftype="sdf">
+                <assert_contents>
+                    <has_text text="TransFSReceptor"/>
+                    <has_text text="TransFSScore"/>
+                </assert_contents>
+            </output>
+            <!--output_collection name="pdb_files" type="list" count="2" /-->
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+This tool performs scoring of docked ligand poses using deep learning.
+It uses the gnina and libmolgrid toolkits to perform the scoring to generate
+a prediction for how good the pose is.
+
+
+-----
+
+.. class:: infomark
+
+**Inputs**
+
+1. The protein receptor to dock into as a file in PDB format. This should have the ligand removed but retain the waters.
+2. A set of ligand poses to score in SDF format.
+
+-----
+
+.. class:: infomark
+
+**Outputs**
+
+An SDF file is produced as output. The binding affinity scores are contained within the SDF file
+as the TransFSScore property and the PDB file (with the waters that clash with the ligand removed)
+that was used for the scoring as the TransFSReceptor property.
+Values for the score range from 0 (poor binding) to 1 (good binding).
+
+A set of PDB files is also output, each one with different crystallographic waters removed. Each ligand is
+examined against the input PDB structure, and the waters that clash with it (those closer than the 'distance'
+parameter to any heavy atom of the ligand) are removed. The filenames are encoded with the water numbers that are removed.
+
+    ]]></help>
+</tool>