diff sucos.py @ 6:4f1896782f7c draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 05dc325ce687441e5d3bdbdedcc0e3529cd5e070"
author bgruening
date Wed, 14 Apr 2021 09:31:11 +0000
parents 8161c08627bf
children
line wrap: on
line diff
--- a/sucos.py	Tue Jul 28 08:48:35 2020 -0400
+++ b/sucos.py	Wed Apr 14 09:31:11 2021 +0000
@@ -9,26 +9,39 @@
 """
 
 from __future__ import print_function
-import argparse, os, sys, gzip
+
+import argparse
+import os
+
 import numpy as np
-from rdkit import Chem, rdBase, RDConfig
+import utils
+from rdkit import Chem, RDConfig
 from rdkit.Chem import AllChem, rdShapeHelpers
 from rdkit.Chem.FeatMaps import FeatMaps
-import utils
 
-
-### start function definitions #########################################
+# start function definitions #########################################
 
 # Setting up the features to use in FeatureMap
-fdef = AllChem.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
+fdef = AllChem.BuildFeatureFactory(
+    os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
+)
 
 fmParams = {}
 for k in fdef.GetFeatureFamilies():
     fparams = FeatMaps.FeatMapParams()
     fmParams[k] = fparams
 
-keep = ('Donor', 'Acceptor', 'NegIonizable', 'PosIonizable', 'ZnBinder',
-        'Aromatic', 'Hydrophobe', 'LumpedHydrophobe')
+keep = (
+    "Donor",
+    "Acceptor",
+    "NegIonizable",
+    "PosIonizable",
+    "ZnBinder",
+    "Aromatic",
+    "Hydrophobe",
+    "LumpedHydrophobe",
+)
+
 
 def filterFeature(f):
     result = f.GetFamily() in keep
@@ -37,6 +50,7 @@
         utils.log("Filtered out feature type", f.GetFamily())
     return result
 
+
 def getRawFeatures(mol):
 
     rawFeats = fdef.GetFeaturesForMol(mol)
@@ -44,7 +58,10 @@
     filtered = list(filter(filterFeature, rawFeats))
     return filtered
 
-def get_FeatureMapScore(small_feats, large_feats, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All):
+
+def get_FeatureMapScore(
+    small_feats, large_feats, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All
+):
     """
     Generate the feature map score.
 
@@ -58,7 +75,10 @@
     for rawFeats in [small_feats, large_feats]:
         # filter that list down to only include the ones we're interested in
         featLists.append(rawFeats)
-    fms = [FeatMaps.FeatMap(feats=x, weights=[1] * len(x), params=fmParams) for x in featLists]
+    fms = [
+        FeatMaps.FeatMap(feats=x, weights=[1] * len(x), params=fmParams)
+        for x in featLists
+    ]
     # set the score mode
     fms[0].scoreMode = score_mode
 
@@ -69,24 +89,35 @@
             B = len(featLists[1])
             if B != fms[1].GetNumFeatures():
                 utils.log("Why isn't B equal to number of features...?!")
-            tani_score = float(c) / (A+B-c)
+            tani_score = float(c) / (A + B - c)
             return tani_score
         else:
-            fm_score = fms[0].ScoreFeats(featLists[1]) / min(fms[0].GetNumFeatures(), len(featLists[1]))
+            fm_score = fms[0].ScoreFeats(featLists[1]) / min(
+                fms[0].GetNumFeatures(), len(featLists[1])
+            )
             return fm_score
     except ZeroDivisionError:
         utils.log("ZeroDivisionError")
         return 0
 
     if tani:
-        tani_score = float(c) / (A+B-c)
+        tani_score = float(c) / (A + B - c)
         return tani_score
     else:
-        fm_score = fms[0].ScoreFeats(featLists[1]) / min(fms[0].GetNumFeatures(), len(featLists[1]))
+        fm_score = fms[0].ScoreFeats(featLists[1]) / min(
+            fms[0].GetNumFeatures(), len(featLists[1])
+        )
         return fm_score
 
 
-def get_SucosScore(ref_mol, query_mol, tani=False, ref_features=None, query_features=None, score_mode=FeatMaps.FeatMapScoreMode.All):
+def get_SucosScore(
+    ref_mol,
+    query_mol,
+    tani=False,
+    ref_features=None,
+    query_features=None,
+    score_mode=FeatMaps.FeatMapScoreMode.All,
+):
     """
     This is the key function that calculates the SuCOS scores and is expected to be called from other modules.
     To improve performance you can pre-calculate the features and pass them in as optional parameters to avoid having
@@ -107,29 +138,41 @@
         query_features = getRawFeatures(query_mol)
 
     fm_score = get_FeatureMapScore(ref_features, query_features, tani, score_mode)
-    fm_score = np.clip(fm_score, 0, 1)
+    fm_score = np.float(np.clip(fm_score, 0, 1))
 
-    try :
+    try:
         if tani:
             tani_sim = 1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
             tani_sim = np.clip(tani_sim, 0, 1)
-            SuCOS_score = 0.5*fm_score + 0.5*tani_sim
+            SuCOS_score = 0.5 * fm_score + 0.5 * tani_sim
             return SuCOS_score, fm_score, tani_sim
         else:
-            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False)
+            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(
+                ref_mol, query_mol, allowReordering=False
+            )
             protrude_dist = np.clip(protrude_dist, 0, 1)
             protrude_val = 1.0 - protrude_dist
             SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val
             return SuCOS_score, fm_score, protrude_val
-    except:
+    except Exception:
         utils.log("Failed to calculate SuCOS scores. Returning 0,0,0")
         return 0, 0, 0
 
-def process(refmol_filename, inputs_filename, outputs_filename, refmol_index=None,
-            refmol_format=None, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All):
 
-    ref_mol = utils.read_single_molecule(refmol_filename, index=refmol_index, format=refmol_format)
-    #utils.log("Reference mol has", ref_mol.GetNumHeavyAtoms(), "heavy atoms")
+def process(
+    refmol_filename,
+    inputs_filename,
+    outputs_filename,
+    refmol_index=None,
+    refmol_format=None,
+    tani=False,
+    score_mode=FeatMaps.FeatMapScoreMode.All,
+):
+
+    ref_mol = utils.read_single_molecule(
+        refmol_filename, index=refmol_index, format=refmol_format
+    )
+    # utils.log("Reference mol has", ref_mol.GetNumHeavyAtoms(), "heavy atoms")
     ref_features = getRawFeatures(ref_mol)
 
     input_file = utils.open_file_for_reading(inputs_filename)
@@ -141,12 +184,18 @@
     total = 0
     errors = 0
     for mol in suppl:
-        count +=1
+        count += 1
         if mol is None:
             continue
-        #utils.log("Mol has", str(mol.GetNumHeavyAtoms()), "heavy atoms")
+        # utils.log("Mol has", str(mol.GetNumHeavyAtoms()), "heavy atoms")
         try:
-            sucos_score, fm_score, val3 = get_SucosScore(ref_mol, mol, tani=tani, ref_features=ref_features, score_mode=score_mode)
+            sucos_score, fm_score, val3 = get_SucosScore(
+                ref_mol,
+                mol,
+                tani=tani,
+                ref_features=ref_features,
+                score_mode=score_mode,
+            )
             mol.SetDoubleProp("SuCOS_Score", sucos_score)
             mol.SetDoubleProp("SuCOS_FeatureMap_Score", fm_score)
             if tani:
@@ -155,9 +204,9 @@
                 mol.SetDoubleProp("SuCOS_Protrude_Score", val3)
             utils.log("Scores:", sucos_score, fm_score, val3)
             writer.write(mol)
-            total +=1
+            total += 1
         except ValueError as e:
-            errors +=1
+            errors += 1
             utils.log("Molecule", count, "failed to score:", e.message)
 
     input_file.close()
@@ -165,41 +214,73 @@
     writer.close()
     output_file.close()
 
-    utils.log("Completed.", total, "processed, ", count, "succeeded, ", errors, "errors")
+    utils.log(
+        "Completed.", total, "processed, ", count, "succeeded, ", errors, "errors"
+    )
+
 
 def parse_score_mode(value):
-    if value == None or value == 'all':
+    if value is None or value == "all":
         return FeatMaps.FeatMapScoreMode.All
-    elif value == 'closest':
+    elif value == "closest":
         return FeatMaps.FeatMapScoreMode.Closest
-    elif value == 'best':
+    elif value == "best":
         return FeatMaps.FeatMapScoreMode.Best
     else:
         raise ValueError(value + " is not a valid scoring mode option")
 
 
-### start main execution #########################################
+# start main execution #########################################
+
 
 def main():
 
-    parser = argparse.ArgumentParser(description='SuCOS with RDKit')
-    parser.add_argument('-i', '--input', help='Input file in SDF format. Can be gzipped (*.gz).')
-    parser.add_argument('-r', '--refmol', help='Molecule to compare against in Molfile (.mol) or SDF (.sdf) format')
-    parser.add_argument('--refmol-format', help="Format for the reference molecule (mol or sdf). " +
-                                                "Only needed if files don't have the expected extensions")
-    parser.add_argument('--refmolidx', help='Reference molecule index in SD file if not the first', type=int, default=1)
-    parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).')
-    parser.add_argument('--tanimoto', action='store_true', help='Include Tanimoto distance in score')
-    parser.add_argument('--score_mode', choices=['all', 'closest', 'best'],
-                        help="choose the scoring mode for the feature map, default is 'all'.")
+    parser = argparse.ArgumentParser(description="SuCOS with RDKit")
+    parser.add_argument(
+        "-i", "--input", help="Input file in SDF format. Can be gzipped (*.gz)."
+    )
+    parser.add_argument(
+        "-r",
+        "--refmol",
+        help="Molecule to compare against in Molfile (.mol) or SDF (.sdf) format",
+    )
+    parser.add_argument(
+        "--refmol-format",
+        help="Format for the reference molecule (mol or sdf). "
+        + "Only needed if files don't have the expected extensions",
+    )
+    parser.add_argument(
+        "--refmolidx",
+        help="Reference molecule index in SD file if not the first",
+        type=int,
+        default=1,
+    )
+    parser.add_argument(
+        "-o", "--output", help="Output file in SDF format. Can be gzipped (*.gz)."
+    )
+    parser.add_argument(
+        "--tanimoto", action="store_true", help="Include Tanimoto distance in score"
+    )
+    parser.add_argument(
+        "--score_mode",
+        choices=["all", "closest", "best"],
+        help="choose the scoring mode for the feature map, default is 'all'.",
+    )
 
     args = parser.parse_args()
     utils.log("SuCOS Args: ", args)
 
     score_mode = parse_score_mode(args.score_mode)
 
-    process(args.refmol, args.input, args.output, refmol_index=args.refmolidx,
-            refmol_format=args.refmol_format, tani=args.tanimoto, score_mode=score_mode)
+    process(
+        args.refmol,
+        args.input,
+        args.output,
+        refmol_index=args.refmolidx,
+        refmol_format=args.refmol_format,
+        tani=args.tanimoto,
+        score_mode=score_mode,
+    )
 
 
 if __name__ == "__main__":