changeset 2:2f110aef9b53 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit 6f1ee2812cca091561a2b2e464498dae2f913b8d"
author bgruening
date Thu, 19 Mar 2020 09:43:31 -0400
parents 8eab6d2b7bdf
children bf99565cec1f
files sucos.py sucos_max.py sucos_max.xml
diffstat 3 files changed, 56 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- a/sucos.py	Fri Oct 11 18:25:27 2019 -0400
+++ b/sucos.py	Thu Mar 19 09:43:31 2020 -0400
@@ -109,17 +109,21 @@
     fm_score = get_FeatureMapScore(ref_features, query_features, tani, score_mode)
     fm_score = np.clip(fm_score, 0, 1)
 
-    if tani:
-        tani_sim = 1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
-        tani_sim = np.clip(tani_sim, 0, 1)
-        SuCOS_score = 0.5*fm_score + 0.5*tani_sim
-        return SuCOS_score, fm_score, tani_sim
-    else:
-        protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False)
-        protrude_dist = np.clip(protrude_dist, 0, 1)
-        protrude_val = 1.0 - protrude_dist
-        SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val
-        return SuCOS_score, fm_score, protrude_val
+    try :
+        if tani:
+            tani_sim = 1 - float(rdShapeHelpers.ShapeTanimotoDist(ref_mol, query_mol))
+            tani_sim = np.clip(tani_sim, 0, 1)
+            SuCOS_score = 0.5*fm_score + 0.5*tani_sim
+            return SuCOS_score, fm_score, tani_sim
+        else:
+            protrude_dist = rdShapeHelpers.ShapeProtrudeDist(ref_mol, query_mol, allowReordering=False)
+            protrude_dist = np.clip(protrude_dist, 0, 1)
+            protrude_val = 1.0 - protrude_dist
+            SuCOS_score = 0.5 * fm_score + 0.5 * protrude_val
+            return SuCOS_score, fm_score, protrude_val
+    except:
+        utils.log("Failed to calculate SuCOS scores. Returning 0,0,0")
+        return 0, 0, 0
 
 def process(refmol_filename, inputs_filename, outputs_filename, refmol_index=None,
             refmol_format=None, tani=False, score_mode=FeatMaps.FeatMapScoreMode.All):
--- a/sucos_max.py	Fri Oct 11 18:25:27 2019 -0400
+++ b/sucos_max.py	Thu Mar 19 09:43:31 2020 -0400
@@ -39,8 +39,7 @@
 from rdkit import Chem
 
 
-def process(inputfilename, clusterfilenames, outputfilename, mode):
-
+def process(inputfilename, clusterfilenames, outputfilename):
     all_clusters = {}
     for filename in clusterfilenames:
         cluster = []
@@ -79,7 +78,8 @@
         except:
             utils.log("WARNING: failed to generate features for molecule", mol_num, "in input")
             continue
-        scores = [0, 0, 0]
+        scores_max = [0, 0, 0]
+        scores_cum = [0, 0, 0]
         for clusterfilename in all_clusters:
             cluster = all_clusters[clusterfilename]
             index = 0
@@ -89,42 +89,35 @@
                 index += 1
                 comparisons += 1
                 sucos_score, fm_score, vol_score = sucos.get_SucosScore(hit, mol,
-                    tani=False, ref_features=ref_features, query_features=query_features)
-                if mode == 'max':
-                    if sucos_score > scores[0]:
-                        scores[0] = sucos_score
-                        scores[1] = fm_score
-                        scores[2] = vol_score
-                        cluster_name = clusterfilename
-                        cluster_index = index
-                elif mode == 'cum':
-                    scores[0] += sucos_score
-                    scores[1] += fm_score
-                    scores[2] += vol_score
-                else:
-                    raise ValueError("Invalid mode: " + mode)
+                                                                        tani=False, ref_features=ref_features,
+                                                                        query_features=query_features)
+
+                if sucos_score > scores_max[0]:
+                    scores_max[0] = sucos_score
+                    scores_max[1] = fm_score
+                    scores_max[2] = vol_score
+                    cluster_name = clusterfilename
+                    cluster_index = index
+
+                scores_cum[0] += sucos_score
+                scores_cum[1] += fm_score
+                scores_cum[2] += vol_score
 
-        if scores[0] > 0:
-            if mode == 'max':
-                cluster_file_name_only = cluster_name.split(os.sep)[-1]
-                #utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
-                mol.SetDoubleProp("Max_SuCOS_Score", scores[0])
-                mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores[1])
-                mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores[2])
-                mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
-                mol.SetIntProp("Max_SuCOS_Index", cluster_index)
+        if scores_max[0] > 0:
+            cluster_file_name_only = cluster_name.split(os.sep)[-1]
+            # utils.log("Max SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2],"File:", cluster_file_name_only, "Index:", cluster_index)
+            mol.SetDoubleProp("Max_SuCOS_Score", scores_max[0])
+            mol.SetDoubleProp("Max_SuCOS_FeatureMap_Score", scores_max[1])
+            mol.SetDoubleProp("Max_SuCOS_Protrude_Score", scores_max[2])
+            mol.SetProp("Max_SuCOS_Cluster", cluster_file_name_only)
+            mol.SetIntProp("Max_SuCOS_Index", cluster_index)
 
-            else:
-                #utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
-                mol.SetDoubleProp("Cum_SuCOS_Score", scores[0])
-                mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores[1])
-                mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores[2])
+            # utils.log("Cum SuCOS:", scores[0], "FM:", scores[1], "P:", scores[2])
+            mol.SetDoubleProp("Cum_SuCOS_Score", scores_cum[0])
+            mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1])
+            mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2])
 
-            writer.write(mol)
-
-        else:
-            utils.log("Molecule", mol_num, "did not overlay. Omitting from results")
-
+        writer.write(mol)
 
     input_file.close()
     writer.flush()
@@ -140,15 +133,13 @@
     parser = argparse.ArgumentParser(description='Max SuCOS scores with RDKit')
     parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).')
     parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).')
-    parser.add_argument('-m', '--mode', choices=['max', 'cum'],
-                        default='max', help='Score mode: max = best score, cum = sum of all scores')
     parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits")
 
     args = parser.parse_args()
     utils.log("Max SuCOS Args: ", args)
 
-    process(args.input, args.clusters, args.output, args.mode)
+    process(args.input, args.clusters, args.output)
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
--- a/sucos_max.xml	Fri Oct 11 18:25:27 2019 -0400
+++ b/sucos_max.xml	Thu Mar 19 09:43:31 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="sucos_max_score" name="Max SuCOS score" version="0.1.1">
+<tool id="sucos_max_score" name="Max SuCOS score" version="0.2.0">
     <description>- determine maximum SuCOS score of ligands against clustered fragment hits</description>
     <macros>
         <import>sucos_macros.xml</import>
@@ -8,7 +8,6 @@
         python '$__tool_directory__/sucos_max.py'
             -i '$input'
             -o '$output'
-            -m $mode
         #for $cluster in $clusters
             '$cluster'
         #end for
@@ -16,10 +15,6 @@
     <inputs>
         <param name="input" type="data" format="sdf" label="Ligands to be scored" help="Input in SDF format." />
         <param name="clusters" type="data" format="sdf" multiple="true" label="Set of clusters to score against" help="Clusters in SDF format." />
-        <param name="mode" type="select" value="max" label="Mode">
-            <option value="max">Max score</option>
-            <option value="cum">Cumulative score</option>
-        </param>
     </inputs>
     <outputs>
         <data format="sdf" name="output" label="The scored ligands"/>
@@ -28,19 +23,9 @@
         <test>
             <param name="input" ftype="sdf" value="sucos_cluster.sdf"/>
             <param name="clusters" ftype="sdf" value="cluster1.sdf,cluster2.sdf,cluster3.sdf,cluster4.sdf,cluster5.sdf,cluster6.sdf"/>
-            <param name="mode" value="max"/>
             <output name="output" ftype="sdf">
                 <assert_contents>
                     <has_text text="Max_SuCOS_Score" />
-                </assert_contents>
-            </output>
-        </test>
-        <test>
-            <param name="input" ftype="sdf" value="sucos_cluster.sdf"/>
-            <param name="clusters" ftype="sdf" value="cluster1.sdf,cluster2.sdf,cluster3.sdf,cluster4.sdf,cluster5.sdf,cluster6.sdf"/>
-            <param name="mode" value="cum"/>
-            <output name="output" ftype="sdf">
-                <assert_contents>
                     <has_text text="Cum_SuCOS_Score" />
                 </assert_contents>
             </output>
@@ -73,11 +58,16 @@
 
 **Output**
 
-The same SD file as the input ligands with a "Max_SuCOS_Score" property added containing the best (maximum) SuCOS score
-along with the "Max_SuCOS_FeatureMap_Score" and "Max_SuCOS_Tanimoto_Score" of that comparison.
+The same SD file as the input ligands with the following properties added:
 
-In addition, the "Max_SuCOS_Cluster" field shows the name of the cluster file that contained the molecule with this best 
-score and the "Max_SuCOS_Index" shows the index (first record is index 1) of that molecule in the file.
+* Max_SuCOS_Score - the best (maximum) SuCOS score
+* Max_SuCOS_FeatureMap_Score - the corresponding FeatureMap score
+* Max_SuCOS_Protrude_Score - the corresponding Protrude score
+* Max_SuCOS_Cluster - the file name of the cluster that contained the max score
+* Max_SuCOS_Index - the index of the molecule within that cluster file that gave the max score (first record is index 1)
+* Cum_SuCOS_Score - the cumulative SuCOS score for all overlays (the sum of the individual scores)
+* Cum_SuCOS_FeatureMap_Score - the corresponding FeatureMap score
+* Cum_SuCOS_Protrude_Score - the corresponding Protrude score
 
     ]]></help>
     <expand macro="citations"/>