changeset 5:d4c67ced6abc draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/sucos commit c35334ca80c87a5078da1a6df85b34e23b80d837"
author bgruening
date Wed, 15 Apr 2020 09:26:07 -0400
parents 85fad59f8168
children 55ac04db36aa
files sucos_max.py sucos_max.xml
diffstat 2 files changed, 44 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/sucos_max.py	Mon Apr 06 09:12:07 2020 -0400
+++ b/sucos_max.py	Wed Apr 15 09:26:07 2020 -0400
@@ -39,7 +39,7 @@
 from rdkit import Chem
 
 
-def process(inputfilename, clusterfilenames, outputfilename):
+def process(inputfilename, clusterfilenames, outputfilename, filter_value, filter_field):
     all_clusters = {}
     for filename in clusterfilenames:
         cluster = []
@@ -120,7 +120,13 @@
         mol.SetDoubleProp("Cum_SuCOS_FeatureMap_Score", scores_cum[1] if scores_cum[1] > 0 else 0)
         mol.SetDoubleProp("Cum_SuCOS_Protrude_Score", scores_cum[2] if scores_cum[2] > 0 else 0)
 
-        writer.write(mol)
+        if filter_value and filter_field:
+            if mol.HasProp(filter_field):
+                val = mol.GetDoubleProp(filter_field)
+                if val > filter_value:
+                    writer.write(mol)
+        else:
+            writer.write(mol)
 
     input_file.close()
     writer.flush()
@@ -137,11 +143,13 @@
     parser.add_argument('-i', '--input', help='Input file to score in SDF format. Can be gzipped (*.gz).')
     parser.add_argument('-o', '--output', help='Output file in SDF format. Can be gzipped (*.gz).')
     parser.add_argument('clusters', nargs='*', help="One or more SDF files with the clustered hits")
+    parser.add_argument('--filter-value', type=float, help='Filter out values with scores less than this.')
+    parser.add_argument('--filter-field', help='Field to use to filter values.')
 
     args = parser.parse_args()
     utils.log("Max SuCOS Args: ", args)
 
-    process(args.input, args.clusters, args.output)
+    process(args.input, args.clusters, args.output, args.filter_value, args.filter_field)
 
 
 if __name__ == "__main__":
--- a/sucos_max.xml	Mon Apr 06 09:12:07 2020 -0400
+++ b/sucos_max.xml	Wed Apr 15 09:26:07 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="sucos_max_score" name="Max SuCOS score" version="0.2.2">
+<tool id="sucos_max_score" name="Max SuCOS score" version="0.2.3">
     <description>- determine maximum SuCOS score of ligands against clustered fragment hits</description>
     <macros>
         <import>sucos_macros.xml</import>
@@ -8,6 +8,9 @@
         python '$__tool_directory__/sucos_max.py'
             -i '$input'
             -o '$output'
+        #if $filter_field and $filter_value
+            --filter-field '$filter_field' --filter-value $filter_value
+        #end if
         #for $cluster in $clusters
             '$cluster'
         #end for
@@ -15,6 +18,17 @@
     <inputs>
         <param name="input" type="data" format="sdf" label="Ligands to be scored" help="Input in SDF format." />
         <param name="clusters" type="data" format="sdf" multiple="true" label="Set of clusters to score against" help="Clusters in SDF format." />
+        <param name="filter_field" type="text" label="Filter field" optional="true"
+               help="Filter output using this field's values. (e.g. Max_SuCOS_Score or Cum_SuCOS_Score)">
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&apos;"/>
+                </valid>
+                <mapping initial="none"/>
+            </sanitizer>
+        </param>
+        <param name="filter_value" type="float" label="Filter value" optional="true"
+               help="Filter out scores less than this value."/>
     </inputs>
     <outputs>
         <data format="sdf" name="output" label="The scored ligands"/>
@@ -27,6 +41,20 @@
                 <assert_contents>
                     <has_text text="Max_SuCOS_Score" />
                     <has_text text="Cum_SuCOS_Score" />
+                    <has_n_lines n="2224" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="input" ftype="sdf" value="sucos_cluster.sdf"/>
+            <param name="clusters" ftype="sdf" value="cluster1.sdf,cluster2.sdf,cluster3.sdf,cluster4.sdf,cluster5.sdf,cluster6.sdf"/>
+            <param name="filter_field" value="Cum_SuCOS_Score"/>
+            <param name="filter_value" value="15"/>
+            <output name="output" ftype="sdf">
+                <assert_contents>
+                    <has_text text="Max_SuCOS_Score" />
+                    <has_text text="Cum_SuCOS_Score" />
+                    <has_n_lines n="686" />
                 </assert_contents>
             </output>
         </test>
@@ -54,6 +82,10 @@
 The clustered reference compounds are likely to have been generated using the "Cluster ligands using SuCOS" tool and
 will comprise a SDF format file for each cluster. The ligands to be scored are supplied in a SDF file.
 
+Optional filtering of the output is possible, for instance to retain only records with SuCOS scores greater than a
+certain value. Use the optional 'Filter field' and 'Filter value' parameters. The 'Filter field' would typically
+be one of the properties listed below.
+
 .. class:: infomark
 
 **Output**