Mercurial > repos > bgruening > rdock_sort_filter
diff sort_filter.xml @ 2:a6ec6c55267e draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/chemicaltoolbox/rdock commit db76c3bc4ced257e2f622fcf8fcc6d2e28de3577"
author | bgruening |
---|---|
date | Tue, 14 Apr 2020 06:35:35 -0400 |
parents | 784a9f7f079e |
children | d1ca4b45f615 |
line wrap: on
line diff
--- a/sort_filter.xml Fri Apr 03 13:33:32 2020 -0400 +++ b/sort_filter.xml Tue Apr 14 06:35:35 2020 -0400 @@ -1,4 +1,4 @@ -<tool id="rdock_sort_filter" name="SDF sort and filter" version="0.1.0"> +<tool id="rdock_sort_filter" name="SDF sort and filter" version="0.2.0"> <description>using the sdsort provided with rDock</description> <macros> <import>rdock_macros.xml</import> @@ -6,16 +6,38 @@ <expand macro="requirements"/> <command><![CDATA[ -sdsort -n $descending -s -f'$sort_field' -id'$name_field' '$input' | - sdfilter -f'\$_COUNT <= $top' -s'$name_field' | - sdsort -n $descending -f'$sort_field' > '$output' +cat '$input' +#if $filter +| sdfilter -f'$filter' +#end if +#if $name_field +| sdsort -n $descending -s -f'$sort_field' -id'$name_field' +| sdfilter -f'\$_COUNT <= $top' -s'$name_field' +#end if +#if $global_sort and $sort_field +| sdsort -n $descending -f'$sort_field' +#end if +> '$output' ]]></command> <inputs> <param type="data" name="input" format="sdf" label="Molecules" help="Molecules in SDF format"/> - <param name="top" type="integer" value="1" label="Number of records to keep in output" help="Number of best scoring records to keep"/> - <param name="sort_field" type="text" label="Field to sort on" optional="false" help="Name of the field to sort records by"> + + <param name="filter" type="text" label="Filter expression" optional="true" help="Perl expression for filter"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + <remove value="&quot;"/> + <remove value="@"/> + <remove value="#"/> + <remove value="|"/> + </valid> + <mapping initial="none"/> + </sanitizer> + </param> + + <param name="sort_field" type="text" label="Field to sort on" optional="true" help="Name of the field to sort records by"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> @@ -23,16 +45,21 @@ <mapping initial="none"/> </sanitizer> </param> - <param name="name_field" type="text" label="Grouping field name" optional="false" help="Name of the 
field to group records by (must be sequential)"> + <param name="descending" type="boolean" label="Sort descending" truevalue="-r" falsevalue="" checked="true" + help="Sort ascending or descending"/> + <param name="global_sort" type="boolean" label="Global sort" checked="true" + help="Sort all records in file after filtering (true) or just sort within the blocks identified by $name_field (false)"/> + + <param name="name_field" type="text" label="Grouping field name" optional="true" help="Name of the field to group records by (must be sequential)"> <sanitizer> <valid initial="string.printable"> <remove value="'"/> </valid> <mapping initial="none"/> </sanitizer> - </param> - <param name="descending" type="boolean" label="Sort descending" truevalue="-r" falsevalue="" checked="true" - help="Generate the name field (first line) for cases where this is empty"/> + </param> + <param name="top" type="integer" value="1" label="Number of records to keep in output" optional="true" help="Number of best scoring records to keep"/> + </inputs> <outputs> <data name="output" format="sdf" label="SDF sort+filter on ${on_string}"/> @@ -42,14 +69,44 @@ <param name="input" value="poses.sdf"/> <param name="sort_field" value="TransFSScore"/> <param name="name_field" value="Name"/> - <output name="output" file="poses-descending.sdf" ftype="sdf" /> + <param name="descending" value="True"/> + <output name="output" file="poses-descending.sdf" ftype="sdf"/> </test> <test> <param name="input" value="poses.sdf"/> <param name="sort_field" value="TransFSScore"/> <param name="name_field" value="Name"/> <param name="descending" value="False"/> - <output name="output" file="poses-ascending.sdf" ftype="sdf" /> + <output name="output" file="poses-ascending.sdf" ftype="sdf"/> + </test> + <test> + <param name="input" value="poses.sdf"/> + <param name="filter" value="$TransFSScore > 0.2"/> + <param name="sort_field" value="TransFSScore"/> + <param name="name_field" value="Name"/> + <param name="descending" 
value="False"/> + <output name="output" file="poses-filt-0.2.sdf" ftype="sdf"/> + </test> + <test> + <param name="input" value="poses.sdf"/> + <param name="filter" value="$TransFSScore > 0.1 and $TransFSScore > 0.2"/> + <param name="sort_field" value="TransFSScore"/> + <param name="name_field" value="Name"/> + <param name="descending" value="False"/> + <output name="output" file="poses-filt-0.2.sdf" ftype="sdf"/> + </test> + <test> + <param name="input" value="poses.sdf"/> + <param name="sort_field" value="TransFSScore"/> + <param name="name_field" value="Name"/> + <param name="descending" value="True"/> + <param name="global_sort" value="False"/> + <output name="output" file="poses-desc-noglobal.sdf" ftype="sdf"/> + </test> + <test> + <param name="input" value="poses.sdf"/> + <param name="filter" value="$TransFSScore > 0.2"/> + <output name="output" file="poses-filt-only.sdf" ftype="sdf"/> </test> </tests> <help><![CDATA[ @@ -65,19 +122,31 @@ .. class:: infomark **Inputs** -An SD-file, together with names of fields to sort and group records by, and the number of records to appear in the output. -The sorting is performed on groups of molecules, with the group being identified by a field in the SDF (the name_field -parameter). Records from a group MUST be sequential. -The records within each group are sorted by the value of a field in the SDF (the sort_field parameter) and you can -specify ascending or descending order (the descending parameter). -Finally a number of top scoring (the top parameter, typically having a value of 1) are written to the output. +An SD-file, together with names of fields to filter, sort and group records by, and the number of records to appear in the output. + +An optional filter can be specified that is first applied to the records. This filter (the 'filter' parameter) must be +specified as required by the 'sdfilter' application (see http://rdock.sourceforge.net/wp-content/uploads/2015/08/rDock_User_Guide.pdf) +which is a Perl expression. 
As an example, if your SDF has a field named 'SCORE' which has numeric values then a valid +filter expression would be '$SCORE > 90' (note the $ symbol). +If you need to use multiple filters then you can combine them in a single expression like this: +'$A < 5 and $B < 7', or '$A < 5 or $B < 7' + +The sorting is then performed on groups of molecules, with the groups being identified by a field in the SD-file (the 'name_field' +parameter). Records from a group MUST be sequential in the input file. If 'name_field' is not specified then this grouping +and sorting step is skipped. Sorting is performed by the rDock 'sdsort' application. +The records within each group are sorted by the value of a field in the SD-file (the 'sort_field' parameter) and you can +specify ascending or descending order (the 'descending' parameter). +Then a number of top scoring records (the 'top' parameter, typically having a value of 1) are retained. + +Finally, if the 'global_sort' parameter is set to 'True' then all the remaining records are sorted according to the +'sort_field' and 'descending' parameters. Note: this step can use lots of memory if the files are very big. ----- .. class:: infomark **Outputs** -An SD-file, containing molecules filtered by the field specified. +An SD-file, containing molecules filtered and sorted according to the parameters. ]]></help> <expand macro="citations"/>