diff chembl_structure_pipeline.xml @ 6:a57de37f12c2 draft

"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
author bgruening
date Wed, 07 Oct 2020 09:31:40 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chembl_structure_pipeline.xml	Wed Oct 07 09:31:40 2020 +0000
@@ -0,0 +1,85 @@
+<tool id="chembl_structure_pipeline" name="ChEMBL structure pipeline" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
+    <description>for curation and standardizing of molecular structures</description>
+    <macros>
+        <token name="@TOOL_VERSION@">1.0.0</token>
+        <token name="@GALAXY_VERSION@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">chembl_structure_pipeline</requirement>
+    </requirements>  
+    <command><![CDATA[
+        python '$__tool_directory__/structure_pipeline.py' 
+            --input '$input'
+            --output '$output'
+            $standardize
+            $get_parent
+            $check
+]]>
+    </command>
+    <inputs>
+        <param label="SDF/MOL input" type="data" name="input" format="sdf,mol" argument="--input"/>
+        <param label="Standardize" checked="false" type="boolean" name="standardize" argument="--standardize" truevalue="--standardize" falsevalue="" help="Apply the 'Standardize' pipeline component"/>
+        <param label="Get parent" checked="false" type="boolean" name="get_parent" argument="--get_parent" truevalue="--get_parent" falsevalue="" help="Apply the 'GetParent' pipeline component"/>
+        <param label="Check" checked="false" type="boolean" name="check" argument="--check" truevalue="--check" falsevalue="" help="Apply the 'Check' pipeline component"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="Processed molecules"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="o_molblock.mol" />
+            <param name="standardize" value="true" />
+            <output name="output" value="std_molblock.mol"/>
+        </test>
+        <test>
+            <param name="input" value="o_molblock.mol" ftype="mol" />
+            <param name="get_parent" value="true" />
+            <output name="output" value="parent_molblock.mol"/>
+        </test>
+        <test>
+            <param name="input" value="o_molblock.mol" ftype="mol" />
+            <param name="check" value="true" />
+            <output name="output" value="check.mol"/>
+        </test>
+        <test>
+            <param name="input" value="10mol.sdf" ftype="sdf" />
+            <param name="standardize" value="true" />
+            <param name="get_parent" value="true" />
+            <param name="check" value="true" />
+            <output name="output" value="pipeline_multi.sdf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Apply the ChEMBL chemical curation pipeline to a set of chemical structures in SDF
+format. The pipeline is described in detail in the citation provided (Bento et al.,
+2020).
+
+The pipeline consists of three components:
+ - a Standardizer which formats compounds according to defined rules and conventions, based mostly on FDA/IUPAC guidelines.
+ - a GetParent component that removes any salts and solvents from the compound to create its parent.
+ - a Checker to test the validity of chemical structures and flag any serious errors. Errors are given a code from 0 (least serious) to 10 (most serious), the highest of which is stored in the SDF field `<MaxPenaltyScore>`. A list of all errors encountered is recorded under `<IssueMessages>`.
+
+Either one or more of these protocols can be applied in a single Galaxy job.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+One or more molecules in MOL/SDF format.
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+A MOL/SD-file containing the processed molecules.
+
+]]></help>
+    <citations>
+        <citation type="doi">10.1186/s13321-020-00456-1</citation>
+    </citations>
+</tool>