Mercurial > repos > bgruening > chembl
diff chembl_structure_pipeline.xml @ 6:a57de37f12c2 draft
"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e"
author | bgruening |
---|---|
date | Wed, 07 Oct 2020 09:31:40 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/chembl_structure_pipeline.xml	Wed Oct 07 09:31:40 2020 +0000
@@ -0,0 +1,91 @@
+<tool id="chembl_structure_pipeline" name="ChEMBL structure pipeline" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
+    <!-- Galaxy wrapper around structure_pipeline.py: runs the selected ChEMBL structure
+         pipeline components (Standardize, GetParent, Check) on a MOL/SDF dataset. -->
+    <description>for curation and standardizing of molecular structures</description>
+    <macros>
+        <!-- The tool version doubles as the pinned version of the package requirement below. -->
+        <token name="@TOOL_VERSION@">1.0.0</token>
+        <token name="@GALAXY_VERSION@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">chembl_structure_pipeline</requirement>
+    </requirements>
+    <!-- Failure is decided by the script's exit code, so that text written to stderr
+         does not by itself mark the job as failed. Each boolean parameter expands
+         to its command-line flag when checked and to an empty string otherwise. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/structure_pipeline.py'
+            --input '$input'
+            --output '$output'
+            $standardize
+            $get_parent
+            $check
+    ]]></command>
+    <inputs>
+        <param label="SDF/MOL input" type="data" name="input" format="sdf,mol" argument="--input"/>
+        <param label="Standardize" checked="false" type="boolean" name="standardize" argument="--standardize" truevalue="--standardize" falsevalue="" help="Apply the 'Standardize' pipeline component"/>
+        <param label="Get parent" checked="false" type="boolean" name="get_parent" argument="--get_parent" truevalue="--get_parent" falsevalue="" help="Apply the 'GetParent' pipeline component"/>
+        <param label="Check" checked="false" type="boolean" name="check" argument="--check" truevalue="--check" falsevalue="" help="Apply the 'Check' pipeline component"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="sdf" label="Processed molecules"/>
+    </outputs>
+    <tests>
+        <!-- Tests 1-3 enable a single component each on the same MOL file; test 4 enables all three on an SDF file. -->
+        <test>
+            <param name="input" value="o_molblock.mol" ftype="mol" />
+            <param name="standardize" value="true" />
+            <output name="output" value="std_molblock.mol"/>
+        </test>
+        <test>
+            <param name="input" value="o_molblock.mol" ftype="mol" />
+            <param name="get_parent" value="true" />
+            <output name="output" value="parent_molblock.mol"/>
+        </test>
+        <test>
+            <param name="input" value="o_molblock.mol" ftype="mol" />
+            <param name="check" value="true" />
+            <output name="output" value="check.mol"/>
+        </test>
+        <test>
+            <param name="input" value="10mol.sdf" ftype="sdf" />
+            <param name="standardize" value="true" />
+            <param name="get_parent" value="true" />
+            <param name="check" value="true" />
+            <output name="output" value="pipeline_multi.sdf"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+Apply the ChEMBL chemical curation pipeline to a set of chemical structures in MOL or SDF
+format. The pipeline is described in detail in the citation provided (Bento et al.,
+2020).
+
+The pipeline consists of three components:
+ - a Standardizer which formats compounds according to defined rules and conventions, based mostly on FDA/IUPAC guidelines.
+ - a GetParent component that removes any salts and solvents from the compound to create its parent.
+ - a Checker to test the validity of chemical structures and flag any serious errors. Errors are given a code from 0 (least serious) to 10 (most serious), the highest of which is stored in the SDF field `<MaxPenaltyScore>`. A list of all errors encountered is recorded under `<IssueMessages>`.
+
+One or more of these components can be applied in a single Galaxy job.
+
+-----
+
+.. class:: infomark
+
+**Input**
+
+One or more molecules in MOL/SDF format.
+
+-----
+
+.. class:: infomark
+
+**Output**
+
+A MOL/SD-file containing the processed molecules.
+
+]]></help>
+    <citations>
+        <citation type="doi">10.1186/s13321-020-00456-1</citation>
+    </citations>
+</tool>