<!--
Repository view metadata (not part of the tool definition):

view chembl_structure_pipeline.xml @ 0:2f59c6239f25 draft default tip

"planemo upload for repository https://github.com/chembl/chembl_webresource_client commit 78f2261af4e00c830ea311337d0aed9b297aad8e-dirty"
author bgruening
date Sat, 10 Oct 2020 09:43:40 +0000
parents
children
line wrap: on
line source
-->

<tool id="chembl_structure_pipeline" name="ChEMBL structure pipeline" version="@TOOL_VERSION@+galaxy@GALAXY_VERSION@">
    <description>for curation and standardizing of molecular structures</description>
    <!-- Version tokens substituted into the tool's version attribute and the package requirement. -->
    <macros>
        <!-- Version of the chembl_structure_pipeline package this wrapper requires. -->
        <token name="@TOOL_VERSION@">1.0.0</token>
        <!-- Suffix appended after "+galaxy" in the tool version. -->
        <token name="@GALAXY_VERSION@">0</token>
    </macros>
    <!-- Package dependency, pinned to the same version as @TOOL_VERSION@. -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">chembl_structure_pipeline</requirement>
    </requirements>
    <!-- Runs the bundled structure_pipeline.py script on the selected dataset.
         Each boolean parameter expands to its command-line flag when checked and to
         an empty string otherwise, so any combination of components can be requested.
         detect_errors="exit_code" makes job failure depend on a non-zero exit status;
         without it (and without a tool profile) Galaxy uses its legacy behaviour of
         treating any text on standard error as a failure. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/structure_pipeline.py'
            --input '$input'
            --output '$output'
            $standardize
            $get_parent
            $check
    ]]></command>
    <!-- One structure file plus one on/off switch per pipeline component. -->
    <inputs>
        <param argument="--input" name="input" type="data" format="sdf,mol"
               label="SDF/MOL input"/>
        <!-- The switches below are unchecked by default; a checked switch contributes its truevalue to the command line. -->
        <param argument="--standardize" name="standardize" type="boolean" checked="false"
               truevalue="--standardize" falsevalue=""
               label="Standardize"
               help="Apply the 'Standardize' pipeline component"/>
        <param argument="--get_parent" name="get_parent" type="boolean" checked="false"
               truevalue="--get_parent" falsevalue=""
               label="Get parent"
               help="Apply the 'GetParent' pipeline component"/>
        <param argument="--check" name="check" type="boolean" checked="false"
               truevalue="--check" falsevalue=""
               label="Check"
               help="Apply the 'Check' pipeline component"/>
    </inputs>
    <!-- Single result dataset; its path is handed to the script through the output option of the command. -->
    <outputs>
        <data format="sdf" name="output" label="Processed molecules"/>
    </outputs>
    <!-- Functional tests: each pipeline component on its own with a single MOL block,
         then all three components together on a multi-molecule SD-file.
         Every test states the input datatype explicitly so that none of them
         depends on datatype sniffing of the uploaded test file. -->
    <tests>
        <!-- Standardize only -->
        <test>
            <param name="input" value="o_molblock.mol" ftype="mol" />
            <param name="standardize" value="true" />
            <output name="output" value="std_molblock.mol"/>
        </test>
        <!-- Get parent only -->
        <test>
            <param name="input" value="o_molblock.mol" ftype="mol" />
            <param name="get_parent" value="true" />
            <output name="output" value="parent_molblock.mol"/>
        </test>
        <!-- Check only -->
        <test>
            <param name="input" value="o_molblock.mol" ftype="mol" />
            <param name="check" value="true" />
            <output name="output" value="check.mol"/>
        </test>
        <!-- All three components on an SD-file -->
        <test>
            <param name="input" value="10mol.sdf" ftype="sdf" />
            <param name="standardize" value="true" />
            <param name="get_parent" value="true" />
            <param name="check" value="true" />
            <output name="output" value="pipeline_multi.sdf"/>
        </test>
    </tests>
    <help><![CDATA[

Apply the ChEMBL chemical curation pipeline to a set of chemical structures in MOL
or SDF format. The pipeline is described in detail in the citation provided (Bento
et al., 2020).

The pipeline consists of three components:

- a Standardizer which formats compounds according to defined rules and conventions, based mostly on FDA/IUPAC guidelines.
- a GetParent component that removes any salts and solvents from the compound to create its parent.
- a Checker to test the validity of chemical structures and flag any serious errors. Errors are given a code from 0 (least serious) to 10 (most serious), the highest of which is stored in the SDF field ``<MaxPenaltyScore>``. A list of all errors encountered is recorded under ``<IssueMessages>``.

One or more of these components can be applied in a single Galaxy job.

-----

.. class:: infomark

**Input**

One or more molecules in MOL/SDF format.

-----

.. class:: infomark

**Output**

A MOL/SD-file containing the processed molecules.

]]></help>
    <!-- Reference for the pipeline; the help text refers to it as "Bento et al., 2020". -->
    <citations>
        <citation type="doi">10.1186/s13321-020-00456-1</citation>
    </citations>
</tool>