<!-- Mercurial > repos > recetox > matchms_split -->

<tool id="matchms_split" name="matchms split library" version="@TOOL_VERSION@+galaxy1" profile="21.09">
    <!-- Short summary of what the tool does. -->
    <description>split a large library into subsets</description>

    <!-- Shared macro files. They are expected to provide the macros expanded below
         ("bio.tools", "creator") and the @TOOL_VERSION@ token; verify against macros.xml / help.xml. -->
    <macros>
        <import>macros.xml</import>
        <import>help.xml</import>
    </macros>

    <!-- EDAM ontology annotation for this tool's operation. -->
    <edam_operations>
        <edam_operation>operation_3359</edam_operation>
    </edam_operations>

    <expand macro="bio.tools"/>
    <expand macro="creator"/>

    <!-- Runtime dependency: the matchms package, pinned to the same version token as the tool. -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">matchms</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## Invoke the splitting script located in the tool directory.
        ## The directory given to the outdir option is the one scanned by the outputs section.
        python3 '${__tool_directory__}/matchms_split.py'
            --filename '${msp_input}'
            --method '${method.split_type}'
            --outdir 'output'
        ## Only the two chunking methods pass a numeric parameter; one-per-file passes none.
        #if $method.split_type == "chunk-size"
            --parameter ${method.chunk_size}
        #elif $method.split_type == "num-chunks"
            --parameter ${method.num_chunks}
        #end if
    ]]></command>
    <inputs>
        <!-- Library to be split; must be in msp format. -->
        <param name="msp_input" type="data" format="msp" label="Spectral library"
               help="Spectral library (.msp) to split into smaller chunks. Requires keyword 'compound_name', 'compoundname' or 'name' to identify individual spectra using the one-per-file method."/>

        <!-- The selected splitting method decides which (if any) numeric parameter is requested. -->
        <conditional name="method">
            <param name="split_type" type="select" label="Splitting method" display="radio" help="Method on how to split the library.">
                <option value="chunk-size" selected="true">Split into chunks of specified size</option>
                <option value="num-chunks">Split into specified number of chunks</option>
                <option value="one-per-file">Split to one spectrum per file</option>
            </param>
            <when value="chunk-size">
                <param name="chunk_size" type="integer" value="1000" min="2" label="Chunk size"
                       help="Number of spectra per output chunk."/>
            </when>
            <when value="num-chunks">
                <param name="num_chunks" type="integer" value="5" min="2" label="Number of chunks"
                       help="Number of chunks into which to split the library."/>
            </when>
            <!-- one-per-file takes no additional parameter. -->
            <when value="one-per-file"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Files discovered in the "output" directory are gathered into one list collection and typed
             as msp; element identifiers come from the file names via the __designation_and_ext__ pattern. -->
        <collection name="sample" type="list" format="msp">
            <discover_datasets directory="output" pattern="__designation_and_ext__" ext="msp"/>
        </collection>
    </outputs>
    <tests>
        <!-- Parameters that live inside the "method" conditional are nested in a matching
             <conditional> element so they are resolved by their full path rather than by name guessing. -->

        <!-- one-per-file: every listed spectrum of the sample library is compared against its own expected file. -->
        <test>
            <param name="msp_input" value="split/sample_input.msp"/>
            <conditional name="method">
                <param name="split_type" value="one-per-file"/>
            </conditional>
            <output_collection name="sample" type="list">
                <element name="1NITROPYRENE" file="split/one-per-file/1NITROPYRENE.msp" ftype="msp" compare="diff"/>
                <element name="23DICHLOROPHENOL" file="split/one-per-file/23DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="245TRICHLOROPHENOL" file="split/one-per-file/245TRICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="246TRICHLOROPHENOL" file="split/one-per-file/246TRICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="24DICHLOROPHENOL" file="split/one-per-file/24DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="24DINITROPHENOL" file="split/one-per-file/24DINITROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="25DICHLOROPHENOL" file="split/one-per-file/25DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="26DICHLOROPHENOL" file="split/one-per-file/26DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="34DICHLOROPHENOL" file="split/one-per-file/34DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
                <element name="35DICHLOROPHENOL" file="split/one-per-file/35DICHLOROPHENOL.msp" ftype="msp" compare="diff"/>
            </output_collection>
        </test>

        <!-- num-chunks: the sample library is split into two chunks. -->
        <test>
            <param name="msp_input" value="split/sample_input.msp"/>
            <conditional name="method">
                <param name="split_type" value="num-chunks"/>
                <param name="num_chunks" value="2"/>
            </conditional>
            <output_collection name="sample" type="list">
                <element name="chunk_0" file="split/num-chunks/chunk_0.msp" ftype="msp" compare="diff"/>
                <element name="chunk_1" file="split/num-chunks/chunk_1.msp" ftype="msp" compare="diff"/>
            </output_collection>
        </test>

        <!-- chunk-size: the sample library is split into chunks of four spectra. -->
        <test>
            <param name="msp_input" value="split/sample_input.msp"/>
            <conditional name="method">
                <param name="split_type" value="chunk-size"/>
                <param name="chunk_size" value="4"/>
            </conditional>
            <output_collection name="sample" type="list">
                <element name="chunk_0" file="split/chunk-size/chunk_0.msp" ftype="msp" compare="diff"/>
                <element name="chunk_1" file="split/chunk-size/chunk_1.msp" ftype="msp" compare="diff"/>
                <element name="chunk_2" file="split/chunk-size/chunk_2.msp" ftype="msp" compare="diff"/>
            </output_collection>
        </test>
    </tests>
    <!-- User-facing help text. The body is whitespace-sensitive and is kept verbatim;
         the HELP_matchms token is presumably defined in the imported help.xml (verify). -->
    <help>
        Description
            This tool can be used to split a mass spectral library into smaller subsets for further processing.
            This can be used to perform efficient spectral matching with large libraries, using job-level parallelism.

            Please observe that the `num-chunks` method does not preserve the order of the elements as it uses a `round robin` distribution.

        @HELP_matchms@
    </help>
    <!-- Citation entries are supplied by the "citations" macro, presumably from the imported macro files (verify). -->
    <expand macro="citations"/>
</tool>
<!--
author	recetox
date	Thu, 12 Oct 2023 13:27:42 +0000
parents	0cf68b536cd1
children	ffe46a9b1124
-->