Mercurial > repos > recetox > matchms_similarity

<tool id="matchms_similarity" name="matchms similarity" version="@TOOL_VERSION@+galaxy2" profile="21.09">
    <!-- Short description of what the tool does. -->
    <description>calculate the similarity score and matched peaks</description>

    <!-- Macro files imported by this tool; the <expand> elements and the @...@
         tokens used in this file are expected to be defined in them. -->
    <macros>
        <import>macros.xml</import>
        <import>help.xml</import>
    </macros>
    <expand macro="creator"/>
    <expand macro="bio.tools"/>

    <!-- Packages required by the job: spec2vec is pinned explicitly, matchms
         follows the @TOOL_VERSION@ token. -->
    <requirements>
        <requirement version="0.8.0" type="package">spec2vec</requirement>
        <requirement version="@TOOL_VERSION@" type="package">matchms</requirement>
    </requirements>

    <!-- Runs the generated "matchms_python_cli" config file with sh; a non-zero
         exit code marks the job as failed (detect_errors="exit_code").
         The path is single-quoted so that a job directory containing spaces or
         shell metacharacters still reaches sh as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        sh '${matchms_python_cli}'
    ]]> </command>

    <!-- Environment exported to the job. MPLCONFIGDIR (the variable Matplotlib
         consults for its configuration/cache directory) is set to the value of
         $_GALAXY_JOB_TMP_DIR.
         NOTE(review): the leading backslash presumably keeps the dollar sign
         literal for the template engine so that the shell expands the variable
         at job run time; confirm against the Galaxy documentation. -->
    <environment_variables>
        <environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
    </environment_variables>

    <configfiles>
        <!-- Template for the shell script executed by the <command> block. It builds
             one call to matchms_similarity_wrapper.py with, in order:
               * optional "-r <tolerance>" when RI filtering is enabled;
               * either "-s" (symmetric mode) or the reference file and its extension;
               * the scores array type;
               * for Spec2Vec only: model JSON, model weights and the allowed share
                 of missing peaks;
               * positional arguments: queries file, queries extension, metric name,
                 tolerance, mz_power (both passed as 0 for Spec2Vec),
                 intensity_power and the output path.
             The wrapper path is single-quoted so that a tool directory containing
             spaces is not word-split by the shell. No XML comments are placed
             inside <configfile> because its text content is the script itself. -->
        <configfile name="matchms_python_cli">
            python3 '${__tool_directory__}/matchms_similarity_wrapper.py' \
            #if $ri_filtering.is_true == "TRUE"
            -r $ri_filtering.tolerance \
            #end if
            #if $symmetric.is_symmetric == "TRUE"
            -s \
            #else
            --ref "$references" \
            --ref_format "$references.ext" \
            #end if
            --array_type "$array_type" \
            #if $metric.similarity_metric == "Spec2Vec"
            --spec2vec_model "$metric.model_metadata" \
            --spec2vec_weights "$metric.model_weights" \
            --allow_missing_percentage $metric.algorithm.allow_missing_percentage \
            #end if
            "$queries" \
            "$queries.ext" \
            "$metric.similarity_metric" \
            #if $metric.similarity_metric == "Spec2Vec"
            0 \
            0 \
            #else
            $metric.algorithm.tolerance \
            $metric.algorithm.mz_power \
            #end if
            $metric.algorithm.intensity_power \
            "$similarity_scores"
        </configfile>
    </configfiles>

    <inputs>
        <!-- Query spectra; always required. -->
        <param label="Queries spectra" name="queries" type="data" format="msp,mgf"
               help="Query mass spectra to match against references."/>

        <!-- Symmetric mode needs no reference file; otherwise a reference
             library must be supplied. -->
        <conditional name="symmetric">
            <param name="is_symmetric" label="Symmetric" type="select">
                <option value="FALSE" selected="true">FALSE</option>
                <option value="TRUE">TRUE</option>
            </param>
            <when value="FALSE">
                <param label="Reference spectra" name="references" type="data" format="msp,mgf"
                       help="Reference mass spectra to match against as library."/>
            </when>
            <when value="TRUE"></when>
        </conditional>

        <!-- Storage type passed to the wrapper as "array_type"; the option value
             for the dense choice is "numpy". -->
        <param label="Scores array type" name="array_type" type="select" display="radio"
               help="Matrix type for storing scores objects. The sparse type is more memory-efficient and better suited to large arrays.
                    Note that whatever is selected, the output might still be a sparse array if the scores have too many 0-entries.">
            <option value="numpy" selected="true">dense</option>
            <option value="sparse">sparse</option>
        </param>

        <!-- Similarity metric selection. The cosine-style metrics share the
             "similarity_algorithm_params" macro; Spec2Vec additionally needs a
             model (JSON + weights) and defines its own "algorithm" section. -->
        <conditional name="metric">
            <param label="Similarity metric" name="similarity_metric" type="select" display="radio"
                help="Similarity metric to use for score computation.">
                <expand macro="similarity_metrics"/>
                <option value="Spec2Vec">Spec2Vec</option>
            </param>
            <when value="CosineGreedy">
                <expand macro="similarity_algorithm_params"/>
            </when>
            <when value="CosineHungarian">
                <expand macro="similarity_algorithm_params"/>
            </when>
            <when value="ModifiedCosine">
                <expand macro="similarity_algorithm_params"/>
            </when>
            <when value="NeutralLossesCosine">
                <expand macro="similarity_algorithm_params"/>
            </when>
            <when value="Spec2Vec">
                <param label="Model JSON file" name="model_metadata" type="data" format="json"
                       help="Model JSON file to use for Spec2Vec similarity computing."/>
                <param label="Model NPY file" name="model_weights" type="data" format="binary"
                       help="Model NPY file to use for Spec2Vec similarity computing."/>

                <section name="algorithm" title="Algorithm Parameters" expanded="true">
                    <param label="intensity_power" name="intensity_power" type="float" value="0.0"
                           help="Spectrum vectors are a weighted sum of the word vectors. The given word intensities will be raised to the given power.
                            The default is 0, which means that no weighing will be done."/>
                    <param label="Maximum share of new peaks" name="allow_missing_percentage" type="float" value="0.1" max="1.0" min="0.0"
                           help="Maximum allowed share of the peaks that are new to the model in relation to the whole peak corpus."/>
                </section>
            </when>
        </conditional>


        <!-- Optional RI filtering; when enabled its tolerance is passed to the
             wrapper with the "-r" flag. -->
        <conditional name="ri_filtering">
            <param name="is_true" label="Apply RI filtering" type="select">
                <option value="FALSE" selected="true">FALSE</option>
                <option value="TRUE">TRUE</option>
            </param>
            <when value="TRUE">
                <param label="tolerance" name="tolerance" type="float" value="60"
                       help="Peaks will be considered a match when less than tolerance apart."/>
            </when>
            <when value="FALSE"></when>
        </conditional>
    </inputs>

    <!-- Single JSON dataset receiving the scores written by the wrapper; its
         label combines the selected metric with ${on_string}. -->
    <outputs>
        <data name="similarity_scores" format="json"
              label="$metric.similarity_metric scores of ${on_string}"/>
    </outputs>

    <!-- Functional tests. Every parameter is addressed through the same
         conditional/section nesting as in <inputs>, so that name resolution does
         not depend on unqualified-name matching. -->
    <tests>
        <!-- TEST #1: Test scoring of different file formats. -->
        <test>
            <param name="queries" value="similarity/fill2.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="FALSE"/>
                <param name="references" value="similarity/fill.mgf" ftype="mgf"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="CosineGreedy"/>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test1_out.json" ftype="json"/>
        </test>
        <!-- TEST #2: Test scoring of the same file formats. -->
        <test>
            <param name="queries" value="similarity/fill2.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="FALSE"/>
                <param name="references" value="similarity/RECETOX_Exposome_pesticides_HR_MS_20220323.msp" ftype="msp"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="CosineGreedy"/>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test2_out.json" ftype="json"/>
        </test>
        <!-- TEST #3: Test scoring with applying RI filtering. -->
        <test>
            <param name="queries" value="similarity/fill2.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="FALSE"/>
                <param name="references" value="similarity/fill.mgf" ftype="mgf"/>
            </conditional>
            <conditional name="ri_filtering">
                <param name="is_true" value="TRUE"/>
                <param name="tolerance" value="60.0"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="CosineHungarian"/>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test3_out.json" ftype="json"/>
        </test>
        <!-- TEST #4: Test symmetric scoring. -->
        <test>
            <param name="queries" value="similarity/recetox_gc-ei_ms_20201028.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="TRUE"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="NeutralLossesCosine"/>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test4_out.json" ftype="json"/>
        </test>
        <!-- TEST #5: Test symmetric scoring with applying RI filtering. -->
        <test>
            <param name="queries" value="similarity/recetox_gc-ei_ms_20201028.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="TRUE"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="ModifiedCosine"/>
            </conditional>
            <conditional name="ri_filtering">
                <param name="is_true" value="TRUE"/>
                <param name="tolerance" value="60.0"/>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test5_out.json" ftype="json"/>
        </test>
        <!-- TEST #6: Test Spec2Vec. The output is compared by size only
             (sim_size with a delta of 1000 bytes). -->
        <test>
            <param name="queries" value="similarity/spec2vec/inp_filtered_spectra.msp" ftype="msp"/>
            <conditional name="symmetric">
                <param name="is_symmetric" value="FALSE"/>
                <param name="references" value="similarity/spec2vec/inp_filtered_library.msp" ftype="msp"/>
            </conditional>
            <conditional name="metric">
                <param name="similarity_metric" value="Spec2Vec"/>
                <param name="model_metadata" value="similarity/spec2vec/model.json" ftype="json"/>
                <param name="model_weights" value="similarity/spec2vec/weights_100.binary" ftype="auto"/>
                <section name="algorithm">
                    <param name="allow_missing_percentage" value="1.0"/>
                </section>
            </conditional>
            <output name="similarity_scores" file="similarity/scores_test6_out.json" ftype="json" compare="sim_size" delta="1000"/>
        </test>
    </tests>

    <!-- Help text is supplied entirely by the @HELP_matchms@ token; it is
         presumably defined in the imported help.xml (verify there). -->
    <help>
        @HELP_matchms@
    </help>

    <!-- Citations come from the "citations" macro of the imported macro files. -->
    <expand macro="citations"/>
</tool>
author	recetox
date	Thu, 12 Oct 2023 13:25:30 +0000
parents	e5010b19d64d
children