view matchms_remove_spectra.xml @ 1:c24ebd41dd53 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 2abca9206b76bf42e6322d751ed3a31a6cc003a4
author recetox
date Tue, 04 Jun 2024 07:10:48 +0000
parents 80df426e7e47
children fbe335d27d25
line wrap: on
line source

<tool id="matchms_remove_spectra" name="matchms remove spectra" version="@TOOL_VERSION@+galaxy1" profile="21.09">
    <description>Filters spectra based on metadata presence</description>
    
    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="creator"/>

    <edam_operations>
        <edam_operation>operation_3695</edam_operation>
    </edam_operations>
    
    <expand macro="bio.tools"/>

    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
    </requirements>
    <!-- Runs the Python script that Galaxy renders from the "filter_spectra" configfile below;
         a non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
        python3 '${filter_spectra}'
    ]]></command>

    <configfiles>
        <configfile name="filter_spectra">
from matchms.exporting import save_as_mgf, save_as_msp
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import is_valid_inchi, is_valid_smiles, is_valid_inchikey
from matchms.filtering import require_compound_name, require_formula, require_precursor_mz, \
require_retention_time, require_retention_index, require_valid_annotation
from matchms.importing import load_from_mgf, load_from_msp

#set metadata_fields = [str($f) for $f in $metadata_fields]
required_metadata = [x for x in $metadata_fields]
if "$spectra.ext" == "msp":
    spectra = list(load_from_msp("${spectra}"))
elif "$spectra.ext" == 'mgf':
    spectra = list(load_from_mgf("${spectra}"))
else:
    raise ValueError(f'File format {$spectra.ext} not supported for mass spectra file.')

filtered_spectra = []
removed_spectra = []

for spectrum in spectra:
    keep = True

    if 'smiles' in required_metadata:
        keep = keep and is_valid_smiles(spectrum.get('smiles', '').strip())
    
    if 'inchi' in required_metadata:
        keep = keep and is_valid_inchi(spectrum.get('inchi', '').strip())
    
    if 'inchikey' in required_metadata:
        inchikey = spectrum.get('inchikey', '')
        keep = keep and is_valid_inchikey(inchikey.strip())

    if 'precursor_mz' in required_metadata:
        result = require_precursor_mz(spectrum)
        keep = keep and result is not None
    
    if 'valid_annotation' in required_metadata:
        result = require_valid_annotation(spectrum)
        keep = keep and result is not None

    if 'formula' in required_metadata:
        result = require_formula(spectrum)
        keep = keep and result is not None

    if 'compound_name' in required_metadata:
        result = require_compound_name(spectrum)
        keep = keep and result is not None

    if 'retention_time' in required_metadata:
        result = require_retention_time(spectrum)
        keep = keep and result is not None
    
    if 'retention_index' in required_metadata:
        result = require_retention_index(spectrum)
        if result is not None:
            keep = keep and float(result.get('retention_index', 0)) > 0
        else:
            keep = False

    if keep:
        filtered_spectra.append(spectrum)
    
    if "$spectra_removed" == "TRUE" and keep == False:
        removed_spectra.append(spectrum)


if "$spectra.ext" == "msp":
    save_as_msp(filtered_spectra, "${output_filtered}")
    save_as_msp(removed_spectra, "${output_removed}")
else:
    save_as_mgf(filtered_spectra, "${output_filtered}")
    save_as_mgf(removed_spectra, "${output_removed}")

        </configfile>
    </configfiles>

    <!-- User-facing parameters; their names are referenced from the filter_spectra script template. -->
    <inputs>
        <!-- Spectra file to filter; only msp and mgf datasets are accepted. -->
        <param name="spectra" type="data" format="msp,mgf" label="Input Spectra File" help="Input file containing mass spectra"/>
        <!-- Each option value is matched by name in the script to decide which checks are applied. -->
        <param name="metadata_fields" type="select" multiple="true" label="Metadata Fields" help="Select metadata fields required in the spectra">
            <option value="smiles">SMILES</option>
            <option value="inchi">InChI</option>
            <option value="inchikey">InChIKey</option>
            <option value="formula">Formula</option>
            <option value="retention_time">Retention Time</option>
            <option value="retention_index">Retention Index</option>
            <option value="precursor_mz">Precursor MZ</option>
            <option value="valid_annotation">Valid Annotation</option>
            <option value="compound_name">Compound Name</option>
        </param>
        <!-- Rendered as TRUE or FALSE in the script; also used by the filter on the output_removed dataset. -->
        <param name="spectra_removed" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Output Removed Spectra" help="Output spectra that were removed due to missing metadata"/>
    </inputs>

    <!-- Both outputs take their datatype from the "spectra" input (format_source). -->
    <outputs>
        <!-- Spectra that passed every selected check. -->
        <data name="output_filtered" format_source="spectra" label="${tool.name} on ${on_string}: Filtered Spectra"/>
        <!-- Spectra that failed at least one check; filtered on the spectra_removed parameter. -->
        <data name="output_removed" format_source="spectra" label="${tool.name} on ${on_string}: Removed Spectra">
            <filter>spectra_removed</filter>
        </data>
    </outputs>

    <!-- One test per metadata option, all run against the same msp fixture (remove_spectra/require_filter.msp). -->
    <tests>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="smiles"/>
            <output name="output_filtered" file="remove_spectra/require_smiles.msp" ftype="msp"/>
        </test>
        <!-- The only test that enables spectra_removed, so it expects two outputs and checks both. -->
        <test expect_num_outputs="2">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="inchi"/>
            <param name="spectra_removed" value="TRUE"/>
            <output name="output_filtered" file="remove_spectra/require_inchi.msp" ftype="msp"/>
            <output name="output_removed" file="remove_spectra/require_inchi_removed_spectra.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="inchikey"/>
            <output name="output_filtered" file="remove_spectra/require_inchikey.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="formula"/>
            <output name="output_filtered" file="remove_spectra/require_formula.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="retention_time"/>
            <output name="output_filtered" file="remove_spectra/require_retention_time.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="retention_index"/>
            <output name="output_filtered" file="remove_spectra/require_retention_index.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="precursor_mz"/>
            <output name="output_filtered" file="remove_spectra/require_precursor_mz.msp" ftype="msp"/>
        </test>
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="compound_name"/>
            <output name="output_filtered" file="remove_spectra/require_compound_name.msp" ftype="msp"/>
        </test>
        <!-- The filtered output is asserted to be empty for the valid_annotation check on this fixture. -->
        <test expect_num_outputs="1">
            <param name="spectra" value="remove_spectra/require_filter.msp" ftype="msp"/>
            <param name="metadata_fields" value="valid_annotation"/>
            <output name="output_filtered">
                <assert_contents>
                    <has_n_lines n="0"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
        This tool filters input mass spectra based on the presence of the selected metadata fields. Spectra that fail any of the selected checks are removed from the filtered output and can optionally be written to a separate output.
        The Valid Annotation filter removes spectra that are not fully annotated, i.e. that do not have correct and matching SMILES, InChI and InChIKey.
    ]]></help>

    <expand macro="citations"/>
</tool>