<!-- Mercurial > repos > recetox > matchms_metadata_merge -->

<tool id="matchms_metadata_merge" name="matchms metadata merge" version="@TOOL_VERSION@+galaxy4" profile="21.09">
    <description>Merge metadata csv into MSP by a specified column</description>

    <macros>
        <import>macros.xml</import>
        <import>help.xml</import>
    </macros>

    <!-- The "creator" and "bio.tools" macros are not defined in this file; presumably they
         are provided by the imported macros.xml (TODO confirm). -->
    <expand macro="creator"/>

    <!-- EDAM operation annotation for this tool. -->
    <edam_operations>
        <edam_operation>operation_2409</edam_operation>
    </edam_operations>
    <expand macro="bio.tools"/>

    <!-- Runtime dependency: the matchms package, pinned through the @TOOL_VERSION@ token
         that is also used in the tool version attribute. The token is defined outside this file. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
    </requirements>

    <!-- Run the Python script rendered from the "matchms_python_cli" configfile. -->
    <command detect_errors="aggressive"><![CDATA[
        python3 '${matchms_python_cli}'
    ]]></command>

    <!-- Environment variables exported for the job; both are set to 4.
         OPENBLAS_NUM_THREADS presumably caps the OpenBLAS thread pool used by numpy (TODO confirm).
         NOTE(review): RLIMIT_NPROC is the name of a POSIX resource limit rather than a variable
         that libraries are known to read, so setting it here may have no effect. Confirm it is needed. -->
    <environment_variables>
        <environment_variable name="OPENBLAS_NUM_THREADS">4</environment_variable>
        <environment_variable name="RLIMIT_NPROC">4</environment_variable>
    </environment_variables>

<configfiles>
<configfile name="matchms_python_cli">
import pandas
import matchms
import numpy as np

## Only report matchms messages at ERROR level and reset the metadata key replacements
## to an empty mapping (presumably so keys are not renamed on load; TODO confirm).
matchms.set_matchms_logger_level('ERROR')
matchms.Metadata.set_key_replacements({})

## Column to join on. Lower-cased because the CSV headers are lower-cased below.
merge_key = '${user_specified_column}'.lower()

spectra = list(matchms.importing.load_from_msp('${spectral_library}', False))

## Read every CSV cell as an object so values are not coerced to numbers,
## then keep only the first row for each value of the merge key.
metadata_table = pandas.read_csv('${metadata_table_file}', dtype=object)
metadata_table.columns = [column.lower() for column in metadata_table.columns]
metadata_table = metadata_table.drop_duplicates(subset=merge_key)

## One row per spectrum. Columns that are missing for at least one spectrum are dropped.
## NOTE(review): if the merge key itself is missing on any spectrum its column is dropped
## too and the merge below fails; confirm this is the intended behaviour.
spectra_metadata = pandas.DataFrame([spectrum.metadata for spectrum in spectra]).dropna(axis=1)

## Right join: exactly the keys of the spectra are kept; CSV rows without a spectrum are ignored.
## NOTE(review): columns present in both tables (other than the key) get the default
## pandas suffixes _x and _y; confirm whether that is wanted.
merged = pandas.merge(metadata_table, spectra_metadata, on=merge_key, how='right')

spectra_arr = np.asarray(spectra, dtype=object)

def update_metadata(spectrum: 'matchms.Spectrum', row):
    """Copy the non-missing entries of a merged table row into a spectrum's metadata.

    Args:
        spectrum: Object exposing a readable and writable ``metadata`` mapping.
        row: Mapping of metadata key to value for this spectrum, e.g. one record of the
            merged table. Entries whose value is NaN (no matching CSV row, or an empty
            CSV cell) are skipped so that no literal ``nan`` ends up in the metadata.

    Returns:
        The same ``spectrum`` object, with its metadata updated.
    """
    ## NaN is the only float that is not equal to itself; the isinstance guard keeps
    ## the comparison away from non-scalar values such as lists.
    present = {
        key: value
        for key, value in row.items()
        if not (isinstance(value, float) and value != value)
    }
    ## Read, modify and assign back, so the update goes through the metadata setter.
    metadata = spectrum.metadata
    metadata.update(present)
    spectrum.metadata = metadata
    return spectrum

## Pair every spectrum with its merged row by position and update it exactly once.
## A plain loop replaces numpy.vectorize, which (without otypes) calls the function an
## extra time on the first element and silently broadcasts when one input has length 1.
merged_rows = merged.to_dict(orient='records')
if len(merged_rows) != len(spectra):
    raise ValueError(
        'Merged metadata table has {} rows but {} spectra were loaded.'.format(len(merged_rows), len(spectra))
    )

merged_spectra = [update_metadata(spectrum, row) for spectrum, row in zip(spectra, merged_rows)]

matchms.exporting.save_as_msp(merged_spectra, '${output}')
</configfile>
</configfiles>

    <inputs>
        <param label="Spectra file" name="spectral_library" type="data" format="msp"
            help="Mass spectral library file." />
        <param label="Metadata csv file" name="metadata_table_file" type="data" format="csv"
            help="csv file containing the metadata." />

        <!-- Substituted into the merge script as the column/metadata key to join on.
             An empty value is rejected in the form instead of failing later inside pandas. -->
        <param label="specify column/metadata key" name="user_specified_column" type="text" value="compound_name"
            help="Name of the user specified column to merge the data on.">
            <validator type="empty_field" message="A column name to merge on is required."/>
        </param>
    </inputs>

    <!-- Single MSP dataset; the script writes the merged spectra to its path. -->
    <outputs>
        <data name="output" format="msp" label="${tool.name} on ${on_string}"/>
    </outputs>

    <!-- Regression test: merge metadata_merge/metadata.csv into metadata_merge/input.msp
         on the "name" key and compare the result with metadata_merge/output.msp. -->
    <tests>
        <test>
            <param name="spectral_library" value="metadata_merge/input.msp" ftype="msp"/>
            <param name="metadata_table_file" value="metadata_merge/metadata.csv" ftype="csv"/>
            <param name="user_specified_column" value="name"/>
            <output name="output" file="metadata_merge/output.msp" ftype="msp"/>
        </test>
    </tests>

    <help>
        **Description**
            The tool takes an MSP file and a metadata CSV file and merges the metadata from the CSV
            file into the metadata of the MSP file, matching entries on a user-specified column.
    </help>

    <!-- DOI citations hold the bare DOI, not a resolver URL. -->
    <citations>
        <citation type="doi">10.5281/zenodo.8083373</citation>
    </citations>
</tool>
<!--
author	recetox
date	Mon, 04 Dec 2023 13:42:59 +0000
parents	caf007467c84
children	6e965d099233
-->