view msnbase_readmsdata.xml @ 15:7faf9b2d83f6 draft

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics/ commit 2cb157bd9a8701a3d6874e084032cbd050b8953e
author workflow4metabolomics
date Mon, 11 Sep 2023 09:24:51 +0000
parents 11ab2081bd4a
children 12def6edac2f
line wrap: on
line source

<tool id="msnbase_readmsdata" name="MSnbase readMSData" version="@WRAPPER_VERSION@+galaxy1">
    <description>Imports mass-spectrometry data files</description>

    <macros>
        <import>macros.xml</import>
        <import>macros_msnbase.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <command><![CDATA[
        ## Launch the R wrapper; the arguments depend on the datatype of the input.
        @COMMAND_RSCRIPT@/msnbase_readmsdata.r

        #if $input.is_of_type("mzxml", "mzml", "mzdata", "netcdf")
            ## Single file mode. When the dataset file is a symbolic link, the
            ## link target is passed and the sample is named after its basename;
            ## otherwise the dataset path and its element identifier are used.
            #if $os.path.islink(str($input))
                #set $galaxy_path = $os.readlink(str($input))
                #set $sample_name = $os.path.basename($galaxy_path)
            #else
                #set $galaxy_path = str($input)
                #set $sample_name = $input.element_identifier
            #end if
            singlefile_galaxyPath '$galaxy_path' singlefile_sampleName '$sample_name'
        #else
            ## Any other accepted format (zip) is handled in zip file mode.
            zipfile '$input'
        #end if

        @COMMAND_LOG_EXIT@
    ]]></command>

    <inputs>
        <!-- Single dataset: either one raw file (single file mode) or a zip archive (zip file mode). -->
        <param name="input"
               type="data"
               format="mzxml,mzml,mzdata,netcdf,zip"
               label="File(s) from your history containing your chromatograms"
               help="Single file mode for the following formats: mzxml, mzml, mzdata and netcdf. Zip file mode for the following formats: zip. See the help section below."/>
    </inputs>

    <outputs>
        <!-- R workspace collected from readmsdata.RData in the job working directory; always produced. -->
        <data name="xsetRData"
              format="rdata.msnbase.raw"
              label="${input.name.rsplit('.',1)[0]}.raw.RData"
              from_work_dir="readmsdata.RData"/>
        <!-- Sample table collected from sampleMetadata.tsv; kept only when the input
             extension is not one of the single file formats. -->
        <data name="sampleMetadata"
              format="tabular"
              label="${input.name.rsplit('.',1)[0]}.sampleMetadata.tsv"
              from_work_dir="sampleMetadata.tsv">
            <filter>input.extension not in ["mzxml","mzml","mzdata","netcdf"]</filter>
        </data>
    </outputs>

    <tests>

        <!-- Zip file mode: a zip input is expected to yield two outputs. The
             assertions look for the four sample paths (KO and WT sub-directories)
             and the feature summary in stdout, and the sampleMetadata output is
             compared with the reference file sampleMetadata.tsv. -->
        <test expect_num_outputs="2">
            <param name="input" value="faahKO_reduce.zip"  ftype="zip" />
            <assert_stdout>
                <has_text text="rowNames: faahKO_reduce/KO/ko15.CDF faahKO_reduce/KO/ko16.CDF" />
                <has_text text="faahKO_reduce/WT/wt15.CDF faahKO_reduce/WT/wt16.CDF" />
                <has_text text="featureNames: F1.S0001 F1.S0002 ... F4.S1278 (5112 total)" />
                <has_text text="fvarLabels: fileIdx spIdx ... spectrum (33 total)" />
                <has_text text="faahKO_reduce/KO/ko15.CDF        ko15           KO" />
                <has_text text="faahKO_reduce/KO/ko16.CDF        ko16           KO" />
                <has_text text="faahKO_reduce/WT/wt15.CDF        wt15           WT" />
                <has_text text="faahKO_reduce/WT/wt16.CDF        wt16           WT" />
            </assert_stdout>
            <output name="sampleMetadata" value="sampleMetadata.tsv" />
        </test>
        <!-- Single file mode: a netcdf input is expected to yield one output only
             (the sampleMetadata output is filtered out for this extension). Only
             stdout is checked. -->
        <test expect_num_outputs="1">
            <param name="input" value="ko15.CDF"  ftype="netcdf" />
            <assert_stdout>
                <has_text text="rowNames: ko15.CDF" />
                <has_text text="ko15.CDF" />
                <has_text text="featureNames: F1.S0001 F1.S0002 ... F1.S1278 (1278 total)" />
                <has_text text="fvarLabels: fileIdx spIdx ... spectrum (33 total)" />
                <has_text text="ko15.CDF        ko15            ." />
            </assert_stdout>
        </test>
        <!-- DISABLE FOR TRAVIS
        Useful to generate test-data for the further steps
        <test>
            <param name="input" value="ko16.CDF"  ftype="netcdf" />
            <assert_stdout>
                <has_text text="rowNames: ./ko16.CDF" />
                <has_text text="ko16.CDF" />
                <has_text text="./ko16.CDF        ko16            ." />
            </assert_stdout>
        </test>
        <test>
            <param name="input" value="wt15.CDF"  ftype="netcdf" />
            <assert_stdout>
                <has_text text="rowNames: ./wt15.CDF" />
                <has_text text="wt15.CDF" />
                <has_text text="./wt15.CDF        wt15            ." />
            </assert_stdout>
        </test>
        <test>
            <param name="input" value="wt16.CDF"  ftype="netcdf" />
            <assert_stdout>
                <has_text text="rowNames: ./wt16.CDF" />
                <has_text text="wt16.CDF" />
                <has_text text="./wt16.CDF        wt16            ." />
            </assert_stdout>
        </test>
        -->
    </tests>

    <help><![CDATA[

@HELP_AUTHORS@

==================
MSnbase readMSData
==================

-----------
Description
-----------

Reads a set of XML-based mass-spectrometry data files and
generates an MSnExp object. This function uses the functionality
provided by the ‘mzR’ package to access data and meta data in
‘mzData’, ‘mzXML’ and ‘mzML’.

.. _xcms: https://bioconductor.org/packages/release/bioc/html/xcms.html


-----------------
Workflow position
-----------------

**Upstream tools**

========================= ==========================================
Name                      Format
========================= ==========================================
Upload File               mzxml,mzml,mzdata,netcdf,zip
========================= ==========================================

The easiest way to proceed is to create a Dataset Collection of the type List.

**Downstream tools**

=========================== ==================== ====================
Name                        Output file          Format
=========================== ==================== ====================
xcms.findChromPeaks         ``*``.raw.RData      rdata.msnbase.raw
=========================== ==================== ====================



**Example of a metabolomic workflow**

.. image:: msnbase_readmsdata_workflow.png

---------------------------------------------------



-----------
Input files
-----------

=========================== ==================================
Parameter : num + label     Format
=========================== ==================================
OR : Zip file               zip
--------------------------- ----------------------------------
OR : Single file            mzXML, mzML, mzData, netCDF
=========================== ==================================

**Choose your inputs**

You have two methods for your inputs:

    | Single file (recommended): You can put a single file as input. That way, you will be able to launch several readMSData and findChromPeaks in parallel and use "findChromPeaks Merger" before groupChromPeaks.
    | Zip file: You can put a zip file containing your inputs: myinputs.zip (containing all your conditions as sub-directories).

Zip file: Steps for creating the zip file
-----------------------------------------

**Step 1: Creating your directory and organizing the sub-directories**


VERY IMPORTANT: If you zip your files under Windows, you must use the 7Zip_ software, otherwise your zip will not be unzipped correctly on the W4M platform (corrupted zip bug).

.. _7Zip: http://www.7-zip.org/

Your zip should contain all your conditions as sub-directories. For example, two conditions (mutant and wild):
arabidopsis/wild/01.raw
arabidopsis/mutant/01.raw

**Step 2: Creating a zip file**

Create your zip file (*e.g.* arabidopsis.zip).

**Step 3: Uploading it to our Galaxy server**

Advice for converting your files into mzXML format (XCMS input)
----------------------------------------------------------------

We recommend converting your raw files into **mzXML** in centroid mode (smaller files); this way the files will be compatible with the xcms centWave algorithm.

**We recommend the following parameters:**

Use Filtering: **True**

Use Peak Picking: **True**

Peak Picking - Apply to MS Levels: **All Levels (1-)** : Centroid Mode

Use zlib: **64**

Binary Encoding: **64**

m/z Encoding: **64**

Intensity Encoding: **64**


------------
Output files
------------

xset.RData: rdata.msnbase.raw format

    | Rdata file that is necessary in the second step of the workflow "xcms.findChromPeaks".

sampleMetadata.tsv (only when a zip is used)

    | Tabular file that contains, for each sample, its associated class and polarity (positive, negative or mixed).
    | This file is necessary in further steps of the workflow, such as the Anova and PCA steps.
    | You get a sampleMetadata.tsv only if you use a zip. Otherwise, you have to provide one for the findChromPeaks Merger step.

---------------------------------------------------

Changelog/News
--------------

.. _News: https://lgatto.github.io/MSnbase/news/index.html

**Version 2.16.1+galaxy0 - 08/04/2019**

- UPGRADE: upgrade the MSnbase version from 2.8.2 to 2.16.1 (see MSnbase News_). Most of the new features do not affect our usage of MSnbase.

**Version 2.8.2.1 - 30/04/2019**

- BUGFIX: remove the pre-compute of the chromatograms which was memory consuming. Now, only xcms plot chromatogram will generate the Chromatograms.

**Version 2.8.2.0 - 08/04/2019**

- UPGRADE: upgrade the MSnbase version from 2.4.0 to 2.8.2 (see MSnbase News_). Most of the new features do not affect our usage of MSnbase.

**Version 2.4.0.0 - 29/03/2018**

- NEW: a new dedicated tool to read the raw data. This function was previously included in xcms.findChromPeaks. This way, you will now be able to display TICs and BPCs before xcms.findChromPeaks.

    ]]></help>

    <expand macro="citation" />
</tool>