<!--
view mixomics_blocksplsda.xml @ 5:88c1fd2ac110 draft default tip

"planemo upload for repository https://gitlab.com/bilille/galaxy-viscorvar commit 21d09ff286a496ff475f32626d88dd42423ae663"
author ppericard
date Tue, 07 Sep 2021 10:40:08 +0000
parents d4e9f7546dfa
children
line wrap: on
line source
-->

<tool id="mixomics_blocksplsda" name="mixOmics block.splsda" version="@TOOL_VERSION@+galaxy0" profile="16.04" workflow_compatible="true">

    <description>performs N-integration and feature selection with Projection to Latent Structures models (PLS) with sparse Discriminant Analysis</description>

    <macros>
        <import>macros.xml</import>
        <import>macros_mixomics.xml</import>
    </macros>

    <expand macro="requirements"/>
    <expand macro="stdio"/>

    <command detect_errors="aggressive"><![CDATA[

        ## Builds the command line for mixomics_blocksplsda.R.
        ## Each block adds one "--block" group of four values:
        ## label, number of variables to keep, data matrix path, variable metadata path.
        ## Values that may contain spaces or shell metacharacters are single-quoted.

        mkdir outdir
        && @COMMAND_RSCRIPT@/mixomics_blocksplsda.R

        #for $b in $blocks
            ## Fall back to the dataset name when no block name was entered.
            #if $b.block_name
                #set $block_label = str($b.block_name)
            #else
                #set $block_label = str($b.data_matrix.name)
            #end if
            ## A single quote inside the label would end the shell quoting below.
            #set $block_label = $block_label.replace("'", "_")
            --block
            '${block_label}'
            ${b.keep}
            '${b.data_matrix}'
            ## Optional input: renders as the literal string None when no dataset is selected.
            '${b.variable_metadata}'
        #end for

        --sample_metadata_in '${sample_metadata_in}'
        --sample_description_col ${sample_description_col}

        --ncomp ${adv.ncomp}
        ## Boolean flags expand to nothing when unchecked, so they must stay unquoted.
        ${correlation}
        ##--scheme ${adv.scheme}
        --maxiter ${adv.maxiter}
        ##${adv.scale}
        --scale ## always activated hidden param
        ${adv.check_missing_values}
        ##--init ${adv.init}
        ##--tol ${adv.tol}
        ##${adv.nearzerovar}

        --rdata_out '${rdata_out}'
        ##--sample_metadata_out ${sample_metadata_out}
        --variable_metadata_outdir outdir

    ]]></command>

    <inputs>
        <!-- One repeat instance per omics block. The help section documents a
             minimum of two blocks, so the form enforces it. -->
        <repeat name="blocks" title="Blocks" min="2">
            <param name="block_name" type="text" label="Block name" />
            <param name="keep" type="integer" value="0" min="0"
                   label="Number of variables to select for each component"
                   help="estimation of the number of variables of the block correlated with variables of the other blocks and correlated with response variables. If set to 0, all variables will be selected" />
            <param name="data_matrix" type="data" format="tabular"
                   label="Data matrix"
                   help="data matrix contains the values of the variables. For the file structure, see below in the section Input files" />
            <param name="variable_metadata" type="data" format="tabular" optional="true"
                   label="Variables metadata [optional]"
                   help="variables metadata contains the metadata of the variables. For the file structure, see below in the section Input files" />
        </repeat>
        <param name="sample_metadata_in" type="data" format="tabular"
               label="Samples metadata"
               help="samples metadata contains the metadata of the samples. For the file structure, see below in the section Global input files" />
        <param name="sample_description_col" type="integer" value="0" min="0"
               label="Samples groups column number"
               help="column from the samples metadata file containing samples groups. If set to 0, the last column will be used" />
        <param name="correlation" type="boolean" truevalue="--correlation"
               falsevalue="" checked="false"
               label="Correlation between all blocks"
               help="if set to Yes, data integration will take into account correlations between all the blocks. If set to No, data integration will only take into account correlations between the blocks and the response" />
        <section name="adv" title="Advanced Options" expanded="false">
            <param name="ncomp" type="integer" value="2" min="1"
                   label="Number of components to include in the model"
                   help="number of new variables (components) computed by the data integration" />
            <!-- <param name="scheme" type="select" label="Scheme">
                <option value="horst" selected="true">horst</option>
                <option value="factorial"            >factorial</option>
                <option value="centroid"             >centroid</option>
            </param> -->
            <param name="maxiter" type="integer" value="100" min="1"
                   label="Maximum number of iterations"
                   help="maximum number of iterations performed by block.splsda" />
            <!-- <param name="scale" type="boolean" truevalue="-\-scale" falsevalue="" checked="true"
                   label="Scale"
                   help="if checked, each block is standardized to zero means and unit variances" /> -->
            <!-- The backslash escape is only needed inside XML comments; this live
                 parameter uses the plain flag, like the correlation parameter. -->
            <param name="check_missing_values" type="boolean" truevalue="--check_missing_values" falsevalue="" checked="true"
                   label="Check for missing values"
                   help="will raise an error if missing values are found in data matrices" />
            <!-- <param name="init" type="select" label="Init">
                <option value="svd" selected="true">svd</option>
                <option value="svd.single"         >svd.single</option>
            </param>
            <param name="tol" type="float" value="1e-06" min="0"
                   label="Convergence stopping value"
                   help="[tol]" />
            <param name="nearzerovar" type="boolean" truevalue="-\-nearzerovar" falsevalue="" checked="true"
                   label="Should be set to TRUE in particular for data with many zero values" /> -->
        </section>
    </inputs>

    <outputs>
        <!-- RData file whose path is handed to the script through the rdata_out option. -->
        <data name="rdata_out"
              format="rdata"
              label="${tool.name}_output.RData"/>
        <!-- <data name="sample_metadata_out" format="tabular"
              label="${tool.name}_${sample_metadata_in.name}" /> -->
        <!-- List collection filled from the .tsv files found in outdir; the part of
             each file name captured before ".tsv" becomes the element designation. -->
        <collection name="blocks_output"
                    type="list"
                    label="${tool.name}_blocks_output">
            <discover_datasets directory="outdir"
                               pattern="(?P&lt;designation&gt;.+)\.tsv"
                               format="tabular"/>
        </collection>
    </outputs>

    <tests>
        <!-- Two-block run; every parameter not listed here keeps its default value. -->
        <test>
            <!-- Global input shared by all blocks. -->
            <param name="sample_metadata_in" value="in_sample_meta.tabular"/>
            <!-- <param name="correlation" value=true /> -->

            <!-- First block. -->
            <repeat name="blocks">
                <param name="block_name" value="Block1"/>
                <param name="data_matrix" value="in_block1_data.tabular"/>
            </repeat>

            <!-- Second block. -->
            <repeat name="blocks">
                <param name="block_name" value="Block2"/>
                <param name="data_matrix" value="in_block2_data.tabular"/>
            </repeat>

            <!-- Expected RData result. -->
            <output name="rdata_out" value="mixomics_blocksplsda_output.rdata"/>
            <!-- <output name="sample_metadata_out" value="out_sample_meta.tabular" /> -->
        </test>
    </tests>

    <help><![CDATA[

@HELP_AUTHORS@

======================
mixOmics block.splsda
======================

-----------
Description
-----------

The block.splsda function is part of the mixOmics package for exploration and integration of omics datasets.
This data integration takes as input parameters different omics datasets
(transcriptomics, metabolomics, metagenomics, ...) and a response variable (e.g. for a sample, the value of the response
variable is equal to « Treated » or « Control »). This data integration is performed with the scheme "horst" and the mode "regression". This data integration returns, for each omics dataset, variables
which are correlated with the variables of the other omics datasets and the response variable. The other functions of
this pipeline allow visualizing these correlated variables thanks to correlation circles and networks.

-----------------
Workflow position
-----------------

**Downstream tools**

======================= ================================== ==========
Name                               Output file               Format
======================= ================================== ==========
mixOmics.plotIndiv      mixomics_blocksplsda_output.RData  rdata
----------------------- ---------------------------------- ----------
mixOmics.plotVar        mixomics_blocksplsda_output.RData  rdata
----------------------- ---------------------------------- ----------
visCorVar.matCorAddVar  mixomics_blocksplsda_output.RData  rdata
======================= ================================== ==========

---------------------------------------------------

-----------
Input files
-----------

For each block (min 2 blocks):
------------------------------

+------------------------------+------------+
| Parameter : num + label      |   Format   |
+==============================+============+
| 1 : Data matrix              |   tabular  |
+------------------------------+------------+
| 2 : [opt] Variables metadata |   tabular  |
+------------------------------+------------+

1. Data matrix structure
The data matrix is in tabular format (.tsv).
The first column contains the variables names.
The first row contains the samples names.
Samples names must be in the same order for all blocks and the sample metadata (transposed). The data must not contain missing values.

2. Variables metadata structure
The variables metadata is in tabular format (.tsv).
The first column contains the variables names.
The first row contains the metadata column names.
The number of rows in the metadata file must be the same as the number of rows in the block data file, and the variables need to be in the same order. If a metadata file is provided, block.splsda output will be appended as new columns, otherwise a new file will be created.


Global input files:
-------------------

+-----------------------------+------------+
| Parameter : num + label     |   Format   |
+=============================+============+
| 1 : Samples metadata        |   tabular  |
+-----------------------------+------------+

1. Samples metadata structure
Samples metadata is in tabular format (.tsv).
The first column contains the sample names.
The first row contains the metadata column names.
Samples names must be in the same order in samples metadata (transposed) and all the blocks. One of the columns (the last by default) must contain the samples groups for data integration.

----------
Parameters
----------

Number of variables to select for each component
   tune.block.splsda can be used to set the number of variables to select on each component

@HELP_MANUAL@

------------
Output files
------------

mixomics_blocksplsda_output.RData
    | RData output
    | Contains the `mixomics_result` R object containing the result of the block.splsda function

mixomics_blocksplsda_blocks_output
    A collection with the variable metadata output files (mixomics_blocksplsda_block_{block name}_variable_metadata) for each input block

    ]]></help>

    <expand macro="citations" />

</tool>