diff rgcca.xml @ 0:067d45e6caa9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author iuc
date Tue, 12 Jan 2021 10:12:04 +0000
children 4e73ea176c34
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rgcca.xml	Tue Jan 12 10:12:04 2021 +0000
@@ -0,0 +1,359 @@
+<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0">
+    <!-- Galaxy wrapper for RGCCA. The TOOL_VERSION token in the version attribute is also used to pin the package requirement below, so both stay in sync. -->
+    <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description>
+    <!-- Macro file imported by this wrapper; presumably the place where the tokens and test macros used in this file are defined (macro.xml is not visible here, confirm there). -->
+    <macros>
+        <import>macro.xml</import>
+    </macros>
+    <!-- EDAM ontology annotations; look the identifiers up in the EDAM ontology for their labels. -->
+    <edam_topics>
+        <edam_topic>topic_2269</edam_topic>
+    </edam_topics>
+    <edam_operations>
+        <edam_operation>operation_2945</edam_operation>
+        <edam_operation>operation_3465</edam_operation>
+        <edam_operation>operation_0337</edam_operation>
+    </edam_operations>
+    <!-- Runtime dependency: the rgccacmd package at the version given by the TOOL_VERSION token. -->
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Comma-separated lists handed to launcher.R: one path and one display name per block.
+        #set data_paths = ",".join([str(_.file_name) for _ in $blocks])
+        ## A comma would split a name and a single quote would close the shell quoting used below, so both become underscores.
+        #set data_names = ",".join([str(_.element_identifier).replace(',', '_').replace("'", '_') for _ in $blocks])
+        Rscript '$__tool_directory__/launcher.R'
+            --datasets '${data_paths}'
+            --names '${data_names}'
+            --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata'
+            ## Boolean params expand to their truevalue/falsevalue text, which is either a flag or nothing.
+            $parse.header
+            --separator $parse.separator
+            $analyse.superblock
+            $analyse.scale
+            ## Manual tau passes the numeric value; otherwise the select value itself (optimal) is passed.
+            #if $analyse.tau.bool == 'false'
+            --penalty $analyse.tau.value
+            #else
+            --penalty $analyse.tau.bool
+            #end if
+            --ncomp $analyse.ncomp
+            --scheme $analyse.scheme
+            ## The one-block family has no type select, so the type is fixed to pca.
+            #if $analyse.method.family == '1'
+            --type pca
+            #else
+            --type $analyse.method.type
+            #end if
+            ## Optional datasets are only passed when supplied; their paths are quoted for the shell.
+            #if $analyse.connection
+            --connection '$analyse.connection'
+            #end if
+            #if $analyse.supervised.learning_mode == 'supervised'
+            --response $analyse.supervised.block_response
+            #end if
+            #if $graphic.response
+            --group '$graphic.response'
+            #end if
+            --compx $graphic.compx
+            --compy $graphic.compy
+            --nmark $graphic.nmark
+            $graphic.text
+            --block $graphic.blockx
+            --block_y $graphic.blocky
+    ]]></command>
+    <inputs>
+        <!-- Required multi-dataset input holding the blocks to analyse. -->
+        <param name="blocks"
+            type="data"
+            format="tsv,tabular,txt,csv"
+            multiple="true"
+            optional="false"
+            label="Load blocks"
+            help="TSV file containing a matrix with: (i) quantitative values only (decimal should be separated by '.'), (ii) the samples in lines (should be labelled in the 1rst column) and (iii) variables in columns (should have a header)."/>
+        <!-- Parsing options; both values are placed directly on the command line. -->
+        <section name="parse" title="Advanced parsing" help="By default, on tabulated files with a header.">
+            <!-- Inverted flag: a checked box emits nothing, an unchecked box emits -H. -->
+            <param name="header" type="boolean" truevalue="" falsevalue="-H" checked="true" label="Consider the first row as header of columns" help="Used for both blocks and color files."/>
+            <!-- The option value (1 or 2), not its label, is what the separator argument receives. -->
+            <param name="separator" type="select" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files).">
+                <option value="1" selected="true">Tabulation</option>
+                <option value="2">Semicolon</option>
+            </param>
+        </section>
+        <!-- Analysis options. Several booleans here are inverted flags: the flag text is the falsevalue, so it is emitted only when the box is unchecked. -->
+        <section name="analyse" title="Advanced analysis"
+            help="By default, the analysis: is a Regularised Generalised Canonical Correlation Analysis, scales the blocks, uses a superblock with a factorial scheme function, a tau equals to one and two components for each block.">
+            <param name="ncomp" type="integer" label="Number of component" value="2" min="2" max="5"
+                help="The number of component to use in the analysis for each block (should not be greater than the minimum number of variable among the blocks)."/>
+            <!-- Inverted flag: the scale flag reaches the command only when this box is unchecked. -->
+            <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks"
+                help="A data centering step is always performed. If activated, each block is normalised and divided by the square root of its number of variables."/>
+            <!-- The family decides which type select is shown. Family 1 has no type select; the command template passes pca for it. -->
+            <conditional name="method">
+                <param name="family" type="select" label="Analysis method">
+                    <option value="1">One block</option>
+                    <option value="2">Two blocks</option>
+                    <option value="m" selected="true">Multiple blocks</option>
+                    <option value="ms">Multiple blocks with superblock</option>
+                </param>
+                <when value="2">
+                    <param name="type" type="select" label=" ">
+                        <option value="pls">Partial Least Squares Regression</option>
+                        <option value="cca">Canonical Correlation Analysis</option>
+                        <option value="ifa">Interbattery Factor Analysis</option>
+                        <option value="ra">Redundancy analysis</option>
+                    </param>
+                </when>
+                <when value="m">
+                    <param name="type" type="select" label=" ">
+                        <option value="rgcca">Regularized Generalized CCA</option>
+                        <option value="sgcca">Sparse Generalized CCA</option>
+                        <option value="sumcor">SUM of CORrelations method</option>
+                        <option value="ssqcor">Sum of SQuared CORrelations method</option>
+                        <option value="sabscor">Sum of ABSolute value CORrelations method</option>
+                        <option value="sumcov">SUM of COVariances method</option>
+                        <option value="ssqcov">Sum of SQuared COVariances method</option>
+                        <option value="sabscov">Sum of ABSolute value COVariances method</option>
+                        <option value="maxbet">MAXBET</option>
+                        <option value="maxbet-b">MAXBET-B</option>
+                    </param>
+                </when>
+                <when value="ms">
+                    <param name="type" type="select" label=" ">
+                        <option value="gcca">Generalized CCA</option>
+                        <option value="hpca">Hierarchical PCA</option>
+                        <option value="mfa">Multiple Factor Analysis</option>
+                    </param>
+                </when>
+                <when value="1"/>
+            </conditional>
+            <!-- Optional dataset: the command template adds the connection argument only when a file is supplied. -->
+            <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)"
+                help="TSV file without header and without row names. This file describes the connections between the blocks. It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/>
+            <!-- Inverted flag: the superblock flag reaches the command only when this box is unchecked. -->
+            <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock"
+                help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/>
+            <!-- block_response is passed as the response argument only in supervised mode. -->
+            <conditional name="supervised">
+                <param name="learning_mode" type="select" display="radio" label="Learning mode">
+                    <option value="unsupervised">Unsupervised</option>
+                    <option value="supervised">Supervised</option>
+                </param>
+                <when value="supervised">
+                    <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/>
+                </when>
+                <when value="unsupervised"/>
+            </conditional>
+            <!-- The select value false means manual entry: the command then passes the float below as the penalty; otherwise it passes the select value itself (optimal). -->
+            <conditional name="tau">
+                <param name="bool" type="select" display="radio" label="Tau selection"
+                    help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included).">
+                    <option value="false">Manual</option>
+                    <option value="optimal">Optimal</option>
+                </param>
+                <when value="false">
+                    <param name="value" type="float" label=" " value="1" min="0" max="1"/>
+                </when>
+                <when value="optimal"/>
+            </conditional>
+            <!-- The numeric option value (1 to 4) is what the scheme argument receives. -->
+            <param name="scheme" type="select" label="Scheme function" help="Link (i.e. scheme) function for covariance maximization is calculated with: the identity function (horst scheme),
+the absolute values (centroid scheme), the squared values (factorial scheme). Only, the horst scheme penalizes structural
+negative correlation. The factorial scheme discriminates more strongly the blocks than the centroid one.">
+                <option value="1">Horst : f(x)</option>
+                <option value="2" selected="true">Factorial : f(x)^2</option>
+                <option value="3">Centroid : f|x|</option>
+                <option value="4">Other: f(x)^4</option>
+            </param>
+        </section>
+        <!-- Plot options. The COMP_RULES and BLOCK_RULES tokens in the help texts are macro tokens, presumably defined in macro.xml (confirm there). -->
+        <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used.">
+            <!-- Optional dataset: the command template adds the group argument only when a file is supplied. -->
+            <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable"
+                help="A TSV file containing either: (i) an only column with a qualitative or a quantitative variable; (ii) multiple columns corresponding to a disjunctive table."/>
+            <!-- Inverted flag: the text flag reaches the command only when this box is unchecked. -->
+            <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/>
+            <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. @COMP_RULES@" value="1" min="1" max="5"/>
+            <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/>
+            <!-- blockx and blocky are passed as the block and block_y arguments of the command. -->
+            <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/>
+            <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/>
+            <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/>
+        </section>
+        <!-- Output picker: each ticked value is matched by name in the filter of one output dataset. -->
+        <param name="output_selector"
+            type="select"
+            multiple="true"
+            label="Outputs">
+            <option value="individuals" selected="true">Individual plot</option>
+            <option value="corcircle" selected="true">Corcircle plot</option>
+            <option value="top_variables">Top variables plot</option>
+            <option value="ave">Averages plot</option>
+            <option value="design">Design plot</option>
+            <option value="individual_table" selected="true">Individual table</option>
+            <option value="variable_table" selected="true">Variable table</option>
+            <option value="rdata">RData file</option>
+        </param>
+    </inputs>
+    <outputs>
+        <!-- Each dataset is produced only when its key is ticked in output_selector. -->
+        <!-- Plots (PDF) -->
+        <data name="individual_plot" format="pdf" label="${tool.name} on ${on_string}: individuals.pdf">
+            <filter>"individuals" in output_selector</filter>
+        </data>
+        <data name="corcircle" format="pdf" label="${tool.name} on ${on_string}: corcircle.pdf">
+            <filter>"corcircle" in output_selector</filter>
+        </data>
+        <data name="top_variables" format="pdf" label="${tool.name} on ${on_string}: top_variables.pdf">
+            <filter>"top_variables" in output_selector</filter>
+        </data>
+        <data name="ave" format="pdf" label="${tool.name} on ${on_string}: ave.pdf">
+            <filter>"ave" in output_selector</filter>
+        </data>
+        <data name="design" format="pdf" label="${tool.name} on ${on_string}: design.pdf">
+            <filter>"design" in output_selector</filter>
+        </data>
+        <!-- Tables (TSV) -->
+        <data name="individual_table" format="tsv" label="${tool.name} on ${on_string}: individuals.tsv">
+            <filter>"individual_table" in output_selector</filter>
+        </data>
+        <data name="variable_table" format="tsv" label="${tool.name} on ${on_string}: variables.tsv">
+            <filter>"variable_table" in output_selector</filter>
+        </data>
+        <!-- Serialised result (RData) -->
+        <data name="rdata" format="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData">
+            <filter>"rdata" in output_selector</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- One block. The second line pattern expects a label followed by four decimal numbers; the dot is escaped so that it only matches a decimal point. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="1block"/>
+            <param name="blocks" value="agriculture.tsv" ftype="tsv"/>
+            <output name="individual_table">
+                <assert_contents>
+                    <has_n_columns n="4"/>
+                    <has_line_matching
+                            expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/>
+                    <has_line_matching
+                            expression='^.+(\s\-?\d+\.\d+){4}$'/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Three blocks with a design matrix and the superblock box unchecked; the command must carry the full connection flag and the superblock flag. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_connection"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <param name="connection" value="connection.tsv" ftype="tsv"/>
+                <param name="superblock" value="false"/>
+            </section>
+            <assert_command>
+                <has_text text="--connection"/>
+                <has_text text="--superblock"/>
+            </assert_command>
+        </test>
+        <!-- Three blocks, supervised mode with block 3 as the response. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_supervised"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <param name="superblock" value="false"/>
+                <conditional name="supervised">
+                    <param name="learning_mode" value="supervised"/>
+                    <param name="block_response" value="3"/>
+                </conditional>
+            </section>
+            <assert_command>
+                <has_text text="--response 3"/>
+                <has_text text="--superblock"/>
+            </assert_command>
+        </test>
+        <!-- Three blocks with every analysis option left at its default. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks"/>
+            <expand macro="output_tests_3blocks"/>
+        </test>
+        <!-- Three blocks with the sgcca method type. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="3blocks_sgcca"/>
+            <expand macro="output_tests_3blocks"/>
+            <section name="analyse">
+                <conditional name="method">
+                    <param name="family" value="m"/>
+                    <param name="type" value="sgcca"/>
+                </conditional>
+            </section>
+            <assert_command>
+                <has_text text="sgcca"/>
+            </assert_command>
+        </test>
+        <!-- Two blocks with non-default analysis and graphic settings, including a response file for colouring. -->
+        <test expect_num_outputs="8" expect_exit_code="0">
+            <expand macro="output_tests" path="2blocks" compx="3" compy="1"/>
+            <param name="blocks" value="agriculture.tsv,politic.tsv"/>
+            <section name="analyse">
+                <param name="scale" value="false"/>
+                <conditional name="tau">
+                    <param name="bool" value="false"/>
+                    <param name="value" value="0"/>
+                </conditional>
+                <param name="scheme" value="3"/>
+                <param name="ncomp" value="3"/>
+                <conditional name="method">
+                    <param name="family" value="2"/>
+                    <param name="type" value="pls"/>
+                </conditional>
+            </section>
+            <section name="graphic">
+                <param name="response" value="political_system.tsv" ftype="tsv"/>
+                <param name="text" value="false"/>
+                <param name="compx" value="3"/>
+                <param name="compy" value="1"/>
+                <param name="blockx" value="2"/>
+                <param name="blocky" value="1"/>
+                <param name="nmark" value="11"/>
+            </section>
+            <assert_command>
+                <has_text text="pls"/>
+                <has_text text="--group"/>
+            </assert_command>
+        </test>
+    </tests>
+Etienne CAMENEN
+**R package:**
+The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA).
+A user-friendly multi-block analysis (Regularized Generalized Canonical Correlation Analysis, RGCCA) as described in [1] and [2], with all default settings predefined. The software produces figures to explore the results of the analysis: individuals and variables projected on two components of the multi-block analysis, a list of top variables and the explained variance in the model.
+**Working example**
+    | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata
+    | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* can be used as a design matrix and *political_system.tsv* as a response variable, in the analysis and graphic settings respectively.
+- RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf
+- accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files
+<!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md-->
+    <!-- Publications to cite for the method, given as DOIs; presumably the references [1] and [2] mentioned in the help text (confirm the order). -->
+    <citations>
+        <citation type="doi">10.1007/s11336-017-9573-x</citation>
+        <citation type="doi">10.1007/s11336-011-9206-8</citation>
+    </citations>