changeset 4:d4e9f7546dfa draft

"planemo upload for repository https://gitlab.com/bilille/galaxy-viscorvar commit 579dc54316e8ede493f86f434a87d3d7b692b023"
author ppericard
date Tue, 17 Nov 2020 13:01:44 +0000
parents df8428358b7f
children 88c1fd2ac110
files macros.xml macros_viscorvar.xml mixomics_blocksplsda.xml mixomics_plotindiv.xml mixomics_plotvar.xml test-data/mixomics_blocksplsda_output.rdata viscorvar_circlecor.xml viscorvar_computematsimilarity.xml viscorvar_matcoraddvar.xml viscorvar_networkvar.xml
diffstat 10 files changed, 90 insertions(+), 74 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/macros.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -9,13 +9,6 @@
     <!-- COMMAND -->
     <token name="@COMMAND_RSCRIPT@">LC_ALL=C Rscript $__tool_directory__/</token>
 
-    <token name="@COMMAND_LOG_EXIT@">
-        ;
-        return=\$?;
-        cat 'log.txt';
-        sh -c "exit \$return"
-    </token>
-
     <token name="@HELP_AUTHORS_WRAPPERS@">
 
 .. class:: infomark
@@ -26,4 +19,4 @@
 
     </token>
 
-</macros>
\ No newline at end of file
+</macros>
--- a/macros_viscorvar.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/macros_viscorvar.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <macros>
 
-    <token name="@TOOL_VERSION@">0.6</token>
+    <token name="@TOOL_VERSION@">0.7</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">r-viscorvar</requirement>
--- a/mixomics_blocksplsda.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/mixomics_blocksplsda.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="mixomics_blocksplsda" name="mixOmics block.splsda" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="mixomics_blocksplsda" name="mixOmics block.splsda" version="@TOOL_VERSION@+galaxy2" profile="16.04" workflow_compatible="true">
 
     <description>performs N-integration and feature selection with Projection to Latent Structures models (PLS) with sparse Discriminant Analysis</description>
 
@@ -46,8 +46,6 @@
         ##--sample_metadata_out ${sample_metadata_out}
         --variable_metadata_outdir outdir
 
-        @COMMAND_LOG_EXIT@
-
     ]]></command>
 
     <inputs>
@@ -55,28 +53,28 @@
             <param name="block_name" type="text" label="Block name" />
             <param name="keep" type="integer" value="0" min="0"
                    label="Number of variables to select for each component"
-                   help="[keep] Estimation of the number of variables in the block correlated with variables from the other blocks and correlated with response variables. If set to 0, all variables will be selected." />
+                   help="estimation of the number of variables of the block correlated with variables of the other blocks and correlated with response variables. If set to 0, all variables will be selected" />
             <param name="data_matrix" type="data" format="tabular"
                    label="Data matrix"
-                   help="Block data in tabular format (rows = variables, columns = samples). The first column contains the variables names and the first row contains the samples names. Samples names must be in the same order for all blocks and the sample metadata (transposed). The data must not contain missing values." />
+                   help="data matrix contains the values of the variables. For the file structure, see below in the section Input files" />
             <param name="variable_metadata" type="data" format="tabular" optional="true"
                    label="Variables metadata [optional]"
-                   help="Variables metadata in tabular format (rows = variables). The first colum contains the variables names. The first row contains the metadata column names. The number of rows in the metadata file must be the same than the number of rows in the block data file, and the variables need to be in the same order. If a metadata file is provided, block.splsda output will be appended as new columns, otherwise a new file will be created." />
+                   help="variables metadata contains the metadata of the variables. For the file structure, see below in the section Input files" />
         </repeat>
         <param name="sample_metadata_in" type="data" format="tabular"
                label="Samples metadata"
-               help="Samples metadata in tabular format (rows = samples). The first column contains the sample names. The first row contains the metadata column names. Samples names must be in the same order (transposed) than all the blocks. One of the column (the last by default) must contain the samples groups for integration." />
+               help="samples metadata contains the metadata of the samples. For the file structure, see below in the section Global input files" />
         <param name="sample_description_col" type="integer" value="0" min="0"
                label="Samples groups column number"
-               help="Column from the samples metadata file containing samples groups. If set to 0, the last column will be used." />
+               help="column from the samples metadata file containing samples groups. If set to 0, the last column will be used" />
         <param name="correlation" type="boolean" truevalue="--correlation"
                falsevalue="" checked="false"
                label="Correlation between all blocks"
-               help="[design] If set to Yes, data integration will take into account correlations between all the blocks (design matrix with diagonal coefficients set to 0 and the rest of the coefficients set to 1)." />
+               help="if set to Yes, data integration will take into account correlations between all the blocks. If set to No, data integration will only take into account correlations between the blocks and the response" />
         <section name="adv" title="Advanced Options" expanded="false">
             <param name="ncomp" type="integer" value="2" min="1"
                    label="Number of components to include in the model"
-                   help="[ncomp] Number of new variables (components) computed by the data integration." />
+                   help="number of new variables (components) computed by the data integration" />
             <!-- <param name="scheme" type="select" label="Scheme">
                 <option value="horst" selected="true">horst</option>
                 <option value="factorial"            >factorial</option>
@@ -90,7 +88,7 @@
             </param> -->
             <param name="maxiter" type="integer" value="100" min="1"
                    label="Maximum number of iterations"
-                   help="[max.iter] Maximum number of iterations performed by block.splsda." />
+                   help="maximum number of iterations performed by block.splsda" />
             <!-- <param name="scale" type="boolean" truevalue="-\-scale" falsevalue="" checked="true"
                    label="Scale"
                    help="if checked, each block is standardized to zero means and unit variances" /> -->
@@ -130,7 +128,8 @@
                 <param name="data_matrix" value="in_block2_data.tabular" />
             </repeat>
             <param name="sample_metadata_in" value="in_sample_meta.tabular" />
-            <output name="rdata_out" value="out_rdata.rdata" />
+            <!-- <param name="correlation" value="true" /> -->
+            <output name="rdata_out" value="mixomics_blocksplsda_output.rdata" />
             <!-- <output name="sample_metadata_out" value="out_sample_meta.tabular" /> -->
         </test>
     </tests>
@@ -147,8 +146,12 @@
 Description
 -----------
 
-The blocks.splsda function is part of the mixOmics package for exploration and integration of Omics datasets.
-Performs N-integration and feature selection with Projection to Latent Structures models (PLS) with sparse Discriminant Analysis.
+The block.splsda function is part of the mixOmics package for exploration and integration of omics datasets.
+This data integration takes as input parameters different omics datasets
+(transcriptomic, metabolomic, metagenomic, ...) and a response variable (e.g. for a sample, the value of the response
+variable is equal to « Treated » or « Control »). This data integration returns, for each omics dataset, variables
+which are correlated with the variables of the other omics datasets and the response variable. The other functions of
+this pipeline allow visualizing these correlated variables thanks to correlation circles and networks.
 
 -----------------
 Workflow position
@@ -183,18 +186,21 @@
 | 2 : [opt] Variables metadata |   tabular  |
 +------------------------------+------------+
 
+1. Data matrix structure
+The data matrix is in tabular format (.tsv).
+The first column contains the variables names.
+The first row contains the samples names.
+Samples names must be in the same order for all blocks and the sample metadata (transposed). The data must not contain missing values.
+
+2. Variables metadata structure
+The variables metadata is in tabular format (.tsv).
+The first column contains the variables names.
+The first row contains the metadata column names.
+The number of rows in the metadata file must be the same as the number of rows in the block data file, and the variables need to be in the same order. If a metadata file is provided, block.splsda output will be appended as new columns, otherwise a new file will be created.
+
 Variables metadata files are optional.
 If a file is provided, output metadata will be appended to the input file, otherwise a new output file will be created.
 
-1. Data matrix format
-    * Rows = variables, Columns = samples
-    * First row = samples name. MUST be the same and in the same order in every block as well as in the sample metadata file (transposed)
-    * First column = variables name
-
-2. Variables metadata format
-    * Rows = variables, Columns = metadata
-    * First row = metadata column names
-    * First column = variables names. MUST be the same and in the same order than in the associated data matrix
 
 Global input files:
 -------------------
@@ -205,13 +211,11 @@
 | 1 : Samples metadata        |   tabular  |
 +-----------------------------+------------+
 
-By default, the last column of the samples metadata matrix will be used as samples description factors.
-If it's not the case, the column number can be inputed in the `Sample description column number` parameter.
-
-1. Samples metadata format
-    * Rows = samples, Columns = metadata
-    * First row = metadata column names
-    * First column = sample names. These names must be identical (transposed) and in the same order than for the blocks data matrices
+1. Samples metadata structure
+Samples metadata is in tabular format (.tsv).
+The first column contains the sample names.
+The first row contains the metadata column names.
+Samples names must be in the same order in samples metadata (transposed) and all the blocks. One of the columns (the last by default) must contain the samples groups for integration.
 
 ----------
 Parameters
@@ -270,4 +274,4 @@
 
     <expand macro="citations" />
 
-</tool>
\ No newline at end of file
+</tool>
--- a/mixomics_plotindiv.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/mixomics_plotindiv.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="mixomics_plotindiv" name="mixOmics plotIndiv" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="mixomics_plotindiv" name="mixOmics plotIndiv" version="@TOOL_VERSION@+galaxy2" profile="16.04" workflow_compatible="true">
 
     <description>provides scatter plots for individuals (experimental units) representation in (sparse)(I)PCA,(regularized)CCA, (sparse)PLS(DA) and (sparse)(R)GCCA(DA)</description>
 
@@ -21,14 +21,12 @@
 
         --output_pdf $output_pdf
 
-        @COMMAND_LOG_EXIT@
-
     ]]></command>
 
     <inputs>
         <param name="input_rdata" type="data" format="rdata"
                label="Input RData file from (sparse)(I)PCA, (regularized)CCA, (sparse)PLS(DA) or (sparse)(R)GCCA(DA)"
-               help="This is the RData output file from the block.splsda function." />
+               help="this is the RData output file from the block.splsda function" />
         <section name="adv" title="Advanced Options" expanded="false">
             <param name="legend" type="boolean" checked="true" truevalue="--legend" falsevalue=""
                    label="Plot legend" />
@@ -60,8 +58,7 @@
 Description
 -----------
 
-The plotIndiv function is part of the mixOmics package for exploration and integration of Omics datasets.
-Provides scatter plots for individuals (experimental units) representation in (sparse)(I)PCA,(regularized)CCA, (sparse)PLS(DA) and (sparse)(R)GCCA(DA).
+This tool allows visualizing the samples on a two-dimensional plot. An effect can be visualized along the abscissa axis and along the ordinate axis.
 
 -----------------
 Workflow position
--- a/mixomics_plotvar.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/mixomics_plotvar.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="mixomics_plotvar" name="mixOmics plotVar" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="mixomics_plotvar" name="mixOmics plotVar" version="@TOOL_VERSION@+galaxy2" profile="16.04" workflow_compatible="true">
 
     <description>provides variables representation for (regularized) CCA, (sparse) PLS regression, PCA and (sparse) Regularized generalised CCA</description>
 
@@ -21,20 +21,18 @@
 
         --output_pdf $output_pdf
 
-        @COMMAND_LOG_EXIT@
-
     ]]></command>
 
     <inputs>
         <param name="input_rdata" type="data" format="rdata"
                label="Input RData file from (sparse)(I)PCA, (regularized)CCA, (sparse)PLS(DA) or (sparse)(R)GCCA(DA)"
-               help="This is the RData output file from the block.splsda function." />
+               help="this is the RData output file from the block.splsda function" />
         <section name="adv" title="Advanced Options" expanded="false">
             <param name="legend" type="boolean" checked="true" truevalue="--legend" falsevalue=""
                    label="Plot legend" />
             <param name="cutoff" type="float" value="0" min="0" max="1"
                    label="Cut-off"
-                   help="Only selected variables whose correlation with the first or second axis is greater than Cut-off in absolute value will be plotted." />
+                   help="only selected variables whose correlation with the first or second axis is greater than Cut-off in absolute value will be plotted" />
         </section>
     </inputs>
 
@@ -61,8 +59,8 @@
 Description
 -----------
 
-The plotVar function is part of the mixOmics package for exploration and integration of Omics datasets.
-Provides variables representation for (regularized) CCA, (sparse) PLS regression, PCA and (sparse) Regularized generalised CCA.
+This tool allows visualizing the variables of an omics dataset which are correlated with the variables
+of the other omics datasets and the response variable in a correlation circle.
 
 -----------------
 Workflow position
Binary file test-data/mixomics_blocksplsda_output.rdata has changed
--- a/viscorvar_circlecor.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/viscorvar_circlecor.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="viscorvar_circlecor" name="visCorVar circleCor" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="viscorvar_circlecor" name="visCorVar circleCor" version="@TOOL_VERSION@+galaxy0" profile="16.04" workflow_compatible="true">
 
     <description>plots a correlation circle for the datasets whose correlation circles can be superimposed. This correlation circle contains the selected variables of these datasets which are included in a rectangle and the response variables.</description>
 
@@ -29,17 +29,17 @@
         @COMMAND_LOG_EXIT@
 
     ]]></command>
-
     <inputs>
         <param name="input_rdata" type="data" format="rdata"
                label="Input RData file"
-               help="output RData from matCorAddVar"/>
+               help="this is the RData output file from the matCorAddVar function"/>
         <param name="blocks_vec_list" type="data" format="tabular"
                label="List of blocks vector."
-               help="output *_blocks_comb.tsv file from matCorAddVar"/>
+               help="output *_blocks_comb.tsv file from matCorAddVar."/>
         <param name="select_blocks" type="select"
                label="Blocks"
-               help="list of the blocks that are to be superimposed">
+               help="each element of List of blocks vector contain blocks for which selected variables can
+               be visualized in the correlation circle">
             <options from_dataset="blocks_vec_list">
                 <column name="value" index="0"/>
                 <filter type="unique_value" column="0"/>
@@ -47,11 +47,11 @@
             </options>
         </param>
         <param name="responses_var_list" type="data" format="tabular"
-               label="List of response variables"
+               label="List of response variables."
                help="output *_response_var.tsv file from matCorAddVar"/>
         <param name="select_responses_var" type="select" display="checkboxes"
                multiple="true" label="Response variables"
-               help="select the response variables which will be plotted in the correlation circle">
+               help="choose the response variables which will be plotted in the correlation circle">
         <!-- <param name="select_responses_var" type="select" multiple="true" label="Response variables"> -->
             <options from_dataset="responses_var_list">
                 <column name="value" index="0"/>
@@ -89,7 +89,11 @@
 Description
 -----------
 
-Bla bla...
+This tool allows visualizing variables of omics datasets which are correlated with
+the response variables thanks to correlation circles. The determination of the omics
+datasets which can be visualized is made by the tool matCorAddVar. This tool performs
+a zoom in a rectangle to retrieve omics datasets variables which are correlated with a
+response variable. 
 
 -----------------
 Workflow position
--- a/viscorvar_computematsimilarity.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/viscorvar_computematsimilarity.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="viscorvar_computematsimilarity" name="visCorVar computeMatSimilarity" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="viscorvar_computematsimilarity" name="visCorVar computeMatSimilarity" version="@TOOL_VERSION@+galaxy0" profile="16.04" workflow_compatible="true">
 
     <description>performs the computation of the similarities. The similarity between two variables is an approximation of the correlation between these two variables.</description>
 
@@ -21,11 +21,10 @@
         @COMMAND_LOG_EXIT@
 
     ]]></command>
-
     <inputs>
         <param name="input_rdata" type="data" format="rdata"
                label="Input RData file"
-               help="output RData file from matCorAddVar"/>
+               help="this is the RData output file from matCorAddVar function"/>
     </inputs>
 
     <outputs>
@@ -47,7 +46,9 @@
 Description
 -----------
 
-Bla bla...
+This tool is a pre-processing step in order to create networks. It computes an
+approximation of the correlation between a variable of an omics dataset and a variable of
+another omics dataset.
 
 -----------------
 Workflow position
--- a/viscorvar_matcoraddvar.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/viscorvar_matcoraddvar.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="viscorvar_matcoraddvar" name="visCorVar matCorAddVar" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="viscorvar_matcoraddvar" name="visCorVar matCorAddVar" version="@TOOL_VERSION@+galaxy0" profile="16.04" workflow_compatible="true">
 
     <description>determine the correlation circles that can be overlaid and compute the correlations</description>
 
@@ -34,14 +34,14 @@
     <inputs>
         <param name="input_rdata" type="data" format="rdata"
                label="Input RData file from block.SPLSDA"
-               help="This is the RData output file from the block.splsda function." />
+               help="this is the RData output file from the block.splsda function" />
         <param name="block_Y" type="data" format="tabular"
                label="Block Y"
-               help="[block_Y] (tabular format) This table contains the name of the samples in the first column. The other columns correspond to phenotypes. For each of these other columns, a column determines which sample is associated with this phenotype (value equals to 1) or not (value equals to 0). The name of the samples in Block Y (transposed), in the sample metadata (transposed) and for all datasets have to be in the same order" />
+               help="Block Y is a table. A column determines which sample is associated with a phenotype (value equals to 1) or not (value equals to 0). For the file structure, see below in the section Input files" />
         <!-- Fichier avec noms de gènes/variables, donné par l'utilisateur -->
         <param name="var_of_interest_file" type="data" format="txt" optional="true"
                label="Variables of interest (Optional)"
-               help="variables not belonging to any block will not be considered"/>
+               help="variables not belonging to any block will not be considered. For the file structure, see below in the section Input files"/>
         <!-- <param name="cutoff_comp" type="float" value="0.775" min="0" max="1"
                label="Cutoff comp"
                help="[cutoff_comp] Two correlation circles will be superimposed if the correlation of their first and second axis is greater than cutoff_comp in absolute value." /> -->
@@ -68,7 +68,12 @@
 Description
 -----------
 
-Bla bla...
+This tool is a pre-processing step of the pipeline. It computes the correlations
+between omics datasets variables, variables of interest (optional), response variables and
+the components which are output of the data integration. The variables of interest are omics
+datasets variables that will be added to the network. It also determines the omics datasets
+for which the correlated variables of these omics datasets can be visualized with correlation
+circles and networks.
 
 -----------------
 Workflow position
@@ -110,6 +115,15 @@
 | 3 : [opt] Variables of interest         |  txt      |
 +-----------------------------------------+-----------+
 
+2. Block Y structure
+Block Y is in tabular format (.tsv).
+This table contains the name of the samples in the first column.
+The other columns correspond to phenotypes.
+Each of these other columns indicates which samples are associated with the corresponding phenotype (value equal to 1) or not (value equal to 0). The names of the samples in Block Y (transposed), in the sample metadata (transposed) and for all datasets have to be in the same order.
+
+3. Variables of interest structure
+All the variables of interest are in the same column.
+
 ----------
 Parameters
 ----------
--- a/viscorvar_networkvar.xml	Fri Oct 23 11:26:18 2020 +0000
+++ b/viscorvar_networkvar.xml	Tue Nov 17 13:01:44 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="viscorvar_networkvar" name="visCorVar networkVar" version="@TOOL_VERSION@+galaxy1" profile="16.04" workflow_compatible="true">
+<tool id="viscorvar_networkvar" name="visCorVar networkVar" version="@TOOL_VERSION@+galaxy0" profile="16.04" workflow_compatible="true">
 
     <description>creates a network between selected variables of datasets and the response variables. In the network, the similarity between two variables is associated with the link between these two variables.</description>
 
@@ -23,10 +23,10 @@
         @COMMAND_LOG_EXIT@
 
     ]]></command>
-
+   
     <inputs>
         <param name="mat_similarity_rdata" type="data" format="rdata"
-               label="Input computeMatSimilarity RData file"
+               label="this is the RData output file from the computeMatSimilarity function"
                help="output RData file from computeMatSimilarity"/>
         <param name="var_list_file" type="data" format="tabular"
                label="Variables list file"
@@ -71,7 +71,12 @@
 Description
 -----------
 
-Bla bla...
+This tool creates a network of correlated variables for omics datasets. The determination of
+the omics datasets which can be visualized is made by the tool matCorAddVar. This network can
+be exported to graphml format and visualized with Cytoscape. The link between two variables
+is associated with the correlation between these two variables: a threshold can be used in
+Cytoscape to get a network of variables whose correlation is greater than this threshold
+in absolute value.
 
 -----------------
 Workflow position