changeset 1:4e73ea176c34 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit ce05b5eb018ae1c4d580ab5ce1a33896c1aa8c5b"
author iuc
date Sun, 18 Jul 2021 18:03:12 +0000
parents 067d45e6caa9
children
files launcher.R macro.xml rgcca.xml test-data/1block/ave.pdf test-data/1block/corcircle.pdf test-data/1block/design.pdf test-data/1block/individuals.pdf test-data/1block/rgcca.result.RData test-data/1block/top_variables.pdf test-data/2blocks/ave.pdf test-data/2blocks/corcircle.pdf test-data/2blocks/design.pdf test-data/2blocks/individuals.pdf test-data/2blocks/rgcca.result.RData test-data/2blocks/top_variables.pdf test-data/3blocks/ave.pdf test-data/3blocks/corcircle.pdf test-data/3blocks/design.pdf test-data/3blocks/individuals.pdf test-data/3blocks/rgcca.result.RData test-data/3blocks/top_variables.pdf test-data/3blocks_connection/ave.pdf test-data/3blocks_connection/corcircle.pdf test-data/3blocks_connection/design.pdf test-data/3blocks_connection/individuals.pdf test-data/3blocks_connection/rgcca.result.RData test-data/3blocks_connection/top_variables.pdf test-data/3blocks_sgcca/ave.pdf test-data/3blocks_sgcca/corcircle.pdf test-data/3blocks_sgcca/design.pdf test-data/3blocks_sgcca/individuals.pdf test-data/3blocks_sgcca/rgcca.result.RData test-data/3blocks_sgcca/top_variables.pdf test-data/3blocks_supervised/ave.pdf test-data/3blocks_supervised/corcircle.pdf test-data/3blocks_supervised/design.pdf test-data/3blocks_supervised/individuals.pdf test-data/3blocks_supervised/rgcca.result.RData test-data/3blocks_supervised/top_variables.pdf test-data/connection.tsv
diffstat 40 files changed, 110 insertions(+), 116 deletions(-) [+]
line wrap: on
line diff
--- a/launcher.R	Tue Jan 12 10:12:04 2021 +0000
+++ b/launcher.R	Sun Jul 18 18:03:12 2021 +0000
@@ -1,6 +1,8 @@
+#!/usr/bin/env Rscript
+
 # Author: Etienne CAMENEN
-# Date: 2020
-# Contact: arthur.tenenhaus@centralesupelec.fr
+# Date: 2021
+# Contact: etienne.camenen@gmail.com
 # Key-words: omics, RGCCA, multi-block
 # EDAM operation: analysis, correlation, visualisation
 #
@@ -74,11 +76,12 @@
             type = "character",
             metavar = "character",
             default = opt[2],
-            help = "Type of analysis [default: %default] (among: rgcca, pca,
-            cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff,
-            maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca,
-            sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca,
-            sumcor, sumcov-1, sumcov-2, sumcov)"
+            help = "Type of analysis [default: %default] (among: rgcca, sgcca,
+            pca, spca, pls, spls, cca, ifa, ra, gcca, maxvar, maxvar-b,
+            maxvar-a, mcoa,cpca-1, cpca-2, cpca-4, hpca, maxbet-b, maxbet,
+            maxdiff-b, maxdiff, maxvar-a, sabscor, ssqcor, ssqcov-1, ssqcov-2,
+            ssqcov, sumcor, sumcov-1, sumcov-2, sumcov, sabscov, sabscov-1,
+            sabscov-2)"
         ),
         make_option(
             opt_str = "--ncomp",
@@ -245,10 +248,6 @@
     return(optparse::OptionParser(option_list = option_list))
 }
 
-char_to_list <- function(x) {
-    strsplit(gsub(" ", "", as.character(x)), ",")[[1]]
-}
-
 check_arg <- function(opt) {
     # Check the validity of the arguments opt : an optionParser object
 
@@ -318,71 +317,6 @@
     return(opt)
 }
 
-check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) {
-
-    if (is.null(y))
-        y <- x
-
-    if (type %in% c("matrix", "data.frame"))
-        y_temp <- y
-
-    y <- suppressWarnings(as.double(as.matrix(y)))
-
-    if (any(is.na(y)))
-        stop_rgcca(paste(x, "should not be NA."))
-
-    if (!is(y, "numeric"))
-        stop_rgcca(paste(x, "should be numeric."))
-
-    if (type == "scalar" && length(y) != 1)
-        stop_rgcca(paste(x, "should be of length 1."))
-
-    if (!float)
-        y <- as.integer(y)
-
-    if (all(y < min))
-        stop_rgcca(paste0(x, " should be higher than or equal to ", min, "."))
-
-    if (type %in% c("matrix", "data.frame"))
-        y <- matrix(
-            y,
-            dim(y_temp)[1],
-            dim(y_temp)[2],
-            dimnames = dimnames(y_temp)
-        )
-
-    if (type == "data.frame")
-        as.data.frame(y)
-
-    return(y)
-}
-
-load_libraries <- function(librairies) {
-    for (l in librairies) {
-        if (!(l %in% installed.packages()[, "Package"]))
-            utils::install.packages(l, repos = "cran.us.r-project.org")
-        suppressPackageStartupMessages(
-            library(
-                l,
-                character.only = TRUE,
-                warn.conflicts = FALSE,
-                quietly = TRUE
-        ))
-    }
-}
-
-stop_rgcca <- function(
-    message,
-    exit_code = "1",
-    call = NULL) {
-
-    base::stop(
-        structure(
-            class = c(exit_code, "simpleError", "error", "condition"),
-            list(message = message, call. = NULL)
-    ))
- }
-
 ########## Main ##########
 
 # Get arguments : R packaging install, need an opt variable with associated
@@ -411,7 +345,12 @@
         collapse = ",")
 )
 
-load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv"))
+# Load functions
+all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = TRUE))
+for (i in all_funcs)
+    eval(parse(text = paste0(i, "<-RGCCA:::", i)))
+
+load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "Deriv"))
 try(load_libraries("ggrepel"), silent = TRUE)
 
 tryCatch(
@@ -423,16 +362,17 @@
         stop_rgcca(w[[1]], exit_code = 141)
 )
 
-# Load functions
-all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = T))
-for (i in all_funcs)
-    eval(parse(text = paste0(i, "<-RGCCA:::", i)))
-
 # Set missing parameters by default
 opt$header <- !("header" %in% names(opt))
 opt$superblock <- !("superblock" %in% names(opt))
 opt$scale <- !("scale" %in% names(opt))
 opt$text <- !("text" %in% names(opt))
+cex_lab <- 20
+cex_main <- 25
+cex_point <- 3
+cex_sub <- 20
+cex_axis <- 10
+cex <- 1.25
 
 status <- 0
 tryCatch({
@@ -450,7 +390,7 @@
             ncomp = opt$ncomp,
             scheme = opt$scheme,
             scale = opt$scale,
-            type = opt$type
+            method = opt$type
         )
     )
     if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) {
@@ -477,7 +417,11 @@
                 opt$block,
                 opt$text,
                 opt$block_y,
-                "Response"
+                "Response",
+                cex_lab = cex_lab,
+                cex_point = cex_point,
+                cex_main = cex_main,
+                cex = cex
             )
         )
         save_plot(opt$o1, individual_plot)
@@ -491,7 +435,11 @@
                 opt$compy,
                 opt$block,
                 opt$text,
-                n_mark = opt$nmark
+                n_mark = opt$nmark,
+                cex_lab = cex_lab,
+                cex_point = cex_point,
+                cex_main = cex_main,
+                cex = cex
             )
         )
         save_plot(opt$o2, corcircle)
@@ -502,20 +450,34 @@
             opt$compx,
             opt$nmark,
             opt$block,
-            type = "cor"
+            type = "loadings",
+            title = paste0("Variable correlations", ": ", names(rgcca_out$call$blocks)[opt$block], " with "),
+            cex_sub = cex_sub,
+            cex_main = cex_main,
+            cex_axis = cex_axis,
+            cex = cex
         )
     save_plot(opt$o3, top_variables)
 
     # Average Variance Explained
-    (ave <- plot_ave(rgcca_out))
+    (ave <- plot_ave(
+        rgcca_out,
+        cex_main = cex_main,
+        cex_sub = cex_sub,
+        cex_axis = cex_axis,
+        cex = cex))
     save_plot(opt$o4, ave)
 
     # Creates design scheme
-    design <- function() plot_network(rgcca_out)
+    design <- function() plot_network(
+        rgcca_out,
+        cex_main = cex_main,
+        cex_point = cex_point,
+        cex = cex)
     save_plot(opt$o5, design)
 
-    save_ind(rgcca_out, opt$compx, opt$compy, opt$o6)
-    save_var(rgcca_out, opt$compx, opt$compy, opt$o7)
+    save_ind(rgcca_out, opt$o6)
+    save_var(rgcca_out, opt$o7)
     save(rgcca_out, file = opt$o8)
 
     }, error = function(e) {
@@ -523,6 +485,10 @@
             status <<- 1
         else
             status <<- class(e)[1]
+        msg <- "The design matrix C"
+        if (grepl(msg, e$message)) {
+            e$message <- gsub(msg, "The connection file", e$message)
+        }
         message(e$message)
 })
 quit(status = status)
--- a/macro.xml	Tue Jan 12 10:12:04 2021 +0000
+++ b/macro.xml	Sun Jul 18 18:03:12 2021 +0000
@@ -1,12 +1,14 @@
 <macros>
 
-    <token name="@TOOL_VERSION@">3.0.0</token>
+    <token name="@TOOL_VERSION@">3.0.2</token>
+
+    <token name="@VERSION_SUFFIX@">1</token>
 
     <token name="@BLOCK_RULES@">1 corresponds to the first block, 2 corresponds to the second one, etc. This number should not be greater than the number of blocks selected.</token>
 
     <token name="@COMP_RULES@">This number should not be greater than the selected number of component (2, by default).</token>
 
-    <xml name="output_tests" token_path="" token_compx="1" token_compy="2">
+    <xml name="output_tests" token_path="">
         <param name="output_selector" value="individuals,corcircle,top_variables,ave,design,individual_table,variable_table,rdata"/>
         <output name="individual_plot" file="@PATH@/individuals.pdf" ftype="pdf"/>
         <output name="top_variables" file="@PATH@/top_variables.pdf" ftype="pdf"/>
@@ -16,11 +18,10 @@
         <output name="rdata" file="@PATH@/rgcca.result.RData" compare="sim_size" delta="1000" ftype="rdata"/>
         <output name="variable_table">
             <assert_contents>
-                <has_n_columns n="5"/>
                 <has_line_matching
-                        expression='.*"cor.axis.@COMPX@"\s"cor.axis.@COMPY@"\s"weight.axis.@COMPX@"\s"weight.axis.@COMPY@".*\s"block"'/>
+                        expression='.*"correlation.component1"\s"correlation.component2".*\s"weight.component1"\s"weight.component2".*\s"blocks"'/>
                 <has_line_matching
-                        expression='^.+(\s\-?\d+.\d+){4}.+$'/>
+                        expression='^.+(\s\-?\d+.\d+){4,6}.+$'/>
             </assert_contents>
         </output>
     </xml>
--- a/rgcca.xml	Tue Jan 12 10:12:04 2021 +0000
+++ b/rgcca.xml	Sun Jul 18 18:03:12 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0">
+<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
 
     <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description>
 
@@ -18,39 +18,64 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement>
+        <requirement type="package" version="4.1">r-base</requirement>
     </requirements>
 
     <command detect_errors="exit_code"><![CDATA[
         #set data_paths = ",".join([str(_.file_name) for _ in $blocks])
         #set data_names = ",".join([str(_.element_identifier).replace(',', '_') for _ in $blocks])
+        #set out_files = str($output_selector).split(",")
         Rscript '$__tool_directory__/launcher.R'
             --datasets '${data_paths}'
             --names '${data_names}'
-            --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata'
+            #if 'individuals' in $out_files
+                --o1 '$individual_plot'
+            #end if
+            #if 'corcircle' in $out_files
+                --o2 '$corcircle'
+            #end if
+            #if 'top_variables' in $out_files
+                --o3 '$top_variables'
+            #end if
+            #if 'ave' in $out_files
+                --o4 '$ave'
+            #end if
+            #if 'design' in $out_files
+                --o5 '$design'
+            #end if
+            #if 'individual_table' in $out_files
+                --o6 '$individual_table'
+            #end if
+            #if 'variable_table' in $out_files
+                --o7 '$variable_table'
+            #end if
+            #if 'rdata' in $out_files
+                --o8 '$rdata'
+            #end if
             $parse.header
             --separator $parse.separator
             $analyse.superblock
             $analyse.scale
             #if $analyse.tau.bool == 'false'
-            --penalty $analyse.tau.value
+                --penalty $analyse.tau.value
             #else
-            --penalty $analyse.tau.bool
+                --penalty $analyse.tau.bool
             #end if
             --ncomp $analyse.ncomp
             --scheme $analyse.scheme
             #if $analyse.method.family == '1'
-            --type pca
+                --type pca
             #else
-            --type $analyse.method.type
+                --type $analyse.method.type
             #end if
             #if $analyse.connection
-            --connection $analyse.connection
+                --connection $analyse.connection
             #end if
             #if $analyse.supervised.learning_mode == 'supervised'
-            --response $analyse.supervised.block_response
+                --response $analyse.supervised.block_response
             #end if
             #if $graphic.response
-            --group $graphic.response
+                --group $graphic.response
             #end if
             --compx $graphic.compx
             --compy $graphic.compy
@@ -181,7 +206,7 @@
             <option value="individuals" selected="true">Individual plot</option>
             <option value="corcircle" selected = "true">Corcircle plot</option>
             <option value="top_variables">Top variables plot</option>
-            <option value="ave">Averages plot</option>
+            <option value="ave">Explained variance plot</option>
             <option value="design">Design plot</option>
             <option value="individual_table" selected="true">Individual table</option>
             <option value="variable_table" selected="true">Variable table</option>
@@ -225,7 +250,7 @@
                 <assert_contents>
                     <has_n_columns n="4"/>
                     <has_line_matching
-                            expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/>
+                            expression='"agriculture.component1"\s"agriculture.component2"\s"superblock.component1"\s"superblock.component2"'/>
                     <has_line_matching
                             expression='^.+(\s\-?\d+.\d+){4}$'/>
                 </assert_contents>
@@ -281,7 +306,7 @@
         </test>
 
         <test expect_num_outputs="8" expect_exit_code="0">
-            <expand macro="output_tests" path="2blocks" compx="3" compy="1"/>
+            <expand macro="output_tests" path="2blocks"/>
             <param name="blocks" value="agriculture.tsv,politic.tsv"/>
             <section name="analyse">
                 <param name="scale" value="false"/>
@@ -324,11 +349,12 @@
 
 
 **Contact:**
-arthur.tenenhaus@centralesupelec.fr
+etienne.camenen@gmail.com
 
 
 **R package:**
-The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA).
+    | The RGCCA package is available from the CRAN repository (v2.1.2; https://cran.r-project.org/web/packages/RGCCA).
+    | This tool is based on a version available on GitHub (v3.0; https://github.com/rgcca-factory/RGCCA).
 
 ---------------------------------------------------
 
@@ -340,14 +366,14 @@
 
 **Working example**
 
-    | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata
-    | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* could be used as a design matrix and *political_system.tsv* as a response variable respectively in analysis and graphic settings.
+    | From Russett data (RGCCA package): https://github.com/BrainAndSpineInstitute/rgcca_ui/tree/master/inst/extdata
+    | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* for multiblock analysis. *connection.tsv* can be used as the design matrix (in the analysis settings) and *political_system.tsv* as the response variable (in the graphic settings).
 
 **Documentation**
 
 - RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf
-- accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files
-<!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md-->
+- accepted input / output formats: https://github.com/BrainAndSpineInstitute/rgcca_ui#input-files
+- tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy#readme
 
 </help>
 
Binary file test-data/1block/ave.pdf has changed
Binary file test-data/1block/corcircle.pdf has changed
Binary file test-data/1block/design.pdf has changed
Binary file test-data/1block/individuals.pdf has changed
Binary file test-data/1block/rgcca.result.RData has changed
Binary file test-data/1block/top_variables.pdf has changed
Binary file test-data/2blocks/ave.pdf has changed
Binary file test-data/2blocks/corcircle.pdf has changed
Binary file test-data/2blocks/design.pdf has changed
Binary file test-data/2blocks/individuals.pdf has changed
Binary file test-data/2blocks/rgcca.result.RData has changed
Binary file test-data/2blocks/top_variables.pdf has changed
Binary file test-data/3blocks/ave.pdf has changed
Binary file test-data/3blocks/corcircle.pdf has changed
Binary file test-data/3blocks/design.pdf has changed
Binary file test-data/3blocks/individuals.pdf has changed
Binary file test-data/3blocks/rgcca.result.RData has changed
Binary file test-data/3blocks/top_variables.pdf has changed
Binary file test-data/3blocks_connection/ave.pdf has changed
Binary file test-data/3blocks_connection/corcircle.pdf has changed
Binary file test-data/3blocks_connection/design.pdf has changed
Binary file test-data/3blocks_connection/individuals.pdf has changed
Binary file test-data/3blocks_connection/rgcca.result.RData has changed
Binary file test-data/3blocks_connection/top_variables.pdf has changed
Binary file test-data/3blocks_sgcca/ave.pdf has changed
Binary file test-data/3blocks_sgcca/corcircle.pdf has changed
Binary file test-data/3blocks_sgcca/design.pdf has changed
Binary file test-data/3blocks_sgcca/individuals.pdf has changed
Binary file test-data/3blocks_sgcca/rgcca.result.RData has changed
Binary file test-data/3blocks_sgcca/top_variables.pdf has changed
Binary file test-data/3blocks_supervised/ave.pdf has changed
Binary file test-data/3blocks_supervised/corcircle.pdf has changed
Binary file test-data/3blocks_supervised/design.pdf has changed
Binary file test-data/3blocks_supervised/individuals.pdf has changed
Binary file test-data/3blocks_supervised/rgcca.result.RData has changed
Binary file test-data/3blocks_supervised/top_variables.pdf has changed
--- a/test-data/connection.tsv	Tue Jan 12 10:12:04 2021 +0000
+++ b/test-data/connection.tsv	Sun Jul 18 18:03:12 2021 +0000
@@ -1,3 +1,4 @@
-0	1	1
-1	0	1
-1	1	0
+	agriculture	industry	politic
+agriculture	0	1	1
+industry	1	0	1
+politic	1	1	0