Mercurial > repos > iuc > rgcca
diff launcher.R @ 0:067d45e6caa9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author | iuc |
---|---|
date | Tue, 12 Jan 2021 10:12:04 +0000 |
parents | |
children | 4e73ea176c34 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/launcher.R Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,528 @@ +# Author: Etienne CAMENEN +# Date: 2020 +# Contact: arthur.tenenhaus@centralesupelec.fr +# Key-words: omics, RGCCA, multi-block +# EDAM operation: analysis, correlation, visualisation +# +# Abstract: Performs multi-variate analysis (PCA, CCA, PLS, R/SGCCA, etc.) +# and produces textual and graphical outputs (e.g. variables and individuals +# plots). + +rm(list = ls()) +graphics.off() +separator <- NULL + +########## Arguments ########## + +# Parse the arguments from a command line launch +get_args <- function() { + option_list <- list( + # File parameters + make_option( + opt_str = c("-d", "--datasets"), + type = "character", + metavar = "path list", + help = "List of comma-separated file paths corresponding to the + blocks to be analyzed (one per block and without spaces between + them; e.g., path/file1.txt,path/file2.txt) [required]" + ), + make_option( + opt_str = c("-c", "--connection"), + type = "character", + metavar = "path", + help = "Path of the file defining the connections between the blocks + [if not used, activates the superblock mode]" + ), + make_option( + opt_str = "--group", + type = "character", + metavar = "path", + help = "Path of the file coloring the individuals in the ad hoc + plot" + ), + make_option( + opt_str = c("-r", "--response"), + type = "integer", + metavar = "integer", + help = "Position of the response file for the supervised mode within + the block path list [actives the supervised mode]" + ), + make_option( + opt_str = "--names", + type = "character", + metavar = "character list", + help = "List of comma-separated block names to rename them (one per + block; without spaces between them) [default: the block file names]" + ), + make_option( + opt_str = c("-H", "--header"), + type = "logical", + action = "store_false", + help = "DO NOT consider the first row as the column header" + ), + make_option( + opt_str = "--separator", + type = "integer", + metavar = "integer", + default = opt[1], + help = "Character used to separate columns (1: tabulation, + 2: semicolon, 3: comma) [default: %default]" + ), + # Analysis parameter + make_option( + opt_str = "--type", + type = "character", + metavar = "character", + default = opt[2], + help = "Type of analysis [default: %default] (among: rgcca, pca, + cca, gcca, cpca-w, hpca, maxbet-b, maxbet, maxdiff-b, maxdiff, + maxvar-a, maxvar-b, maxvar, niles, r-maxvar, rcon-pca, ridge-gca, + sabscor, ssqcor, ssqcor, ssqcov-1, ssqcov-2, ssqcov, sum-pca, + sumcor, sumcov-1, sumcov-2, sumcov)" + ), + make_option( + opt_str = "--ncomp", + type = "character", + metavar = "integer list", + default = opt[3], + help = "Number of components in the analysis for each block + [default: %default]. The number should be higher than 1 and lower + than the minimum number of variables among the blocks. It can be a + single values or a comma-separated list (e.g 2,2,3,2)." + ), + make_option( + opt_str = "--penalty", + type = "character", + metavar = "float list", + default = opt[4], + help = "For RGCCA, a regularization parameter for each block (i.e., tau) + [default: %default]. Tau varies from 0 (maximizing the correlation) + to 1 (maximizing the covariance). For SGCCA, tau is automatically + set to 1 and shrinkage parameter can be defined instead for + automatic variable selection, varying from the square root of the + variable number (the fewest selected variables) to 1 (all the + variables are included). It can be a single value or a + comma-separated list (e.g. 0,1,0.75,1)." + ), + make_option( + opt_str = "--scheme", + type = "integer", + metavar = "integer", + default = opt[5], + help = "Link (i.e. scheme) function for covariance maximization + (1: x, 2: x^2, 3: |x|, 4: x^4) [default: %default]. Onnly, the x + function ('horst scheme') penalizes structural negative correlation. + The x^2 function ('factorial scheme') discriminates more strongly + the blocks than the |x| ('centroid scheme') one." + ), + make_option( + opt_str = "--scale", + type = "logical", + action = "store_false", + help = "DO NOT scale the blocks (i.e., a data centering step is + always performed). Otherwise, each block is normalised and divided + by the squareroot of its number of variables." + ), + make_option( + opt_str = "--superblock", + type = "logical", + action = "store_false", + help = "DO NOT use a superblock (i.e. a concatenation of all the + blocks to visualize them all together in a consensus space). In + this case, all blocks are assumed to be connected or a connection + file could be used." + ), + # Graphical parameters + make_option( + opt_str = "--text", + type = "logical", + action = "store_false", + help = "DO NOT display the name of the points instead of shapes when + plotting" + ), + make_option( + opt_str = "--block", + type = "integer", + metavar = "integer", + default = opt[6], + help = "Position in the path list of the plotted block (0: the + superblock or, if not activated, the last one, 1: the fist one, + 2: the 2nd, etc.)[default: the last one]" + ), + make_option( + opt_str = "--block_y", + type = "integer", + metavar = "integer", + help = "Position in the path list of the plotted block for the + Y-axis in the individual plot (0: the superblock or, if not + activated, the last one, 1: the fist one, 2: the 2nd, etc.) + [default: the last one]" + ), + make_option( + opt_str = "--compx", + type = "integer", + metavar = "integer", + default = opt[7], + help = "Component used in the X-axis for biplots and the only + component used for histograms [default: %default] (should not be + higher than the number of components of the analysis)" + ), + make_option( + opt_str = "--compy", + type = "integer", + metavar = "integer", + default = opt[8], + help = "Component used in the Y-axis for biplots + [default: %default] (should not be higher than the number of + components of the analysis)" + ), + make_option( + opt_str = "--nmark", + type = "integer", + metavar = "integer", + default = opt[9], + help = "Number maximum of top variables in ad hoc plot + [default: %default]" + ), + # output parameters + make_option( + opt_str = "--o1", + type = "character", + metavar = "path", + default = opt[10], + help = "Path for the individual plot [default: %default]" + ), + make_option( + opt_str = "--o2", + type = "character", + metavar = "path", + default = opt[11], + help = "Path for the variable plot [default: %default]" + ), + make_option( + opt_str = "--o3", + type = "character", + metavar = "path", + default = opt[12], + help = "Path for the top variables plot [default: %default]" + ), + make_option( + opt_str = "--o4", + type = "character", + metavar = "path", + default = opt[13], + help = "Path for the explained variance plot [default: %default]" + ), + make_option( + opt_str = "--o5", + type = "character", + metavar = "path", + default = opt[14], + help = "Path for the design plot [default: %default]" + ), + make_option( + opt_str = "--o6", + type = "character", + metavar = "path", + default = opt[15], + help = "Path for the individual table [default: %default]" + ), + make_option( + opt_str = "--o7", + type = "character", + metavar = "path", + default = opt[16], + help = "Path for the variable table [default: %default]" + ), + make_option( + opt_str = "--o8", + type = "character", + metavar = "path", + default = opt[17], + help = "Path for the analysis results in RData [default: %default]" + ) + ) + return(optparse::OptionParser(option_list = option_list)) +} + +char_to_list <- function(x) { + strsplit(gsub(" ", "", as.character(x)), ",")[[1]] +} + +check_arg <- function(opt) { + # Check the validity of the arguments opt : an optionParser object + + if (is.null(opt$datasets)) + stop_rgcca(paste0("datasets is required."), exit_code = 121) + + if (is.null(opt$scheme)) + opt$scheme <- "factorial" + else if (!opt$scheme %in% seq(4)) { + stop_rgcca( + paste0( + "scheme should be comprise between 1 and 4 [by default: 2], not be equal to ", + opt$scheme, + "." + ), + exit_code = 122 + ) + } else { + schemes <- c("horst", "factorial", "centroid") + if (opt$scheme == 4) + opt$scheme <- function(x) x ^ 4 + else + opt$scheme <- schemes[opt$scheme] + } + + if (!opt$separator %in% seq(3)) { + stop_rgcca( + paste0( + "separator should be comprise between 1 and 3 (1: Tabulation, 2: Semicolon, 3: Comma) [by default: 2], not be equal to ", + opt$separator, + "." + ), + exit_code = 123 + ) + } else { + separators <- c("\t", ";", ",") + opt$separator <- separators[opt$separator] + } + + nmark <- NULL + RGCCA:::check_integer("nmark", opt$nmark, min = 2) + + for (x in c("ncomp", "penalty")) + opt[[x]] <- char_to_list(opt[[x]]) + + return(opt) +} + +post_check_arg <- function(opt, rgcca) { +# Check the validity of the arguments after loading the blocks opt : an +# optionParser object blocks : a list of matrix + blocks <- NULL + for (x in c("block", "block_y")) { + if (!is.null(opt[[x]])) { + if (opt[[x]] == 0) + opt[[x]] <- length(rgcca$call$blocks) + opt[[x]] <- RGCCA:::check_blockx(x, opt[[x]], rgcca$call$blocks) + } + } + + if (any(opt$ncomp == 1)) + opt$compy <- 1 + + for (x in c("compx", "compy")) + opt[[x]] <- check_compx(x, opt[[x]], rgcca$call$ncomp, opt$block) + + return(opt) +} + +check_integer <- function(x, y = x, type = "scalar", float = FALSE, min = 1) { + + if (is.null(y)) + y <- x + + if (type %in% c("matrix", "data.frame")) + y_temp <- y + + y <- suppressWarnings(as.double(as.matrix(y))) + + if (any(is.na(y))) + stop_rgcca(paste(x, "should not be NA.")) + + if (!is(y, "numeric")) + stop_rgcca(paste(x, "should be numeric.")) + + if (type == "scalar" && length(y) != 1) + stop_rgcca(paste(x, "should be of length 1.")) + + if (!float) + y <- as.integer(y) + + if (all(y < min)) + stop_rgcca(paste0(x, " should be higher than or equal to ", min, ".")) + + if (type %in% c("matrix", "data.frame")) + y <- matrix( + y, + dim(y_temp)[1], + dim(y_temp)[2], + dimnames = dimnames(y_temp) + ) + + if (type == "data.frame") + as.data.frame(y) + + return(y) +} + +load_libraries <- function(librairies) { + for (l in librairies) { + if (!(l %in% installed.packages()[, "Package"])) + utils::install.packages(l, repos = "cran.us.r-project.org") + suppressPackageStartupMessages( + library( + l, + character.only = TRUE, + warn.conflicts = FALSE, + quietly = TRUE + )) + } +} + +stop_rgcca <- function( + message, + exit_code = "1", + call = NULL) { + + base::stop( + structure( + class = c(exit_code, "simpleError", "error", "condition"), + list(message = message, call. = NULL) + )) + } + +########## Main ########## + +# Get arguments : R packaging install, need an opt variable with associated +# arguments +opt <- list( + separator = 1, + type = "rgcca", + ncomp = 2, + penalty = 1, + scheme = 2, + block = 0, + compx = 1, + compy = 2, + nmark = 100, + o1 = "individuals.pdf", + o2 = "corcircle.pdf", + o3 = "top_variables.pdf", + o4 = "ave.pdf", + o5 = "design.pdf", + o6 = "individuals.tsv", + o7 = "variables.tsv", + o8 = "rgcca_result.RData", + datasets = paste0("inst/extdata/", + c("agriculture", "industry", "politic"), + ".tsv", + collapse = ",") +) + +load_libraries(c("ggplot2", "optparse", "scales", "igraph", "MASS", "rlang", "Deriv")) +try(load_libraries("ggrepel"), silent = TRUE) + +tryCatch( + opt <- check_arg(optparse::parse_args(get_args())), + error = function(e) { + if (length(grep("nextArg", e[[1]])) != 1) + stop_rgcca(e[[1]], exit_code = 140) + }, warning = function(w) + stop_rgcca(w[[1]], exit_code = 141) +) + +# Load functions +all_funcs <- unclass(lsf.str(envir = asNamespace("RGCCA"), all = T)) +for (i in all_funcs) + eval(parse(text = paste0(i, "<-RGCCA:::", i))) + +# Set missing parameters by default +opt$header <- !("header" %in% names(opt)) +opt$superblock <- !("superblock" %in% names(opt)) +opt$scale <- !("scale" %in% names(opt)) +opt$text <- !("text" %in% names(opt)) + +status <- 0 +tryCatch({ + + blocks <- load_blocks(opt$datasets, opt$names, opt$separator) + group <- load_response(blocks, opt$group, opt$separator, opt$header) + connection <- load_connection(file = opt$connection, separator = opt$separator) + + func <- quote( + rgcca( + blocks = blocks, + connection = connection, + response = opt$response, + superblock = opt$superblock, + ncomp = opt$ncomp, + scheme = opt$scheme, + scale = opt$scale, + type = opt$type + ) + ) + if (tolower(opt$type) %in% c("sgcca", "spca", "spls")) { + func[["sparsity"]] <- opt$penalty + }else { + func[["tau"]] <- opt$penalty + } + + rgcca_out <- eval(as.call(func)) + + opt <- post_check_arg(opt, rgcca_out) + + ########## Plot ########## + + if (rgcca_out$call$ncomp[opt$block] == 1 && is.null(opt$block_y)) { + warning("With a number of component of 1, a second block should be chosen to perform an individual plot") + } else { + ( + individual_plot <- plot_ind( + rgcca_out, + group, + opt$compx, + opt$compy, + opt$block, + opt$text, + opt$block_y, + "Response" + ) + ) + save_plot(opt$o1, individual_plot) + } + + if (rgcca_out$call$ncomp[opt$block] > 1) { + ( + corcircle <- plot_var_2D( + rgcca_out, + opt$compx, + opt$compy, + opt$block, + opt$text, + n_mark = opt$nmark + ) + ) + save_plot(opt$o2, corcircle) + } + + top_variables <- plot_var_1D( + rgcca_out, + opt$compx, + opt$nmark, + opt$block, + type = "cor" + ) + save_plot(opt$o3, top_variables) + + # Average Variance Explained + (ave <- plot_ave(rgcca_out)) + save_plot(opt$o4, ave) + + # Creates design scheme + design <- function() plot_network(rgcca_out) + save_plot(opt$o5, design) + + save_ind(rgcca_out, opt$compx, opt$compy, opt$o6) + save_var(rgcca_out, opt$compx, opt$compy, opt$o7) + save(rgcca_out, file = opt$o8) + + }, error = function(e) { + if (class(e)[1] %in% c("simpleError", "error", "condition")) + status <<- 1 + else + status <<- class(e)[1] + message(e$message) +}) +quit(status = status)