diff tests/biosigner_tests.R @ 0:48e4be935243 draft

planemo upload for repository https://github.com/workflow4metabolomics/biosigner.git commit b8af709c9fd6ed283fc4e4249dcf692556927b2d
author ethevenot
date Wed, 27 Jul 2016 11:40:20 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/biosigner_tests.R	Wed Jul 27 11:40:20 2016 -0400
@@ -0,0 +1,264 @@
+library(RUnit)
+
+wrapperF <- function(argVc) {
+
+
+#### Start_of_testing_code <- function() {}
+
+
+##------------------------------
+## Initializing
+##------------------------------
+
+## options
+##--------
+
+strAsFacL <- options()$stringsAsFactors
+options(stringsAsFactors = FALSE)
+
+## libraries
+##----------
+
+suppressMessages(library(biosigner))
+
+if(packageVersion("biosigner") < "1.0.0")
+    stop("Please use 'biosigner' versions of 1.0.0 and above")
+if(packageVersion("ropls") < "1.4.0")
+    stop("Please use 'ropls' versions of 1.4.0 and above")
+
+## constants
+##----------
+
+modNamC <- "Biosigner" ## module name
+
+topEnvC <- environment()
+flgC <- "\n"
+
+## functions
+##----------
+
+flgF <- function(tesC,
+                 envC = topEnvC,
+                 txtC = NA) { ## management of warning and error messages
+
+    tesL <- eval(parse(text = tesC), envir = envC)
+
+    if(!tesL) {
+
+        sink(NULL)
+        stpTxtC <- ifelse(is.na(txtC),
+                          paste0(tesC, " is FALSE"),
+                          txtC)
+
+        stop(stpTxtC,
+             call. = FALSE)
+
+    }
+
+} ## flgF
+
+
+## log file
+##---------
+
+sink(argVc["information"])
+
+cat("\nStart of the '", modNamC, "' Galaxy module call: ",
+    format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
+
+
+## arguments
+##----------
+
+xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
+                              check.names = FALSE,
+                              header = TRUE,
+                              row.names = 1,
+                              sep = "\t")))
+
+samDF <- read.table(argVc["sampleMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
+
+varDF <- read.table(argVc["variableMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
+
+flgF("argVc['respC'] %in% colnames(samDF)",
+     txtC = paste0("Class argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
+respVc <- samDF[, argVc["respC"]]
+flgF("mode(respVc) == 'character'",
+     txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only characters"))
+respFc <- factor(respVc)
+flgF("length(levels(respFc)) == 2",
+     txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only 2 types of characters (e.g., 'case' and 'control')"))
+tierMaxC <- ifelse("tierC" %in% names(argVc), argVc["tierC"], "S")
+pvalN <- ifelse("pvalN" %in% names(argVc), as.numeric(argVc["pvalN"]), 0.05)
+
+
+##------------------------------
+## Computation and plot
+##------------------------------
+
+
+sink()
+
+optWrnN <- options()$warn
+options(warn = -1)
+
+if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
+    set.seed(as.integer(argVc["seedI"]))
+
+bsnLs <- biosign(x = xMN,
+                 y = respFc,
+                 methodVc = ifelse("methodC" %in% names(argVc), argVc["methodC"], "all"),
+                 bootI = ifelse("bootI" %in% names(argVc), as.numeric(argVc["bootI"]), 50),
+                 pvalN = pvalN,
+                 printL = FALSE,
+                 plotL = FALSE,
+                 .sinkC = argVc["information"])
+
+if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
+    set.seed(NULL)
+
+tierMC <- bsnLs@tierMC
+
+if(!is.null(tierMC)) {
+    plot(bsnLs,
+         tierMaxC = tierMaxC,
+         file.pdfC = argVc["figure_tier"],
+         .sinkC = argVc["information"])
+    plot(bsnLs,
+         tierMaxC = tierMaxC,
+         typeC = "boxplot",
+         file.pdfC = argVc["figure_boxplot"],
+         .sinkC = argVc["information"])
+} else {
+    pdf(argVc["figure_tier"])
+    plot(1, bty = "n", type = "n",
+         xaxt = "n", yaxt = "n", xlab = "", ylab = "")
+    text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
+         labels = "No significant variable to display")
+    dev.off()
+    pdf(argVc["figure_boxplot"])
+    plot(1, bty = "n", type = "n",
+         xaxt = "n", yaxt = "n", xlab = "", ylab = "")
+    text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
+         labels = "No significant variable to display")
+    dev.off()
+}
+
+
+options(warn = optWrnN)
+
+
+##------------------------------
+## Print
+##------------------------------
+
+sink(argVc["information"], append = TRUE)
+
+tierFullVc <- c("S", LETTERS[1:5])
+tierVc <- tierFullVc[1:which(tierFullVc == tierMaxC)]
+
+if(sum(tierMC %in% tierVc)) {
+    cat("\nSignificant features from '", paste(tierVc, collapse = "', '"), "' tiers:\n", sep = "")
+    print(tierMC[apply(tierMC, 1, function(rowVc) sum(rowVc %in% tierVc) > 0), ,
+                         drop = FALSE])
+    cat("\nAccuracy:\n")
+    print(round(getAccuracyMN(bsnLs), 3))
+} else
+    cat("\nNo significant variable found for any classifier\n")
+
+
+##------------------------------
+## Ending
+##------------------------------
+
+## Saving
+##-------
+
+if(!is.null(tierMC)) {
+    tierDF <- data.frame(tier = sapply(rownames(varDF),
+                             function(varC) {
+                                 varTirVc <- tierMC[varC, ]
+                                 varTirVc <- names(varTirVc)[varTirVc %in% tierVc]
+                                 paste(varTirVc, collapse = "|")
+                             }),
+                         stringsAsFactors = FALSE)
+    colnames(tierDF) <- paste(argVc["respC"],
+                              colnames(tierDF),
+                              paste(tierVc, collapse = ""),
+                              sep = "_")
+    varDF <- cbind.data.frame(varDF, tierDF)
+}
+
+## variableMetadata
+
+varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
+                          varDF)
+write.table(varDF,
+            file = argVc["variableMetadata_out"],
+            quote = FALSE,
+            row.names = FALSE,
+            sep = "\t")
+
+
+## Closing
+##--------
+
+cat("\nEnd of '", modNamC, "' Galaxy module call: ",
+    as.character(Sys.time()), "\n", sep = "")
+
+sink()
+
+options(stringsAsFactors = strAsFacL)
+
+
+#### End_of_testing_code <- function() {}
+
+
+    return(list(bsnLs = bsnLs))
+
+
+    rm(list = ls())
+
+
+}
+
+
+exaDirOutC <- "output"
+if(!file.exists(exaDirOutC))
+   stop("Please create an 'output' subfolder into the (current) 'tests' folder")
+
+tesArgLs <- list(sacurine_all = c(respC = "gender",
+                     methodC = "all",
+                     bootI = "5",
+                     pvalN = "0.05",
+                     seedI = "123",
+                     .chkC = "checkEqualsNumeric(getAccuracyMN(outLs[['bsnLs']])['AS', 'randomforest'], 0.8534348, tolerance = 1e-7)"))
+
+for(tesC in names(tesArgLs))
+    tesArgLs[[tesC]] <- c(tesArgLs[[tesC]],
+                          dataMatrix_in = file.path(unlist(strsplit(tesC, "_"))[1], "dataMatrix.tsv"),
+                          sampleMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "sampleMetadata.tsv"),
+                          variableMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "variableMetadata.tsv"),
+                          variableMetadata_out = file.path(exaDirOutC, "variableMetadata.tsv"),
+                          figure_tier = file.path(exaDirOutC, "figure-tier.pdf"),
+                          figure_boxplot = file.path(exaDirOutC, "figure-boxplot.pdf"),
+                          information = file.path(exaDirOutC, "information.txt"))
+
+for(tesC in names(tesArgLs)) {
+    print(tesC)
+    outLs <- wrapperF(tesArgLs[[tesC]])
+    if(".chkC" %in% names(tesArgLs[[tesC]]))
+        stopifnot(eval(parse(text = tesArgLs[[tesC]][[".chkC"]])))
+}
+
+message("Checks successfully completed")