view tests/biosigner_tests.R @ 0:48e4be935243 draft

planemo upload for repository https://github.com/workflow4metabolomics/biosigner.git commit b8af709c9fd6ed283fc4e4249dcf692556927b2d
author ethevenot
date Wed, 27 Jul 2016 11:40:20 -0400
parents
children
line wrap: on
line source

library(RUnit)

wrapperF <- function(argVc) {


#### Start_of_testing_code <- function() {}


##------------------------------
## Initializing
##------------------------------

## options
##--------

strAsFacL <- options()$stringsAsFactors
options(stringsAsFactors = FALSE)

## libraries
##----------

suppressMessages(library(biosigner))

if(packageVersion("biosigner") < "1.0.0")
    stop("Please use 'biosigner' versions of 1.0.0 and above")
if(packageVersion("ropls") < "1.4.0")
    stop("Please use 'ropls' versions of 1.4.0 and above")

## constants
##----------

modNamC <- "Biosigner" ## module name

topEnvC <- environment()
flgC <- "\n"

## functions
##----------

flgF <- function(tesC,
                 envC = topEnvC,
                 txtC = NA) { ## management of warning and error messages

    tesL <- eval(parse(text = tesC), envir = envC)

    if(!tesL) {

        sink(NULL)
        stpTxtC <- ifelse(is.na(txtC),
                          paste0(tesC, " is FALSE"),
                          txtC)

        stop(stpTxtC,
             call. = FALSE)

    }

} ## flgF


## log file
##---------

sink(argVc["information"])

cat("\nStart of the '", modNamC, "' Galaxy module call: ",
    format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")


## arguments
##----------

xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
                              check.names = FALSE,
                              header = TRUE,
                              row.names = 1,
                              sep = "\t")))

samDF <- read.table(argVc["sampleMetadata_in"],
                    check.names = FALSE,
                    header = TRUE,
                    row.names = 1,
                    sep = "\t")
flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")

varDF <- read.table(argVc["variableMetadata_in"],
                    check.names = FALSE,
                    header = TRUE,
                    row.names = 1,
                    sep = "\t")
flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")

flgF("argVc['respC'] %in% colnames(samDF)",
     txtC = paste0("Class argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
respVc <- samDF[, argVc["respC"]]
flgF("mode(respVc) == 'character'",
     txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only characters"))
respFc <- factor(respVc)
flgF("length(levels(respFc)) == 2",
     txtC = paste0("'", argVc['respC'], "' column of sampleMetadata does not contain only 2 types of characters (e.g., 'case' and 'control')"))
tierMaxC <- ifelse("tierC" %in% names(argVc), argVc["tierC"], "S")
pvalN <- ifelse("pvalN" %in% names(argVc), as.numeric(argVc["pvalN"]), 0.05)


##------------------------------
## Computation and plot
##------------------------------


sink()

optWrnN <- options()$warn
options(warn = -1)

if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
    set.seed(as.integer(argVc["seedI"]))

bsnLs <- biosign(x = xMN,
                 y = respFc,
                 methodVc = ifelse("methodC" %in% names(argVc), argVc["methodC"], "all"),
                 bootI = ifelse("bootI" %in% names(argVc), as.numeric(argVc["bootI"]), 50),
                 pvalN = pvalN,
                 printL = FALSE,
                 plotL = FALSE,
                 .sinkC = argVc["information"])

if("seedI" %in% names(argVc) && argVc["seedI"] != "0")
    set.seed(NULL)

tierMC <- bsnLs@tierMC

if(!is.null(tierMC)) {
    plot(bsnLs,
         tierMaxC = tierMaxC,
         file.pdfC = argVc["figure_tier"],
         .sinkC = argVc["information"])
    plot(bsnLs,
         tierMaxC = tierMaxC,
         typeC = "boxplot",
         file.pdfC = argVc["figure_boxplot"],
         .sinkC = argVc["information"])
} else {
    pdf(argVc["figure_tier"])
    plot(1, bty = "n", type = "n",
         xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
         labels = "No significant variable to display")
    dev.off()
    pdf(argVc["figure_boxplot"])
    plot(1, bty = "n", type = "n",
         xaxt = "n", yaxt = "n", xlab = "", ylab = "")
    text(mean(par("usr")[1:2]), mean(par("usr")[3:4]),
         labels = "No significant variable to display")
    dev.off()
}


options(warn = optWrnN)


##------------------------------
## Print
##------------------------------

sink(argVc["information"], append = TRUE)

tierFullVc <- c("S", LETTERS[1:5])
tierVc <- tierFullVc[1:which(tierFullVc == tierMaxC)]

if(sum(tierMC %in% tierVc)) {
    cat("\nSignificant features from '", paste(tierVc, collapse = "', '"), "' tiers:\n", sep = "")
    print(tierMC[apply(tierMC, 1, function(rowVc) sum(rowVc %in% tierVc) > 0), ,
                         drop = FALSE])
    cat("\nAccuracy:\n")
    print(round(getAccuracyMN(bsnLs), 3))
} else
    cat("\nNo significant variable found for any classifier\n")


##------------------------------
## Ending
##------------------------------

## Saving
##-------

if(!is.null(tierMC)) {
    tierDF <- data.frame(tier = sapply(rownames(varDF),
                             function(varC) {
                                 varTirVc <- tierMC[varC, ]
                                 varTirVc <- names(varTirVc)[varTirVc %in% tierVc]
                                 paste(varTirVc, collapse = "|")
                             }),
                         stringsAsFactors = FALSE)
    colnames(tierDF) <- paste(argVc["respC"],
                              colnames(tierDF),
                              paste(tierVc, collapse = ""),
                              sep = "_")
    varDF <- cbind.data.frame(varDF, tierDF)
}

## variableMetadata

varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
                          varDF)
write.table(varDF,
            file = argVc["variableMetadata_out"],
            quote = FALSE,
            row.names = FALSE,
            sep = "\t")


## Closing
##--------

cat("\nEnd of '", modNamC, "' Galaxy module call: ",
    as.character(Sys.time()), "\n", sep = "")

sink()

options(stringsAsFactors = strAsFacL)


#### End_of_testing_code <- function() {}


    return(list(bsnLs = bsnLs))


    rm(list = ls())


}


exaDirOutC <- "output"
if(!file.exists(exaDirOutC))
   stop("Please create an 'output' subfolder into the (current) 'tests' folder")

tesArgLs <- list(sacurine_all = c(respC = "gender",
                     methodC = "all",
                     bootI = "5",
                     pvalN = "0.05",
                     seedI = "123",
                     .chkC = "checkEqualsNumeric(getAccuracyMN(outLs[['bsnLs']])['AS', 'randomforest'], 0.8534348, tolerance = 1e-7)"))

for(tesC in names(tesArgLs))
    tesArgLs[[tesC]] <- c(tesArgLs[[tesC]],
                          dataMatrix_in = file.path(unlist(strsplit(tesC, "_"))[1], "dataMatrix.tsv"),
                          sampleMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "sampleMetadata.tsv"),
                          variableMetadata_in = file.path(unlist(strsplit(tesC, "_"))[1], "variableMetadata.tsv"),
                          variableMetadata_out = file.path(exaDirOutC, "variableMetadata.tsv"),
                          figure_tier = file.path(exaDirOutC, "figure-tier.pdf"),
                          figure_boxplot = file.path(exaDirOutC, "figure-boxplot.pdf"),
                          information = file.path(exaDirOutC, "information.txt"))

for(tesC in names(tesArgLs)) {
    print(tesC)
    outLs <- wrapperF(tesArgLs[[tesC]])
    if(".chkC" %in% names(tesArgLs[[tesC]]))
        stopifnot(eval(parse(text = tesArgLs[[tesC]][[".chkC"]])))
}

message("Checks successfully completed")