Mercurial > repos > ethevenot > qualitymetrics
diff qualitymetrics_script.R @ 0:b4f5b5bc01dd draft
planemo upload for repository https://github.com/workflow4metabolomics/qualitymetrics.git commit 73366dd3473c509341ab9ba1df8ba748d08a50a1
author | ethevenot |
---|---|
date | Sat, 06 Aug 2016 12:01:17 -0400 |
parents | |
children | 6d3b7b6573d8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/qualitymetrics_script.R Sat Aug 06 12:01:17 2016 -0400 @@ -0,0 +1,896 @@ +################################################################################################ +# ANALYSES FOR QUALITY CONTROL # +# # +# Author: Melanie PETERA # +# User: Galaxy # +# Starting date: 04-09-2014 # +# V-1.0: Restriction of old filter script to CV filter # +# V-1.1: Addition of data check # +# V-1.2: Substitution of deletion by addition of indicator variable # +# V-1.3: Handling special characters # +# # +# # +# Input files: dataMatrix ; sampleMetadata ; variableMetadata # +# Output files: dataMatrix ; sampleMetadata ; variableMetadata # +# # +################################################################################################ + +# Parameters (for dev) +if(FALSE){ + + ion.file.in <- "test/ressources/inputs/ex_data_IONS.txt" #tab file + meta.samp.file.in <- "test/ressources/inputs/ex_data_PROTOCOLE1.txt" #tab file + meta.ion.file.in <- "test/ressources/inputs/ex_data_METAION.txt" #tab file + + ## ion.file.out <- "test/ressources/outputs/QCtest_ex_data_IONS.txt" #tab file + meta.samp.file.out <- "test/ressources/outputs/QCtest_ex_data_PROTOCOLE1.txt" #tab file + meta.ion.file.out <- "test/ressources/outputs/QCtest_ex_data_METAION.txt" #tab file + + CV <- TRUE ; if(CV){Compa<-TRUE;seuil<-1.25}else{Compa<-NULL;seuil<-NULL} + + poolAsPool1L <- FALSE + + if(FALSE) { ## Sacuri dataset + + ## 'example' input dir + exaDirInpC <- "example/input" + + ion.file.in <- file.path(exaDirInpC, "dataMatrix.tsv") + meta.samp.file.in <- file.path(exaDirInpC, "sampleMetadata.tsv") + meta.ion.file.in <- file.path(exaDirInpC, "variableMetadata.tsv") + + poolAsPool1L <- FALSE + + ## 'example' output dir + exaDirOutC <- gsub("input", "output", exaDirInpC) + + mata.samp.file.out <- file.path(exaDirOutC, "sampleMetadata.tsv") + meta.ion.file_out <- file.path(exaDirOutC, "variableMetadata.tsv") + fig.out <- file.path(exaDirOutC, "figure.pdf") + log.out <- file.path(exaDirOutC, "information.txt") + + stopifnot(file.exists(exaDirOutC)) + + } + +} + +QualityControl <- function(ion.file.in, meta.samp.file.in, meta.ion.file.in, + CV, Compa, seuil, poolAsPool1L, + ion.file.out, meta.samp.file.out, meta.ion.file.out, fig.out, log.out){ + # This function allows to analyse data to check its quality + # It needs 3 datasets: the data matrix, the variables' metadata, the samples' metadata. + # It generates 3 new datasets corresponding to the 3 inputs with additional columns. + # + # Parameters: + # - xxx.in: input files' names + # - xxx.out: output files' names + # - CV: CV calculation yes/no + # | > Compa: comparing pool and sample CVs (TRUE) or simple pool CV calculation (FALSE) + # | > seuil: maximum ratio tolerated between pool and sample CVs or maximum pool CV + + +# Input ----------------------------------------------------------------------------------- + +ion.data <- read.table(ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) +meta.samp.data <- read.table(meta.samp.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) +meta.ion.data <- read.table(meta.ion.file.in,sep="\t",header=TRUE,check.names=FALSE, stringsAsFactors = FALSE) + +# Error vector +err.stock <- "\n" + +# Table match check +table.check <- match3(ion.data,meta.samp.data,meta.ion.data) +check.err(table.check) + +# StockID +samp.id <- stockID(ion.data,meta.samp.data,"sample") +ion.data <- samp.id$dataMatrix +meta.samp.data <- samp.id$Metadata +samp.id <- samp.id$id.match + + +# Function 1: CV calculation -------------------------------------------------------------- +# Allows to class ions according to the Coefficient of Variation (CV): +# Compa=TRUE: +# CV of pools and CV of samples are compared (ration between pools' one and samples' one) +# and confronted to a given ration. +# Compa=FALSE: +# only CV of pools are considered ; compared to a given threshold + +if(CV){ + + # Checking the sampleType variable + if(is.null(meta.samp.data$sampleType)){ + err.stock <- c(err.stock,"\n-------", + "\nWarning : no 'sampleType' variable detected in sample meta-data !", + "\nCV can not be calculated.\n-------\n") + }else{ + if(!("pool"%in%levels(factor(meta.samp.data$sampleType)))){ + err.stock <- c(err.stock,"\n-------", + "\nWarning : no 'pool' detected in 'sampleType' variable (sample meta-data) !", + "\nCV can not be calculated.\n-------\n") + }else{ + if((!("sample"%in%levels(factor(meta.samp.data$sampleType))))&(Compa)){ + err.stock <- c(err.stock,"\n-------", + "\nWarning : no 'sample' detected in 'sampleType' variable (sample meta-data) !", + "\nCV can not be calculated.\n-------\n") + }else{ + + # Statement + tmp.ion <- data.frame(CV.ind=rep(NA,nrow(ion.data)),CV.samp=rep(NA,nrow(ion.data)), + CV.pool=rep(NA,nrow(ion.data)),ion.data,stringsAsFactors=FALSE) + # CV samples + tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="sample"),1]) + tmp.ion$CV.samp <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) + tmp.ion$CV.samp[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 + # CV pools + tmp.samp <- which(colnames(tmp.ion)%in%meta.samp.data[which(meta.samp.data$sampleType=="pool"),1]) + tmp.ion$CV.pool <- apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE)) / rowMeans(tmp.ion[,tmp.samp], na.rm = TRUE) + tmp.ion$CV.pool[which(apply(tmp.ion[,tmp.samp],1,function(x)sd(x, na.rm = TRUE))==0)] <- 0 + # CV indicator + if(Compa){tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)/(tmp.ion$CV.samp)>seuil,0,1) + }else{tmp.ion$CV.ind <- ifelse((tmp.ion$CV.pool)>seuil,0,1)} + # Addition of new columns in meta.ion.data + if(Compa){tmp.ion<-tmp.ion[,c(4,2,3,1,1)]}else{tmp.ion<-tmp.ion[,c(4,3,1,1)]} + tmp.ion[,ncol(tmp.ion)] <- 1:nrow(tmp.ion) + meta.ion.data <- merge(x=meta.ion.data,y=tmp.ion,by.x=1,by.y=1) + meta.ion.data <- meta.ion.data[order(meta.ion.data[,ncol(meta.ion.data)]),][,-ncol(meta.ion.data)] + rownames(meta.ion.data) <- NULL + + rm(tmp.ion,tmp.samp) + + }}} + +} # end if(CV) + +## complementary metrics (ET) + +datMN <- t(as.matrix(ion.data[, -1])) +colnames(datMN) <- ion.data[, 1] +datMN <- datMN[, meta.ion.data[, 1]] ## in case meta.ion.data has been re-ordered during the CV = TRUE computations +quaLs <- qualityMetricsF(datMN, + meta.samp.data, + meta.ion.data, + poolAsPool1L, + fig.out, + log.out) +meta.samp.data <- quaLs[["samDF"]] +meta.ion.data <- quaLs[["varDF"]] + + +# Output ---------------------------------------------------------------------------------- + +# Getting back original identifiers +id.ori <- reproduceID(ion.data,meta.samp.data,"sample",samp.id) +ion.data <- id.ori$dataMatrix +meta.samp.data <- id.ori$Metadata + + +# Error checking +if(length(err.stock)>1){ + stop(err.stock) +}else{ + +## write.table(ion.data, ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) +write.table(meta.samp.data, meta.samp.file.out, sep="\t", row.names=FALSE, quote=FALSE) +write.table(meta.ion.data, meta.ion.file.out, sep="\t", row.names=FALSE, quote=FALSE) + +} + + +} # end of QualityControl function + + +# Typical function call +# QualityControl(ion.file.in, meta.samp.file.in, meta.ion.file.in, +# CV, Compa, seuil, +# ion.file.out, meta.samp.file.out, meta.ion.file.out) + + +qualityMetricsF <- function(datMN, + samDF, + varDF, + pooAsPo1L = TRUE, + fig.pdfC = NULL, + log.txtC = NULL) { + + optWrnN <- options()$warn + options(warn = -1) + + + ##------------------------------ + ## Functions + ##------------------------------ + + + allDigF <- function (string) { ## from the Hmisc package (all.digits) + k <- length(string) + result <- logical(k) + for (i in 1:k) { + st <- string[i] + ls <- nchar(st) + ex <- substring(st, 1:ls, 1:ls) + result[i] <- all(match(ex, c("0", "1", "2", "3", "4", + "5", "6", "7", "8", "9"), nomatch = 0) > 0) + } + result + } + + datPloF <- function() { ## ploting data matrix + + thrVn <- c(pvalue=0.001, + poolCv=0.3) + + ## Constants + + marLs <- list(dri = c(2.1, 2.6, 1.1, 1.1), + ima = c(1.1, 2.6, 4.1, 1.1), + msd = c(2.1, 2.6, 1.1, 0.6), + sam = c(3.1, 3.6, 1.1, 0.6), + pca = c(2.6, 3.6, 1.1, 0.6), + sca = c(1.1, 4.1, 4.1, 0.6), + tit = c(0.1, 0.6, 1.1, 0.6)) + palHeaVc <- rev(rainbow(ceiling(256 * 1.5))[1:256]) + + ## Functions + + axiPreF <- function(valVn, + lenN) { + + if(NA %in% valVn) { + warning("NA in valVn") + valVn <- as.vector(na.omit(valVn)) + } + + if(lenN < length(valVn)) + stop("The length of in vector must be inferior to the length of the length parameter.") + + if(length(valVn) < lenN) + valVn <- seq(from = min(valVn), to = max(valVn), length.out = lenN) + + preValVn <- pretty(valVn) + + preLabVn <- preAtVn <- c() + + for(n in 1:length(preValVn)) + if(min(valVn) < preValVn[n] && preValVn[n] < max(valVn)) { + preLabVn <- c(preLabVn, preValVn[n]) + preAtVn <- c(preAtVn, which(abs(valVn - preValVn[n]) == min(abs(valVn - preValVn[n])))[1]) + } + + return(list(atVn = preAtVn, + labVn = preLabVn)) + + } + + colF <- function(vecVn) + sapply(vecVn, + function(outN) { + if(outN < ploRgeVn[1]) + return(palHeaVc[1]) + else if(outN > ploRgeVn[2]) + return(palHeaVc[256]) + else return(palHeaVc[round((outN - ploRgeVn[1]) / diff(ploRgeVn) * 256 + 1)])}) + + obsColF <- function(typVc) { + + ## available color palette + palVc <- palette() + + ## colors for common types are set aside + palVc <- palVc[!(palVc %in% c("black", "red", "green3"))] + + ## filling in the types with dedicated colors + samTypVc <- sort(unique(samDF[, "sampleType"])) + samColVc <- character(length(samTypVc)) + if("blank" %in% samTypVc) + samColVc[grepl("blank", samTypVc)] <- "black" + if("pool" %in% samTypVc) + samColVc[grepl("pool", samTypVc)] <- "red" + if("sample" %in% samTypVc) + samColVc[grepl("sample", samTypVc)] <- "green4" + + ## filling in the other types + palColI <- 1 + palColMaxI <- length(palVc) + + while(any(samColVc == "")) { + typToColI <- which(samColVc == "")[1] + if(palColI <= palColMaxI) + samColVc[typToColI] <- palVc[palColI] + else + samColVc[typToColI] <- "gray" + palColI <- palColI + 1 + } + + names(samColVc) <- samTypVc + + samColVc[typVc] + + } + + par(font = 2, + font.axis = 2, + font.lab = 2, + pch=18) + + layout(matrix(c(1, 3, 4, 5, 5, + 1, 7, 7, 7, 6, + 2, 7, 7, 7, 6), + byrow = TRUE, + nrow = 3), + heights = c(1.8, 1.2, 2.5), + widths = c(3.5, 1.8, 2.8, 1, 0.8)) + + ## Colors + ##------- + + if("sampleType" %in% colnames(samDF)) { + obsColVc <- obsColF(samDF[, "sampleType"]) + } else + obsColVc <- rep("black", nrow(samDF)) + + ## PCA and Hotelling ellipse + ##-------------------------- + + vVn <- getPcaVarVn(ropLs) + vRelVn <- vVn / ncol(datMN) + + par(mar = marLs[["pca"]]) + + plot(ropScoreMN, + type = "n", + xlab = "", + ylab = "", + xlim = range(ropScoreMN[, 1]) * 1.1) + mtext(paste("t1 (", round(vRelVn[1] * 100), "%)", sep = ""), + cex = 0.7, + line = 2, + side = 1) + mtext(paste("t2 (", round(vRelVn[2] * 100), "%)", sep = ""), + cex = 0.7, + las = 0, + line = 2, + side = 2) + abline(h = 0, lty = "dashed") + abline(v = 0, lty = "dashed") + radVn <- seq(0, 2 * pi, length.out = 100) + + hotFisN <- hotN * qf(1 - thrVn["pvalue"], 2, n - 2) + lines(sqrt(var(ropScoreMN[, 1]) * hotFisN) * cos(radVn), + sqrt(var(ropScoreMN[, 2]) * hotFisN) * sin(radVn)) + + text(ropScoreMN[, 1], + ropScoreMN[, 2], + cex = 0.7, + col = obsColVc, + labels = rownames(datMN)) + + if("sampleType" %in% colnames(samDF)) { + obsColVuc <- obsColVc[sort(unique(names(obsColVc)))] + legOrdVc <- c("blank", paste0("pool", 8:1), "pool", "other", "sample") + obsColVuc <- obsColVuc[legOrdVc[legOrdVc %in% names(obsColVuc)]] + + text(rep(par("usr")[1], times = length(obsColVuc)), + par("usr")[3] + (0.97 - length(obsColVuc) * 0.03 + 1:length(obsColVuc) * 0.03) * diff(par("usr")[3:4]), + col = obsColVuc, + font = 2, + labels = names(obsColVuc), + pos = 4) + } + + ## Missing/low intensities and decile values + ##------------------------------------------ + + par(mar = marLs[["sam"]]) + + plot(missZscoVn, + deciZscoMaxVn, + type = "n", + xlab = "", + ylab = "", + xlim = c(min(missZscoVn), + max(missZscoVn) + 0.5)) + mtext("amount of missing values (z-score)", + cex = 0.7, + line = 2, + side = 1) + mtext("deciles (zscore)", + cex = 0.7, + las = 0, + line = 2, + side = 2) + abline(h = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") + abline(v = qnorm(1 - thrVn["pvalue"] / 2) * c(-1, 1), lty = "dashed") + text(missZscoVn, + deciZscoMaxVn, + cex = 0.7, + col = obsColVc, + labels = rownames(datMN)) + + ## tit: Title + ##----------- + + par(mar = marLs[["tit"]]) + plot(0:1, bty = "n", type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "") + text(1.5, 1, cex = 1.3, labels = "Quality Metrics") + text(1, 0.85, adj=0, cex = 1.1, labels = paste0("NAs: ", + round(length(which(is.na(c(datMN)))) / cumprod(dim(datMN))[2] * 100), "%")) + text(1, 0.75, adj=0, cex = 1.1, labels = paste0("0 values: ", + round(sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, 2), "%")) + text(1, 0.65, adj=0, cex = 1.1, labels = paste0("min: ", signif(min(datMN, na.rm=TRUE), 2))) + text(1, 0.55, adj=0, cex = 1.1, labels = paste0("median: ", signif(median(datMN, na.rm=TRUE), 2))) + text(1, 0.45, adj=0, cex = 1.1, labels = paste0("mean: ", signif(mean(datMN, na.rm=TRUE), 2))) + text(1, 0.35, adj=0, cex = 1.1, labels = paste0("max: ", signif(max(datMN, na.rm=TRUE), 2))) + if("sampleType" %in% colnames(samDF) && + "pool" %in% samDF[, "sampleType"]) + text(1, + 0.25, + adj=0, cex = 1.1, + labels = paste0("pool CV < ", + round(thrVn["poolCv"] * 100), "%: ", + round(sum(varDF[, "pool_CV"] < thrVn["poolCv"]) / nrow(varDF) * 100), + "%")) + + text(1, 0.1, adj=0, labels = paste0("Thresholds used in plots:")) + text(1, 0, adj=0, labels = paste0(" p-value = ", thrVn["pvalue"])) + + ## dri: Analytical drift + ##---------------------- + + par(mar = marLs[["dri"]]) + + ## ordering + + driDatMN <- datMN + driSamDF <- samDF + + driSamDF[, "ordIniVi"] <- 1:nrow(driDatMN) + + if("injectionOrder" %in% colnames(driSamDF)) { + if("batch" %in% colnames(driSamDF)) + ordVi <- order(driSamDF[, "batch"], + driSamDF[, "injectionOrder"]) + else + ordVi <- order(driSamDF[, "injectionOrder"]) + } else + ordVi <- 1:nrow(driDatMN) + + driDatMN <- driDatMN[ordVi, ] + driSamDF <- driSamDF[ordVi, ] + + driColVc <- rep("black", nrow(driDatMN)) + if("sampleType" %in% colnames(driSamDF)) + driColVc <- obsColF(driSamDF[, "sampleType"]) + + plot(rowSums(driDatMN, na.rm=TRUE), + col = driColVc, + pch = 18, + xlab = "", + ylab = "") + + mtext("injection order", + cex = 0.7, + line = 2, + side = 1) + + mtext("Sum of intens. for all variables", + cex = 0.7, + line = 2, + side = 2) + + ## msd: Sd vs Mean plot + ##--------------------- + + par(mar = marLs[["msd"]]) + plot(apply(datMN, 2, function(y) mean(y, na.rm = TRUE)), + apply(datMN, 2, function(y) sd(y, na.rm = TRUE)), + col=obsColVc, + pch = 18, + xlab = "", + ylab = "") + mtext("mean", + cex = 0.7, + line = 2, + side = 1) + mtext("sd", + cex = 0.7, + line = 2, + side = 2) + + ## sca-6: Color scale + ##------------------- + + par(mar = marLs[["sca"]]) + + ylimVn <- c(0, 256) + ybottomVn <- 0:255 + ytopVn <- 1:256 + + plot(x = 0, + y = 0, + font.axis = 2, + font.lab = 2, + type = "n", + xlim = c(0, 1), + ylim = ylimVn, + xlab = "", + ylab = "", + xaxs = "i", + yaxs = "i", + xaxt = "n", + yaxt = "n") + + rect(xleft = 0, + ybottom = ybottomVn, + xright = 1, + ytop = ytopVn, + col = palHeaVc, + border = NA) + + eval(parse(text = paste("axis(at = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)) , max(datMN, na.rm = TRUE)), 256)$atVn, + font = 2, + font.axis = 2, + labels = axiPreF(c(ifelse(min(datMN, na.rm = TRUE) == -Inf, yes = 0, no = min(datMN, na.rm = TRUE)), max(datMN, na.rm = TRUE)), 256)$labVn, + las = 1, + lwd = 2, + lwd.ticks = 2, + side = 2, + xpd = TRUE)", sep = ""))) + + arrows(par("usr")[1], + par("usr")[4], + par("usr")[1], + par("usr")[3], + code = 0, + lwd = 2, + xpd = TRUE) + + ## ima: Image + ##----------- + + par(mar = marLs[["ima"]]) + + ploRgeVn <- range(datMN, na.rm = TRUE) + + imaMN <- t(datMN)[, rev(1:nrow(datMN)), drop = FALSE] + + image(x = 1:nrow(imaMN), + y = 1:ncol(imaMN), + z = imaMN, + col = palHeaVc, + font.axis = 2, + font.lab = 2, + xaxt = "n", + yaxt = "n", + xlab = "", + ylab = "") + + if(length(rownames(datMN)) == 0) { + rowNamVc <- rep("", times = nrow(datMN)) + } else + rowNamVc <- rownames(datMN) + + if(length(colnames(datMN)) == 0) { + colNamVc <- rep("", times = ncol(datMN)) + } else + colNamVc <- colnames(datMN) + + xlaVc <- paste(paste(rep("[", 2), + c(1, nrow(imaMN)), + rep("] ", 2), + sep = ""), + rep("\n", times = 2), + c(colNamVc[1], tail(colNamVc, 1)), + sep = "") + + for(k in 1:2) + axis(side = 3, + hadj = c(0, 1)[k], + at = c(1, nrow(imaMN))[k], + cex = 0.8, + font = 2, + labels = xlaVc[k], + line = -0.5, + tick = FALSE) + + + ylaVc <- paste(paste(rep("[", times = 2), + c(ncol(imaMN), 1), + rep("]", times = 2), + sep = ""), + rep("\n", times = 2), + c(tail(rowNamVc, 1), rowNamVc[1]), + sep = "") + + for(k in 1:2) + axis(side = 2, + at = c(1, ncol(imaMN))[k], + cex = 0.8, + font = 2, + hadj = c(0, 1)[k], + labels = ylaVc[k], + las = 0, + line = -0.5, + lty = "blank", + tick = FALSE) + + box(lwd = 2) + + + } + + + zScoreF <- function(x) { + sdxN <- sd(x, na.rm = TRUE) + if(sdxN < epsN) + return(rep(0, length(x))) + else + return((x - mean(x, na.rm = TRUE)) / sdxN) + } + + + ## Option + ##------- + + strAsFacL <- options()$stringsAsFactors + options(stingsAsFactors = FALSE) + + ## Constants + ##---------- + + epsN <- .Machine[["double.eps"]] ## [1] 2.22e-16 + + + ##------------------------------ + ## Start + ##------------------------------ + + if(!is.null(log.txtC)) + sink(log.txtC) + + ## Description + ##------------ + + cat("\n\nData description:\n\n", sep = "") + cat("observations:", nrow(datMN), "\n") + cat("variables:", ncol(datMN), "\n") + cat("missing:", sum(is.na(datMN)), "\n") + cat("0 values (%):", + sum(abs(datMN) < epsN, na.rm=TRUE) / cumprod(dim(datMN))[2] * 100, "\n") + cat("min:", min(datMN, na.rm=TRUE), "\n") + cat("mean:", signif(mean(datMN, na.rm=TRUE), 2), "\n") + cat("median:", signif(median(datMN, na.rm=TRUE), 2), "\n") + cat("max:", signif(max(datMN, na.rm=TRUE), 2), "\n") + + if("sampleType" %in% colnames(samDF)) { + cat("\nSample types:\n", sep = "") + print(table(samDF[, "sampleType"])) + cat("\n", sep="") + } + + + ##------------------------------ + ## Variable metrics + ##------------------------------ + + + ## 'blank' observations + + if("sampleType" %in% colnames(samDF) && "blank" %in% samDF[, "sampleType"]) { + + cat("\nVariables: Blank mean, sd, and CV\n", sep="") + + blkVl <- samDF[, "sampleType"] == "blank" + + if(sum(blkVl) == 1) + varDF[, "blank_mean"] <- datMN[blkVl, ] + else + varDF[, "blank_mean"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) + + if(sum(blkVl) == 1) + varDF[, "blank_sd"] <- rep(0, nrow(varDF)) + else + varDF[, "blank_sd"] <- apply(datMN[blkVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) + + varDF[, "blank_CV"] <- varDF[, "blank_sd"] / varDF[, "blank_mean"] + + } + + + ## 'sample' observations + + if("sampleType" %in% colnames(samDF) && "sample" %in% samDF[, "sampleType"]) { + + cat("\nVariables: Sample mean, sd, and CV\n", sep="") + + samVl <- samDF[, "sampleType"] == "sample" + + if(sum(samVl) == 1) + varDF[, "sample_mean"] <- datMN[samVl, ] + else + varDF[, "sample_mean"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) + + if(sum(samVl) == 1) + varDF[, "sample_sd"] <- rep(0, nrow(varDF)) + else + varDF[, "sample_sd"] <- apply(datMN[samVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) + + varDF[, "sample_CV"] <- varDF[, "sample_sd"] / varDF[, "sample_mean"] + + } + + ## 'blank' mean / 'sample' mean ratio + + if(all(c("blank_mean", "sample_mean") %in% colnames(varDF))) { + + cat("\nVariables: Blank mean over sample mean\n", sep="") + + varDF[, "blankMean_over_sampleMean"] <- varDF[, "blank_mean"] / varDF[, "sample_mean"] + + } + + ## 'pool' observations + + if("sampleType" %in% colnames(samDF) && "pool" %in% samDF[, "sampleType"]) { + + cat("\nVariables: Pool mean, sd, and CV\n", sep="") + + pooVl <- samDF[, "sampleType"] == "pool" + + if(sum(pooVl) == 1) + varDF[, "pool_mean"] <- datMN[pooVl, ] + else + varDF[, "pool_mean"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) mean(varVn, na.rm=TRUE)) + + if(sum(pooVl) == 1) + varDF[, "pool_sd"] <- rep(0, nrow(varDF)) + else + varDF[, "pool_sd"] <- apply(datMN[pooVl, , drop=FALSE], 2, function(varVn) sd(varVn, na.rm=TRUE)) + + varDF[, "pool_CV"] <- varDF[, "pool_sd"] / varDF[, "pool_mean"] + + } + + ## 'pool' CV / 'sample' CV ratio + + if(all(c("pool_CV", "sample_CV") %in% colnames(varDF))) { + + cat("\nVariables: Pool CV over sample CV\n", sep="") + + varDF[, "poolCV_over_sampleCV"] <- varDF[, "pool_CV"] / varDF[, "sample_CV"] + + } + + + ## 'pool' dilutions + + if("sampleType" %in% colnames(samDF) && any(grepl("pool.+", samDF[, "sampleType"]))) { + + pooVi <- grep("pool.*", samDF[, "sampleType"]) ## pool, pool2, pool4, poolInter, ... + + pooNamVc <- samDF[pooVi, "sampleType"] + + if(pooAsPo1L) { + + pooNamVc[pooNamVc == "pool"] <- "pool1" ## 'pool' -> 'pool1' + + } else { + + pooVl <- pooNamVc == "pool" + pooVi <- pooVi[!pooVl] + pooNamVc <- pooNamVc[!pooVl] + + } + + pooDilVc <- gsub("pool", "", pooNamVc) + + pooDilVl <- sapply(pooDilVc, allDigF) + + if(sum(pooDilVl)) { + + cat("\nVariables: Pool dilutions\n", sep="") + + pooNamVc <- pooNamVc[pooDilVl] ## for the plot + + pooVi <- pooVi[pooDilVl] + + dilVn <- 1 / as.numeric(pooDilVc[pooDilVl]) + + varDF[, "poolDil_correl"] <- apply(datMN[pooVi, , drop=FALSE], 2, + function(varVn) cor(dilVn, varVn)) + + varDF[, "poolDil_pval"] <- apply(datMN[pooVi, , drop=FALSE], 2, + function(varVn) cor.test(dilVn, varVn)[["p.value"]]) + + } + + } + + + ##------------------------------ + ## Sample metrics + ##------------------------------ + + + ## Hotelling: p-value associated to the distance from the center in the first PCA score plane + + cat("\nObservations: Hotelling ellipse\n", sep="") + + ropLs <- opls(datMN, predI = 2, plotL = FALSE, printL = FALSE) + + ropScoreMN <- getScoreMN(ropLs) + + invCovScoMN <- solve(cov(ropScoreMN)) + + n <- nrow(datMN) + hotN <- 2 * (n - 1) * (n^2 - 1) / (n^2 * (n - 2)) + + hotPvaVn <- apply(ropScoreMN, + 1, + function(x) + 1 - pf(1 / hotN * t(as.matrix(x)) %*% invCovScoMN %*% as.matrix(x), 2, n - 2)) + + samDF[, "hotelling_pval"] <- hotPvaVn + + ## p-value associated to number of missing values + + cat("\nObservations: Missing values\n", sep="") + + missZscoVn <- zScoreF(apply(datMN, + 1, + function(rowVn) { + sum(is.na(rowVn)) + })) + + samDF[, "missing_pval"] <- sapply(missZscoVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) + + ## p-value associated to the deciles of the profiles + + cat("\nObservations: Profile deciles\n", sep="") + + deciMN <- t(as.matrix(apply(datMN, + 1, + function(x) quantile(x, 0.1 * 1:9, na.rm = TRUE)))) + + deciZscoMN <- apply(deciMN, 2, zScoreF) + + deciZscoMaxVn <- apply(deciZscoMN, 1, function(rowVn) rowVn[which.max(abs(rowVn))]) + + samDF[, "decile_pval"] <- sapply(deciZscoMaxVn, function(zscoN) 2 * (1 - pnorm(abs(zscoN)))) + + + ##------------------------------ + ## Figure + ##------------------------------ + + cat("\nPlotting\n") + + if(!is.null(fig.pdfC)) { + pdf(fig.pdfC, width=11, height=7) + } else + dev.new(width=11, height=7) + + datPloF() + + if(!is.null(fig.pdfC)) + dev.off() + + + ##------------------------------ + ## End + ##------------------------------ + + + if(!is.null(log.txtC)) + sink() + + options(stingsAsFactors = strAsFacL) + options(warn = optWrnN) + + return(list(samDF=samDF, + varDF=varDF)) + + +} ## qualityMetricsF