# HG changeset patch # User workflow4metabolomics # Date 1652693101 0 # Node ID a4d89d47646f452b90ab2f14fbfd277397c665ba planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8 diff -r 000000000000 -r a4d89d47646f diagmfl.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/diagmfl.R Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,918 @@ +#' Calcul des grandeurs "diagnostiques" +#' +#' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner +#' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques, +#' et des observations non ordonnees sujet par sujet (dernier point a verifier.) +#' +#' @detail Les graphiques, les calculs associés et les notations utilisees dans le script suivent +#' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear +#' mixed model assumptions and some remedial measures, International Statistical Review +#' (doi:10.1111/insr.12178) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export lmer.computeDiag + + + +lmer.computeDiag <- function(mfl) { + + ## Check arguments --------------------------------------------------------- + + if (length(mfl@flist) > 1) + stop("Several 'grouping level' for random effect not implemented yet.") + + + ## extracting information from mfl models ------------------------------------------------------------- + # data + df <- mfl@frame + responseC <- names(df)[1] + unitC <- names(mfl@flist)[1] + # observations + yVn <- df[, responseC] + nobsN <- length(yVn) + # units + idunitVc <- levels(mfl@flist[[1]]) + nunitN <- length(unique(idunitVc)) + #X + xMN <- mfl@pp$X + pN <- ncol(xMN) + #Z + zMN <- t(as.matrix(mfl@pp$Zt)) + # Estimated covariance matrix of random effects (Gam) + aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping + aux2 <- attr(aux, "stddev") + gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2)) + gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) + q <- dim(gMN)[1] + # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model) + sigsqN <- attr(VarCorr(mfl), "sc")^2 + rMN <- sigsqN * diag(nobsN) + # Estimated covariance matrix of Y + vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN + invvMN <- MASS::ginv(vMN) + # H and Q matrix + hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN) + qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN + # eblue and eblup + eblueVn <- mfl@beta + eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl) + rownames(eblupVn) <- colnames(zMN) + ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- + ## Marginal and individual predictions, residuals and variances + marpredVn <- xMN %*% eblueVn + marresVn <- yVn - marpredVn + marvarMN <- vMN - xMN %*% hMN %*% t(xMN) + condpredVn <- marpredVn + zMN %*% eblupVn + condresVn <- yVn - condpredVn + condvarMN <- rMN %*% qMN %*% rMN + ## Analysis of marginal and conditional residuals + stmarresVn <- stcondresVn <- rep(0, nobsN) + lesverVn <- rep(0, nunitN) + names(lesverVn) <- idunitVc + for (i in 1:nunitN) { + idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i + miN <- length(idxiVn) + ## standardization of marginal residual + stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn]) + ##Standardized Lessafre and Verbeke's measure + auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn]) + lesverVn[i] <- sum(diag(auxMN %*% t(auxMN))) + ## standardization of conditional residual + stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn]) + } + lesverVn <- lesverVn / sum(lesverVn) + ## Least confounded conditional residuals + ## EBLUP analysis (Mahalanobis' distance) + varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN + mdistVn <- rep(0, nunitN) + qm <- q - 1 + for (j in 1:nunitN) { + gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)] + eblupi <- eblupVn[(q * j - qm):(q * j)] + mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi + } + names(mdistVn) <- levels(mfl@flist[[1]]) + ## output ---------------------------------------------- + return(list( + data = df, + q = q, + eblue = eblueVn, + eblup = eblupVn, + marginal.prediction = marpredVn, + conditional.prediction = condpredVn, + std.marginal.residuals = stmarresVn, + std.conditional.residuals = stcondresVn, + mahalanobis.distance = mdistVn, + std.mahalanobis.distance = mdistVn / sum(mdistVn), + std.lesaffreverbeke.measure = lesverVn + )) +} + + +#' Wrapper function for diagnostic plots of 'lmer' linear mixed models +#' +#' (W4M mixmod) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param title aa +#' @param outlier.limit aa +#' @param pvalCutof aa +#' @param resC aa +#' @param uniC aa +#' @param fixC aa +#' @param lest.confounded Not used yet. +#' @return NULL +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export diagmflF + + +diagmflF <- function(mfl, + title = "", + outlier.limit = 3, + pvalCutof = 0.05, + resC = "vd", + uniC = "subject", + timC = "time", + fixC = "fixfact", + least.confounded = FALSE) { + ## diagnostics + diagLs <- lmer.computeDiag(mfl) + ## plots + blank <- rectGrob(gp = gpar(col = "white")) + rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey")) + grid.arrange(blank, + plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE, + label_factor = c(uniC, fixC, timC)), + blank, + plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE, + label_factor = c(uniC, fixC, timC)), + blank, + plot_condresQQplot(diagLs, plotL = FALSE), + blank, + plot_lesaffreVeerbeke(diagLs, plotL = FALSE), + blank, + plot_randomEffect(mfl, plotL = FALSE)[[1]], + blank, + plot_mahalanobisKhi2(diagLs, plotL = FALSE), + blank, + plot_mahalanobis(diagLs, plotL = FALSE), + blank, + blank, + blank, + top = textGrob(title, gp = gpar(fontsize = 40, font = 4)), + layout_matrix = matrix(c(rep(1, 7), + 2, 3, rep(4, 3), 20, 21, + rep(5, 7), + 6:12, + rep(13, 7), + 14:18, rep(19, 2)), + ncol = 7, nrow = 6, byrow = TRUE), + heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3), + widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22)) +} + +####################################################################################################### +## Raw data time courses +####################################################################################################### + +#' Visualization of raw time course +#' +#' Une +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param responseC Name of the 'response' variable +#' @param timeC Name of the 'time' variable +#' @param subjectC Name of the 'subject' variable +#' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment) +#' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE +#' @param plotL Boolean +#' @param colorType One of NA, FIXFACT or SUBJECT +#' @param shapeType One of NA, FIXFACT or SUBJECT +#' @param lineType One of NA, FIXFACT or SUBJECT +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_timeCourse + + + +plot_timeCourse <- function(mfl, + responseC, + timeC, + subjectC, + fixfactC = NULL, + offset_subject = FALSE, + plotL = TRUE, + colorType = NA, ## subject, fixfact, none or NA + shapeType = NA, ## subject, fixfact, none or NA + lineType = NA ## subject, fixfact, none or NA +) { + ## Data ----- + if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) { + DF <- mfl@frame + } else if (class(mfl) == "data.frame") { + DF <- mfl + } else { + stop("'mfl' argument must be a linear mixed effect model or a data frame.") + } + ## Format data ----- + if (is.null(fixfactC)) { + DF <- DF[, c(responseC, timeC, subjectC)] + colnames(DF) <- c("DV", "TIME", "SUBJECT") + meanDF <- aggregate(DF$DV, + by = list(SUBJECT = DF$SUBJECT, + TIME = DF$TIME), + FUN = mean, + na.rm = TRUE) + colnames(meanDF) <- c("SUBJECT", "TIME", "DV") + meanDF$GROUP <- meanDF$SUBJECT + } else{ + DF <- DF[, c(responseC, fixfactC, timeC, subjectC)] + colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT") + meanDF <- aggregate(DF$DV, + by = list(SUBJECT = DF$SUBJECT, + TIME = DF$TIME, + FIXFACT = DF$FIXFACT), + FUN = mean, + na.rm = TRUE) + colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV") + meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_") + } + ## Offset ----- + if (offset_subject) { + offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE) + offsetVn <- offsetMN[, 2] + names(offsetVn) <- offsetMN[, 1] + rm(offsetMN) + DF$DV <- DF$DV - offsetVn[DF$SUBJECT] + meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)] + } + ## Graphical parameters ----- + xlabC <- timeC + ylabC <- responseC + titC <- "Individual time-courses" + if (offset_subject) { + ylabC <- paste(ylabC, "minus 'within-subject' empirical mean") + titC <- paste(titC, "('within-subject' empirical mean offset)") + } + ## color + if (is.na(colorType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + colorType <- "SUBJECT" + } else { + colorType <- "FIXFACT" + } + colTxt <- paste(", colour=", colorType) + } else if (colorType == "none") { + colTxt <- "" + } else { + colTxt <- paste(", colour=", colorType) + } + ## lineType + if (is.na(lineType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + linTxt <- "" + } else { + linTxt <- paste(", linetype=", + ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) + } + } else if (lineType == "none") { + linTxt <- "" + } else { + linTxt <- paste(", linetype=", lineType) + } + ## shapeType + if (is.na(shapeType)) { ## automaticatical attribution + if (is.null(fixfactC)) { + shaTxt <- "" + } else { + shaTxt <- paste(", shape=", + ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) + } + } else if (shapeType == "none") { + shaTxt <- "" + } else { + shaTxt <- paste(", shape=", shapeType) + } + ## aes mapping + txtMap <- paste("aes(x = TIME, y = DV", + colTxt, shaTxt, ")", sep = "") + txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ", + colTxt, linTxt, ")", sep = "") + ## plot and output + p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) + + ggtitle(titC) + + xlab(xlabC) + ylab(ylabC) + + theme(legend.position = "left", + plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_point() + + geom_line(eval(parse(text = txtLineMap)), data = meanDF) + + theme_bw() + + NULL + if (plotL) plot(p) + invisible(p) +} + +####################################################################################################### +## Post-hoc estimate +####################################################################################################### +#' Visualization of fixed effects (post-hoc estimates) +#' +#' Description +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param pvalCutof User pvalue cut of +#' @param plotL Boolean +#' @param titC Title of the plot +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_posthoc +plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") { + ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL)) + colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue" + ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) + ## modif JF pour tenir compte du seuil de pvalues defini par le user + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "") + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "") + ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "") + ddlsm1$levels <- rownames(ddlsm1) + ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { + strsplit(namC, split = " ", fixed = TRUE)[[1]][1] + }) + colValue <- c("grey", "yellow", "orange", "red") + names(colValue) <- c("NS", + paste("p-value < ", pvalCutof, sep = ""), + paste("p-value < ", pvalCutof / 5, sep = ""), + paste("p-value < ", pvalCutof / 10, sep = "")) + p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + + geom_bar(aes(fill = Significance), stat = "identity") + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_manual(values = colValue) + + geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) + + ggtitle(titC) + xlab("") + + NULL + if (plotL) plot(p) + invisible(p) +} + +####################################################################################################### +## Visualisation des effets aléatoires +####################################################################################################### +#' Visualization of random effects +#' +#' Equivalent of dotplot(ranef) +#' +#' @param mfl A linear mixed model fitted via lmer or a data frame containing data +#' @param plotL Logical +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_randomEffect + + +plot_randomEffect <- function(mfl, plotL = TRUE) { + ## Estimation et format des effets aléatoires + randomEffect <- ranef(mfl, condVar = TRUE) + DF <- data.frame(randomEffect = rep(names(randomEffect), + times = sapply(seq_len(length(randomEffect)), + function(lsi) { + return(length(unlist(randomEffect[[lsi]])))}))) + DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF)) + for (rafC in names(randomEffect)) { + eff <- randomEffect[[rafC]] + DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff)) + DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff)) + DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff) + condvar <- attr(randomEffect[[rafC]], "postVar") + se <- NULL + for (coli in seq_len(ncol(eff))) { + se <- c(se, + sapply(seq_len(nrow(eff)), + function(i) { + return(condvar[coli, coli, i])})) + } + DF$condVar[which(DF$randomEffect == rafC)] <- se + } + DF$se <- sqrt(DF$condVar) + DF$lower <- DF$estimate - 1.96 * DF$se + DF$upper <- DF$estimate + 1.96 * DF$se + ## Plot + plotLs <- vector("list", length(randomEffect)) + names(plotLs) <- names(randomEffect) + for (pi in seq_len(length(plotLs))) { + subDF <- DF[DF$randomEffect == names(plotLs)[pi], ] + subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ] + p <- ggplot(data = subDF, + mapping = aes(x = estimate, y = reorder(x2, estimate)) + ) + + geom_point(size = 3) + + geom_segment(aes(xend = lower, yend = x2)) + + geom_segment(aes(xend = upper, yend = x2)) + + facet_wrap(~x1, ncol = length(unique(subDF$x1))) + + ylab("") + xlab("") + + ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_vline(xintercept = 0, linetype = "dashed") + + theme_bw() + plotLs[[pi]] <- p + if (plotL) plot(p) + } + invisible(plotLs) +} + +####################################################################################################### +## Linearité des effets et outlying observations +####################################################################################################### +#' Linarity of the fixed effect with regard to the continuous time +#' +#' @param diagLs diagnostic list +#' @param hlimitN Limit value for outliers (e.g.2 or 3) +#' @param plotL Boolean +#' @param label_factor Column of observation names used to label outlying values +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_linearity +#' + +plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { + df <- cbind.data.frame(diagLs$data, + marginal.prediction = diagLs$marginal.prediction, + standardized.marginal.residuals = diagLs$std.marginal.residuals) + # outlier annotation + df$outliers <- rep("", nrow(df)) + outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN) + df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] + if (length(label_factor) >= 1) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[1]], + sep = "_") + if (length(label_factor) > 1) { + for (li in 2:length(label_factor)) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[li]], + sep = ".") + } + } + } + p <- ggplot(data = df, + aes(x = marginal.prediction, + y = standardized.marginal.residuals)) + + geom_point(size = 2) + + geom_hline(yintercept = 0, col = "grey") + + geom_smooth(aes(x = marginal.prediction, + y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") + + ggtitle("Linearity of effects/outlying obervations") + + xlab("Marginal predictions") + + ylab("Standardized marginal residuals") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + + geom_text(aes(label = outliers), hjust = 0, vjust = 0) + if (plotL) plot(p) + invisible(p) +} + + +####################################################################################################### +## EBLUP +####################################################################################################### + + +#' Mahalanobis distance +#' +#' @param diagLs diagnostic list +#' @param plotL Boolean +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_mahalanobis +#' + + +plot_mahalanobis <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), + mal = diagLs$std.mahalanobis.distance) + ## Outlying subjects + p <- + ggplot(aes(y = mal, x = unit), data = unitDf) + + geom_point(size = 3) + + ylab("Standardized Mahalanobis distance") + + geom_vline(xintercept = 0, linetype = "dashed") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") + + geom_text(aes(label = unit), + data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ], + hjust = 1, vjust = 0) + + ggtitle("Outlying unit") + + xlab("unit") + if (plotL) plot(p) + invisible(p) +} + + + + + + +#' Mahalanobis distance (Chi2) +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_mahalanobisKhi2 +#' + + +plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), + mal = diagLs$mahalanobis.distance) + p <- qqplotF(x = unitDf$mal, + distribution = "chisq", + df = diagLs$q, + line.estimate = NULL, + conf = 0.95) + + xlab("Chi-squared quantiles") + + ylab("Mahalanobis distance") + + ggtitle("Normality of random effect") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + + + + + + + +####################################################################################################### +## Residus conditionels +####################################################################################################### + +## Presence of outlying observations and homoscedacity of residuals + +#' Homoskedacity of conditionalresiduals +#' +#' @param diagLs diagnostic list +#' @param hlimitN Limit value for outliers (e.g.2 or 3) +#' @param plotL Boolean +#' @param label_factor Column of observation names used to label outlying values +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_conditionalResiduals +#' + + +plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { + df <- cbind.data.frame(diagLs$data, + conditional.prediction = diagLs$conditional.prediction, + standardized.conditional.residuals = diagLs$std.conditional.residuals) + # outlier annotation + df$outliers <- rep("", nrow(df)) + outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN) + df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] + if (length(label_factor) >= 1) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[1]], + sep = "_") + if (length(label_factor) > 1) { + for (li in 2:length(label_factor)) { + df[outidx, "outliers"] <- paste(df[outidx, "outliers"], + df[outidx, label_factor[li]], + sep = ".") + } + } + } + p <- ggplot(data = df, + aes(x = conditional.prediction, + y = standardized.conditional.residuals)) + + geom_point(size = 2) + + geom_hline(yintercept = 0, col = "grey") + + geom_smooth(aes(x = conditional.prediction, + y = standardized.conditional.residuals), + data = df, se = FALSE, col = "blue", method = "loess") + + ggtitle("Homoscedasticity of conditional residuals/outlying observations") + + xlab("Individual predictions") + + ylab("Standardized conditional residuals") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + + geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + + geom_text(aes(label = outliers), hjust = 0, vjust = 0) + if (plotL) plot(p) + invisible(p) +} + + + + +#' Normality of conditionalresiduals +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_condresQQplot +#' + + +plot_condresQQplot <- function(diagLs, plotL = TRUE) { + df <- cbind.data.frame(diagLs$data, + conditional.prediction = diagLs$conditional.prediction, + standardized.conditional.residuals = diagLs$std.conditional.residuals) + p <- qqplotF(x = df$standardized.conditional.residuals, + distribution = "norm", + line.estimate = NULL, + conf = 0.95) + + xlab("Standard normal quantiles") + + ylab("Standardized conditional residual quantiles") + + ggtitle("Normality of conditional error") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + + + + + +####################################################################################################### +## Within-units covariance structure +####################################################################################################### + + +#' Lesaffre-Veerbeke measure +#' +#' @param diagLs diagnostic list +#' @param plotL aa +#' @return A plot +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export plot_lesaffreVeerbeke +#' + + +plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) { + unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure), + lvm = diagLs$std.lesaffreverbeke.measure) + p <- ggplot(data = unitDf, + aes(x = unit, + y = lvm)) + + geom_point(size = 2) + + theme(legend.position = "none") + + xlab("units") + + ylab("Standardized Lesaffre-Verbeke measure") + + geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") + + geom_text(aes(label = unit), + data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ], + hjust = 0, vjust = 0) + + ggtitle("Within-units covariance matrice") + + theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + if (plotL) plot(p) + invisible(p) +} + +##-------------------------------------------------------------------------------------------------## +## Helpers +##-------------------------------------------------------------------------------------------------## + + +## square root of a matrix +## From Rocha, Singer and Nobre + +#' square root of a matrix (Rocha) +#' +#' Description +#' +#' @param mat Matrix +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export sqrt.matrix + +sqrt.matrix <- function(mat) { + mat <- as.matrix(mat) # new line of code + singular_dec <- svd(mat, LINPACK = F) + U <- singular_dec$u + V <- singular_dec$v + D <- diag(singular_dec$d) + sqrtmatrix <- U %*% sqrt(D) %*% t(V) +} + + +## square root of a matrix +## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) +## (for matMN a n x n matrix that symetric and non-negative definite) + +#' square root of a matrix (Rocha) +#' +#' @param mat Matrix +#' @return A list +#' @author Natacha Lenuzza +#' @examples +#' print("hello !") +#' +#' @export sqrtmF + +sqrtmF <- function(matMN) { + matMN <- as.matrix(matMN) + ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.") + svd_dec <- svd(matMN) + invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v)) +} + + +## qqplotF +## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c + +qqplotF <- function(x, + distribution = "norm", ..., + line.estimate = NULL, + conf = 0.95, + labels = names(x)) { + q.function <- eval(parse(text = paste0("q", distribution))) + d.function <- eval(parse(text = paste0("d", distribution))) + x <- na.omit(x) + ord <- order(x) + n <- length(x) + P <- ppoints(length(x)) + daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) + if (is.null(line.estimate)) { + Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) + Q.z <- q.function(c(0.25, 0.75), ...) + b <- diff(Q.x) / diff(Q.z) + coef <- c(Q.x[1] - b * Q.z[1], b) + } else { + coef <- coef(line.estimate(ord.x ~ z)) + } + zz <- qnorm(1 - (1 - conf) / 2) + SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n) + fit.value <- coef[1] + coef[2] * daf$z + daf$upper <- fit.value + zz * SE + daf$lower <- fit.value - zz * SE + if (!is.null(labels)) { + daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "") + } + p <- ggplot(daf, aes(x = z, y = ord.x)) + + geom_point() + + geom_abline(intercept = coef[1], slope = coef[2], col = "red") + + geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") + + geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") + + xlab("") + ylab("") + if (!is.null(labels)) p <- p + geom_text(aes(label = label)) + return(p) +} + + +## histogramm +histF <- function(x, sd_x = NULL, breaks = "scott") { + if (is.null(sd_x)) + sd_x <- sd(x) + ## Bandwith estimation (default is Scott) + if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) + breaks <- "scott" + if (breaks %in% c("sqrt", "sturges", "rice")) { + k <- switch(breaks, + sqrt = sqrt(length(x)), + sturges = floor(log2(x)) + 1, + rice = floor(2 * length(x) ^ (1 / 3)) + ) + bw <- diff(range(x)) / k + }else{ + bw <- switch(breaks, + scott = 3.5 * sd_x / length(x) ^ (1 / 3), + fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3)) + ) + } + + + daf <- data.frame(x = x) + ## graph + return(ggplot(data = daf, aes(x)) + + geom_histogram(aes(y = ..density..), + col = "black", fill = "grey", binwidth = bw) + + geom_density(size = 1.2, + col = "blue", + linetype = "blank", + fill = rgb(0, 0, 1, 0.1)) + + stat_function(fun = dnorm, + args = list(mean = 0, sd = sd_x), + col = "blue", size = 1.2) + + theme(legend.position = "none") + + xlab("")) +} + + +plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { + + ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not + if (ncol(df) > 3) { + varidx <- 4 + ffidx <- 1 + timidx <- 2 + individx <- 3 + } else { + varidx <- 3 + ffidx <- 1 + timidx <- 1 + individx <- 2 + } + nameVar <- colnames(df)[varidx] + fflab <- colnames(df)[ffidx] + ## Individual time-course + rawPlot <- + ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) + + geom_point() + + geom_line() + ggtitle("Individual time-courses (raw data)") + + ylab(nameVar) + + xlab(label = colnames(df)[2]) + + theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + ## Boxplot of fixed factor + bPlot <- + ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) + + geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) + + ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") + + theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) + ## Post-hoc estimates + ddlsm1 <- mfl + ddlsm1$name <- rownames(ddlsm1) + ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) + ## modif JF pour tenir compte du seuil de pvalues defini par le user + options("scipen" = 100, "digits" = 5) + pvalCutof <- as.numeric(pvalCutof) + bs <- 0.05; bm <- 0.01; bi <- 0.005 + if (pvalCutof > bm) { + bs <- pvalCutof + } else + if (pvalCutof < bm & pvalCutof > bi) { + bm <- pvalCutof; bs <- pvalCutof + } else + if (pvalCutof < bi) { + bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof + } + lbs <- paste("p-value < ", bs, sep = "") + lbm <- paste("p-value < ", bm, sep = "") + lbi <- paste("p-value < ", bi, sep = "") + cols <- paste("p-value < ", bs, sep = "") + colm <- paste("p-value < ", bm, sep = "") + coli <- paste("p-value < ", bi, sep = "") + valcol <- c("grey", "yellow", "orange", "red") + names(valcol) <- c("NS", lbs, lbm, lbi) + ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs + ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm + ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi + ddlsm1$levels <- rownames(ddlsm1) + ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { + strsplit(namC, split = " ", fixed = TRUE)[[1]][1] + }) + + phPlot <- + ggplot(ddlsm1, aes(x = levels, y = Estimate)) + + facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + + geom_bar(aes(fill = Significance), stat = "identity") + + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + + scale_fill_manual( + values = valcol) + + geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) + + ggtitle("Post-hoc estimates ") + xlab("") + + theme(plot.title = element_text(size = rel(1.2), face = "bold")) + + ## Final plotting + grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2), + phPlot, nrow = 2, + top = textGrob(title, gp = gpar(fontsize = 32, font = 4)) + ) + +} diff -r 000000000000 -r a4d89d47646f mixmodel.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel.xml Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,264 @@ + + ANOVA for repeated measures statistics + + + r-lme4 + r-lmertest + r-ggplot2 + r-gridExtra + bioconductor-multtest + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diaR == 'yes' + + + + + + + + + + + + + + + + + + + + + + + + + 10.18637/jss.v082.i13. + @ARTICLE{fisher, + author = {Benjamini Y. and Hochberg Y., + title = {Controlling the false discovery rate: a practical and powerful approach for multiple testing. Journal of the Royal Statistical Society}, + journal = {Series B (Methodological)}, + year = {1995}, + volume = {57}, + pages = {289-300} + } + 10.1093/bioinformatics/btu813 + + + diff -r 000000000000 -r a4d89d47646f mixmodel_script.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel_script.R Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,481 @@ +####### R functions to perform linear mixed model for repeated measures +####### on a multi var dataset using 3 files as used in W4M +############################################################################################################## +lmRepeated2FF <- function(ids, ifixfact, itime, isubject, ivd, ndim, nameVar=colnames(ids)[[ivd]], + pvalCutof = 0.05, dffOption, visu = visu, tit = "", least.confounded = FALSE, outlier.limit = 3) { + ### function to perform linear mixed model with 1 Fixed factor + Time + random factor subject + ### based on lmerTest package providing functions giving the same results as SAS proc mixed + options(scipen = 50, digits = 5) + + if (!is.numeric(ids[[ivd]])) stop("Dependant variable is not numeric") + if (!is.factor(ids[[ifixfact]])) stop("fixed factor is not a factor") + if (!is.factor(ids[[itime]])) stop("Repeated factor is not a factor") + if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor") + + ## factors + time <- ids[[itime]] + fixfact <- ids[[ifixfact]] + subject <- ids[[isubject]] + # dependant variables + vd <- ids[[ivd]] + + # argument of the function instead of re re-running ndim <- defColRes(ids, ifixfact, itime) + # nfp : number of main factors + model infos (REML, varSubject) + normality test + nfp <- ndim[1]; + # ncff number of comparison of the fixed factor + nlff <- ndim[2]; ncff <- ndim[3] + # nct number of comparison of the time factor + nlt <- ndim[4]; nct <- ndim[5] + # nci number of comparison of the interaction + nli <- ndim[6]; nci <- ndim[7] + # number of all lmer results + nresT <- ncff + nct + nci + ## initialization of the result vector (1 line) + ## 4 * because nresf for : pvalues + Etimates + lower CI + Upper CI + res <- data.frame(array(rep(NA, (nfp + 4 * nresT)))) + colnames(res)[1] <- "resultLM" + + ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip + ### after excluding NA, table function is used to seek subjects with only 1 data + ids <- ids[!is.na(ids[[ivd]]), ] + skip <- length(which(table(ids[[isubject]]) == 1)) + + if (skip == 0) { + + mfl <- lmer(vd ~ time + fixfact + time:fixfact + (1 | subject), ids) # lmer remix + + rsum <- summary(mfl, ddf = dffOption) + ## test Shapiro Wilks on the residus of the model + rShapiro <- shapiro.test(rsum$residuals) + raov <- anova(mfl, ddf = dffOption) + dlsm1 <- data.frame(difflsmeans(mfl, test.effs = NULL)) + ddlsm1 <- dlsm1 + ## save rownames and factor names + rn <- rownames(ddlsm1) + fn <- ddlsm1[, c(1, 2)] + ## writing the results on a single line + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesFactPval <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesInter <- rownames(ddlsm1)[-c(1:(nct + ncff))] + namesEstimate <- paste("estimate ", namesInter) + namespvalues <- paste("pvalue ", namesInter) + namesFactprinc <- c("pval_time", "pval_trt", "pval_inter") + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + + namesFactLowerCI <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesLowerCI <- paste("lowerCI ", namesInter, sep = "") + + namesFactUpperCI <- paste("UpperCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "") + namesUpperCI <- paste("UpperCI ", namesInter, sep = "") + + + ### lmer results on 1 vector row + # pvalue of shapiro Wilks test of the residuals + res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" + res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" + res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" + ### 3 principal factors pvalues results + shapiro test => nfp <- 4 + res[c((nfp - 2):nfp), ] <- raov[, 6] + rownames(res)[c((nfp - 2):nfp)] <- namesFactprinc + + #################### Residuals diagnostics for significants variables ######################### + ### Il at least 1 factor is significant and visu=TRUE NL graphics add to pdf + ## ajout JF du passage de la valeur de p-value cutoff + if (length(which(raov[, 6] <= pvalCutof)) > 0 & visu == "yes") { + diagmflF(mfl, title = tit, pvalCutof = pvalCutof, least.confounded = least.confounded, + outlier.limit = outlier.limit) + } + + # pvalue of fixed factor comparisons + nresf <- nresT + res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9] + res[(nfp + nct + 1):(nfp + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 9] + rownames(res)[(nfp + 1):(nfp + nct + ncff)] <- namesFactPval + res[(nfp + nct + ncff + 1):(nfp + nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 9] + rownames(res)[(nfp + nct + ncff + 1):(nfp + nresT)] <- namespvalues + # Estimate of the difference between levels of factors + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 3] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactEstim + res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 3] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf)] <- namesEstimate + # lower CI of the difference between levels of factors + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 7] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactLowerCI + res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresf), 7] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresf / 2))] <- namesLowerCI + # Upper CI of the difference between levels of factors + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8] + res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 8] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactUpperCI + res[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT)), ] <- ddlsm1[(nct + ncff + 1):(nresT), 8] + rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT))] <- namesUpperCI + + } else { + ## one of the subject has only one time, subject can't be a random variable + ## A repeated measure could be run instead function lme of package nlme, in next version? + res[1, ] <- NA + cat("\n Computing impossible for feature ", tit, ": at least one subject has only one time.\n") + } + tres <- data.frame(t(res)) + rownames(tres)[1] <- nameVar + cres <- list(tres, rn, fn) + return(cres) +} + +############################################################################################################## +lmRepeated1FF <- function(ids, ifixfact = 0, itime, isubject, ivd, ndim, nameVar = colnames(ids)[[ivd]], + dffOption, pvalCutof = 0.05) { + ### function to perform linear mixed model with factor Time + random factor subject + ### based on lmerTest package providing functions giving the same results as SAS proc mixed + + + if (!is.numeric(ids[[ivd]])) stop("Dependant variable is not numeric") + if (!is.factor(ids[[itime]])) stop("Repeated factor is not a factor") + if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor") + # Could be interesting here to add an experience plan check to give back a specific error message + # in case time points are missing for some individuals + + time <- ids[[itime]] + subject <- ids[[isubject]] + vd <- ids[[ivd]] ## dependant variables (quatitative) + + # nfp : nombre de facteurs principaux + model infos + normality test + nfp <- ndim[1] + # nlt number of time levels; nct number of comparisons of the time factor + nlt <- ndim[4] + nct <- ndim[5] + # number of all lmer results + nresT <- nct + ## initialization of the result vector (1 line) + res <- data.frame(array(rep(NA, (nfp + 4 * nresT)))) + colnames(res)[1] <- "resultLM" + + ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip + ### after excluding NA, table function is used to seek subjects with only 1 data + ids <- ids[!is.na(ids[[ivd]]), ] + skip <- length(which(table(ids[[isubject]]) == 1)) + + if (skip == 0) { + + mfl <- lmer(vd ~ time + (1 | subject), ids) # lmer remix + rsum <- summary(mfl, ddf = dffOption) + ## test Shapiro Wilks on the residus of the model + rShapiro <- shapiro.test(rsum$residuals) + raov <- anova(mfl, ddf = dffOption) + ## Sum of square : aov$'Sum Sq', Mean square : aov$`Mean Sq`, proba : aov$`Pr(>F)` + + ## Test of all differences estimates between levels as SAS proc mixed. + ## results are in diffs.lsmeans.table dataframe + ## test.effs=NULL perform all pairs comparisons including interaction effect + dlsm1 <- data.frame(difflsmeans(mfl, test.effs = NULL)) + ddlsm1 <- dlsm1 + ## save rownames and factor names + rn <- rownames(ddlsm1) + fn <- ddlsm1[, c(1, 2)] + ## writing the results on a single line + namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesFactPval <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesFactprinc <- "pval_time" + namesLowerCI <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct))], sep = "") + namesUpperCI <- paste("upperCI ", rownames(ddlsm1)[c(1:(nct))], sep = "") + + ### lmer results on 1 vector + # pvalue of shapiro Wilks test of the residuals + res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals" + res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance" + res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML" + + ### factor time pvalue results + shapiro test + res[nfp, ] <- raov[, 6] + rownames(res)[nfp] <- namesFactprinc + + # pvalues of time factor levels comparisons + res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9] + rownames(res)[(nfp + 1):(nfp + nct)] <- namesFactPval + + # Estimates of time factor levels + nresf <- nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesFactEstim + + # Lower CI of the difference between levels of factors + # nresf is incremeted + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesLowerCI + # Lower CI of the difference between levels of factors + # nresf is incremeted + nresf <- nresf + nresT + res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8] + rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesUpperCI + + + } else { + ## one of the subject has only one time, subject can't be a random variable + ## A repeated measure could be run instead function lme of package nlme, next version + res[1, ] <- NA + cat("\n Computing impossible for feature ", colnames(ids)[4], ": at least one subject has only one time.\n") + } + tres <- data.frame(t(res)) + rownames(tres)[1] <- nameVar + cres <- list(tres, rn, fn) + return(cres) + +} + +############################################################################################################## +defColRes <- function(ids, ifixfact, itime) { + ## define the size of the result file depending on the numbers of levels of the fixed and time factor. + ## Numbers of levels define the numbers of comparisons with pvalue and estimate of the difference. + ## The result file also contains the pvalue of the fixed factor, time factor and interaction + ## plus Shapiro normality test. This is define by nfp + ## subscript of fixed factor=0 means no other fixed factor than "time" + if (ifixfact > 0) { + nfp <- 6 # shapiro + subject variance + REML + time + fixfact + interaction + time <- ids[[itime]] + fixfact <- ids[[ifixfact]] + + cat("\n Levels fixfact: ", levels(fixfact)) + cat("\n Levels time: ", levels(time)) + + # ncff number of comparisons of the fixed factor (nlff number of levels of fixed factor) + nlff <- length(levels(fixfact)) + ncff <- (nlff * (nlff - 1)) / 2 + # nct number of comparison of the time factor (nlt number of levels of time factor) + nlt <- length(levels(time)) + nct <- (nlt * (nlt - 1)) / 2 + # nci number of comparison of the interaction + nli <- nlff * nlt + nci <- (nli * (nli - 1)) / 2 + ndim <- c(NA, NA, NA, NA, NA, NA, NA) + + ndim[1] <- nfp # pvalues of fixed factor, time factor and interaction (3columns) and shapiro test pvalue + ndim[2] <- nlff # number of levels of fixed factor + ndim[3] <- ncff # number of comparisons (2by2) of the fixed factor + ndim[4] <- nlt # number of levels of time factor + ndim[5] <- nct # number of comparisons (2by2) of the time factor + ndim[6] <- nli # number of levels of interaction + ndim[7] <- nci # number of comparisons (2by2) of the interaction + + } + else { + nfp <- 4 # Mandatory columns: shapiro + subject variance + REML + time + time <- ids[[itime]] + # nct number of comparison of the time factor + nlt <- length(levels(time)) + nct <- (nlt * (nlt - 1)) / 2 + ndim <- c(NA, NA, NA, NA, NA, NA, NA) + + ndim[1] <- nfp # pvalues of shapiro test + subject variance + REML + time factor + ndim[4] <- nlt # number of levels of time factor + ndim[5] <- nct # number of comparisons (2by2) of the time factor + } + return(ndim) +} + +############################################################################################################## +lmixedm <- function(datMN, + samDF, + varDF, + fixfact, time, subject, + logtr = "none", + pvalCutof = 0.05, + pvalcorMeth = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7], + dffOption, + visu = "no", + least.confounded = FALSE, + outlier.limit = 3, + pdfC, + pdfE + ) { + sampids <- samDF + dataMatrix <- datMN + varids <- varDF + + options("scipen" = 50, "digits" = 5) + pvalCutof <- as.numeric(pvalCutof) + + cat("\n dff computation method=", dffOption) + ### Function running lmer function on a set of variables described in + ### 3 different dataframes as used by W4M + ### results are merge with the metadata variables varids + ### ifixfact, itime, isubject are subscripts of the dependant variables. if only time factor the ifixfat is set to 0 and no diag is performed (visu="no") + if (fixfact == "none") { + ifixfact <- 0 ; visu <- "no" + } else ifixfact <- which(colnames(sampids) == fixfact) + itime <- which(colnames(sampids) == time) + isubject <- which(colnames(sampids) == subject) + + lmmds <- dataMatrix + if (logtr != "log10" & logtr != "log2") logtr <- "none" + if (logtr == "log10") lmmds <- log10(lmmds + 1) + if (logtr == "log2") lmmds <- log2(lmmds + 1) + + dslm <- cbind(sampids, lmmds) + + nvar <- ncol(lmmds) + firstvar <- ncol(sampids) + 1 + lastvar <- firstvar + ncol(lmmds) - 1 + + if (ifixfact > 0) dslm[[ifixfact]] <- factor(dslm[[ifixfact]]) + dslm[[itime]] <- factor(dslm[[itime]]) + dslm[[isubject]] <- factor(dslm[[isubject]]) + ## call defColres to define the numbers of test and so the number of columns of results + ## depends on whether or not there is a fixed factor with time. If only time factor ifixfact=0 + if (ifixfact > 0) { + ndim <- defColRes(dslm[, c(ifixfact, itime)], ifixfact = 1, itime = 2) + nColRes <- ndim[1] + (4 * (ndim[3] + ndim[5] + ndim[7])) + firstpval <- ndim[1] - 2 + lastpval <- ndim[1] + ndim[3] + ndim[5] + ndim[7] + } else { + ndim <- defColRes(dslm[, itime], ifixfact = 0, itime = 1) + nColRes <- ndim[1] + (4 * (ndim[5])) + firstpval <- ndim[1] + lastpval <- ndim[1] + ndim[5] + } + ## initialisation of the result file + resLM <- data.frame(array(rep(NA, nvar * nColRes), dim = c(nvar, nColRes))) + rownames(resLM) <- rownames(varids) + + ## PDF initialisation + if (visu == "yes") { + pdf(pdfC, onefile = TRUE, height = 15, width = 30) + par(mfrow = c(1, 3)) + } + + + for (i in firstvar:lastvar) { + + subds <- dslm[, c(ifixfact, itime, isubject, i)] + + tryCatch({ + if (ifixfact > 0) + reslmer <- lmRepeated2FF(subds, ifixfact = 1, itime = 2, isubject = 3, ivd = 4, ndim = ndim, visu = visu, + tit = colnames(dslm)[i], pvalCutof = pvalCutof, + dffOption = dffOption, least.confounded = least.confounded, + outlier.limit = outlier.limit) + else + reslmer <- lmRepeated1FF(subds, ifixfact = 0, itime = 1, isubject = 2, ivd = 3, ndim = ndim, + pvalCutof = pvalCutof, dffOption = dffOption) + + resLM[i - firstvar + 1, ] <- reslmer[[1]] + }, error = function(e) { + cat("\nERROR with ", rownames(resLM)[i - firstvar + 1], ": ", conditionMessage(e), "\n") + } + ) + + } + + if (exists("reslmer")) { + colnames(resLM) <- colnames(reslmer[[1]]) + labelRow <- reslmer[[2]] + factorRow <- reslmer[[3]] + } else { + stop("\n- - - - -\nModel computation impossible for every single variables in the dataset: no result returned.\n- - - - -\n") + } + + if (visu == "yes") dev.off() + + + ## pvalue correction with p.adjust library multtest + ## Possible methods of pvalue correction + AdjustMeth <- c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none") + if (length(which(pvalcorMeth == AdjustMeth)) == 0) pvalcorMeth <- "none" + + if (pvalcorMeth != "none") { + for (k in firstpval:lastpval) { + resLM[[k]] <- p.adjust(resLM[[k]], method = pvalcorMeth, n = dim(resLM[k])[[1]]) + + } + } + + ## for each variables, set pvalues to NA and estimates = 0 when pvalue of factor > pvalCutof value define by user + if (ifixfact > 0) { + ## time effect + resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval + 1):(lastpval + ndim[5]))] <- 0 + resLM[which(resLM[, firstpval] > pvalCutof), c((ndim[1] + 1):(ndim[1] + ndim[5]))] <- NA + ## treatment effect + resLM[which(resLM[, firstpval + 1] > pvalCutof), c((lastpval + ndim[5] + 1):(lastpval + ndim[5] + ndim[3]))] <- 0 + resLM[which(resLM[, firstpval + 1] > pvalCutof), c((ndim[1] + ndim[5] + 1):(ndim[1] + ndim[5] + ndim[3]))] <- NA + ## interaction effect + resLM[which(resLM[, firstpval + 2] > pvalCutof), c((lastpval + ndim[5] + ndim[3] + 1):(lastpval + ndim[5] + ndim[3] + ndim[7]))] <- 0 + resLM[which(resLM[, firstpval + 2] > pvalCutof), c((ndim[1] + ndim[5] + ndim[3] + 1):(ndim[1] + ndim[5] + ndim[3] + ndim[7]))] <- NA + } else { + ## time effect only + resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval + 1):(lastpval + ndim[5]))] <- 0 + resLM[which(resLM[, firstpval] > pvalCutof), c((firstpval + 1):(firstpval + ndim[5]))] <- NA + } + + ## for each variable, estimates plots are performed if at least one factor is significant after p-value correction + pdf(pdfE, onefile = TRUE, height = 15, width = 30) + + ## for each variable (in row) + for (i in seq_len(nrow(resLM))) { + + ## if any fixed factor + time factor + if (ifixfact > 0) + + ## if any main factor after p-value correction is significant -> plot estimates and time course + if (length(which(resLM[i, c(4:6)] < pvalCutof)) > 0) { + + ## Plot of time course by fixfact : data prep with factors and quantitative var to be plot + subv <- dslm[, colnames(dslm) == rownames(resLM)[i]] + subds <- data.frame(dslm[[ifixfact]], dslm[[itime]], dslm[[isubject]], subv) + libvar <- c(fixfact, time, subject) + colnames(subds) <- c(libvar, rownames(resLM)[i]) + + ## Plot of estimates with error bars for all fixed factors and interaction + rddlsm1 <- t(resLM[i, ]) + pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"] + esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"] + loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"] + upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "UpperC"] + rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow) + colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow)) + rownames(rddlsm1) <- labelRow + + ## function for plotting these 2 graphs + plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof) + + } + + ## if only a time factor + if (ifixfact == 0) + + ## if time factor after p-value correction is significant -> plot time course + if (length(which(resLM[i, 4] < pvalCutof)) > 0) { + + ## Plot of time course : data prep with factors and quantitative var to be plot + subv <- dslm[, colnames(dslm) == rownames(resLM)[i]] + subds <- data.frame(dslm[[itime]], dslm[[isubject]], subv) + libvar <- c(time, subject) + colnames(subds) <- c(libvar, rownames(resLM)[i]) + + ## Plot of estimates with error bars for all fixed factors and interaction + rddlsm1 <- t(resLM[i, ]) + pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"] + esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"] + loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"] + upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "upperC"] + rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow) + colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow)) + rownames(rddlsm1) <- labelRow + + ## function for plotting these 2 graphs + plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof) + + } + + } + dev.off() + + ## return result file with pvalues and estimates (exclude confidence interval used for plotting) + iCI <- which(substr(colnames(resLM), 4, 7) == "erCI") + resLM <- resLM[, - iCI] + resLM <- cbind(varids, resLM) + return(resLM) +} diff -r 000000000000 -r a4d89d47646f mixmodel_wrapper.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mixmodel_wrapper.R Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,200 @@ +#!/usr/bin/env Rscript + +library(lme4) ## mixed model computing +library(Matrix) +library(MASS) +library(lmerTest) ## computing pvalue and lsmeans from results of lme4 package +library(multtest) ## multiple testing +library(ggplot2) +library(gridExtra) +library(grid) + +source_local <- function(fname) { + argv <- commandArgs(trailingOnly = FALSE) + base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) + source(paste(base_dir, fname, sep = "/")) +} + +source_local("mixmodel_script.R") +source_local("diagmfl.R") + +parse_args <- function() { + args <- commandArgs() + start <- which(args == "--args")[1] + 1 + if (is.na(start)) { + return(list()) + } + seq_by2 <- seq(start, length(args), by = 2) + result <- as.list(args[seq_by2 + 1]) + names(result) <- args[seq_by2] + return(result) +} + +argVc <- unlist(parse_args()) + +##------------------------------ +## Initializing +##------------------------------ + +## options +##-------- + +strAsFacL <- options()$stringsAsFactors +options(stringsAsFactors = FALSE) + +## constants +##---------- + +modNamC <- "mixmodel" ## module name + +topEnvC <- environment() +flagC <- "\n" + +## functions +##---------- + +flgF <- function(tesC, + envC = topEnvC, + txtC = NA) { ## management of warning and error messages + + tesL <- eval(parse(text = tesC), envir = envC) + + if (!tesL) { + + sink(NULL) + stpTxtC <- ifelse(is.na(txtC), + paste0(tesC, " is FALSE"), + txtC) + + stop(stpTxtC, + call. = FALSE) + + } + +} ## flgF + +## log file +##--------- + +sink(argVc["information"]) + +cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "") +cat("\nParameters used:\n\n") +print(argVc) +cat("\n\n") + +## loading +##-------- + +datMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t"))) + +samDF <- read.table(argVc["sampleMetadata_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t") + +varDF <- read.table(argVc["variableMetadata_in"], + check.names = FALSE, + header = TRUE, + row.names = 1, + sep = "\t") + + +## checking +##--------- + +flgF("identical(rownames(datMN), rownames(samDF))", txtC = "Column names of the dataMatrix are not identical to the row names of the sampleMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") +flgF("identical(colnames(datMN), rownames(varDF))", txtC = "Row names of the dataMatrix are not identical to the row names of the variableMetadata; check your data with the 'Check Format' module in the 'Quality Control' section") + +flgF("argVc['time'] %in% colnames(samDF)", txtC = paste0("Required time factor '", argVc["time"], "' could not be found in the column names of the sampleMetadata")) +flgF("argVc['subject'] %in% colnames(samDF)", txtC = paste0("Required subject factor '", argVc["subject"], "' could not be found in the column names of the sampleMetadata")) + +flgF("mode(samDF[, argVc['time']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc["time"], "' column of the sampleMetadata should contain either number only, or character only")) +flgF("mode(samDF[, argVc['subject']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc["subject"], "' column of the sampleMetadata should contain either number only, or character only")) + +flgF("argVc['adjC'] %in% c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')") +flgF("argVc['trf'] %in% c('none', 'log10', 'log2')") + +flgF("0 <= as.numeric(argVc['thrN']) && as.numeric(argVc['thrN']) <= 1", txtC = "(corrected) p-value threshold must be between 0 and 1") +flgF("argVc['diaR'] %in% c('no', 'yes')") + + +##------------------------------ +## Formating +##------------------------------ + +if (argVc["dff"] == "Satt") { + dffmeth <- "Satterthwaite" +} else { + dffmeth <- "Kenward-Roger" +} + + +##------------------------------ +## Computation +##------------------------------ + + +varDFout <- lmixedm(datMN = datMN, + samDF = samDF, + varDF = varDF, + fixfact = argVc["fixfact"], + time = argVc["time"], + subject = argVc["subject"], + logtr = argVc["trf"], + pvalCutof = argVc["thrN"], + pvalcorMeth = argVc["adjC"], + dffOption = dffmeth, + visu = argVc["diaR"], + least.confounded = FALSE, + outlier.limit = 3, + pdfC = argVc["out_graph_pdf"], + pdfE = argVc["out_estim_pdf"] + ) + + + + +##------------------------------ +## Rounding +##------------------------------ + +if (argVc["rounding"] == "yes") { + varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))] <- apply(varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))], 2, round, digits = as.numeric(argVc["decplaces"])) +} + +##------------------------------ +## Ending +##------------------------------ + + +## saving +##-------- + +varDFout <- cbind.data.frame(variableMetadata = rownames(varDFout), + varDFout) + +write.table(varDFout, + file = argVc["variableMetadata_out"], + quote = FALSE, + row.names = FALSE, + sep = "\t") + +## closing +##-------- + +cat("\n\nEnd of '", modNamC, "' Galaxy module call: ", + as.character(Sys.time()), "\n", sep = "") +cat("\nInformation about R (version, Operating System, attached or loaded packages):\n\n") +sessionInfo() + +sink() + +options(stringsAsFactors = strAsFacL) + +rm(list = ls()) diff -r 000000000000 -r a4d89d47646f test-data/OneFactor_dataMatrix.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/OneFactor_dataMatrix.tsv Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,23 @@ +variableMetadata s1D2 s2D2 s3D2 s1D3 s2D3 s3D3 s1D4 s2D4 s3D4 s1D5 s2D5 s3D5 s1D6 s2D6 s3D6 +V1 1.06E+06 1.07E+06 8.93E+05 9.11E+05 8.07E+05 6.90E+05 7.07E+05 7.66E+05 6.82E+05 6.25E+05 7.25E+05 6.78E+05 7.59E+05 6.70E+05 7.41E+05 +V2 4.67E+06 4.78E+06 4.17E+06 4.84E+06 4.50E+06 4.41E+06 6.96E+06 7.05E+06 6.65E+06 4.12E+06 4.42E+06 4.39E+06 1.26E+07 1.28E+07 1.32E+07 +V3 9.03E+05 8.44E+05 7.31E+05 9.97E+05 7.97E+05 7.47E+05 7.18E+05 8.28E+05 7.81E+05 9.08E+05 8.35E+05 8.37E+05 1.12E+06 1.11E+06 1.16E+06 +V4 1.62E+05 1.27E+05 1.21E+05 1.41E+05 8.48E+04 1.06E+05 9.72E+04 1.52E+05 1.27E+05 1.29E+05 1.42E+05 1.17E+05 1.72E+05 1.67E+05 1.87E+05 +V5 5.22E+04 6.20E+04 5.55E+04 4.78E+04 3.41E+04 5.84E+04 6.30E+04 1.11E+05 1.16E+05 4.59E+04 3.34E+04 4.08E+04 9.14E+04 6.78E+04 1.08E+05 +V6 1.68E+05 1.83E+05 2.76E+05 2.30E+05 1.62E+05 3.13E+05 4.47E+05 5.34E+05 6.59E+05 2.38E+05 2.30E+05 1.81E+05 4.93E+05 3.83E+05 6.30E+05 +V7 8.85E+06 8.39E+06 8.56E+06 5.51E+06 5.07E+06 4.90E+06 2.90E+06 3.10E+06 3.01E+06 2.36E+06 2.40E+06 2.30E+06 5.06E+06 4.94E+06 4.93E+06 +V8 2.53E+05 2.26E+05 1.90E+05 2.75E+05 2.67E+05 2.22E+05 2.12E+05 2.43E+05 1.72E+05 1.51E+05 1.56E+05 1.50E+05 2.49E+05 2.22E+05 2.48E+05 +V9 7.91E+05 7.16E+05 6.57E+05 6.08E+05 4.62E+05 4.88E+05 5.49E+05 6.13E+05 5.54E+05 7.56E+05 7.81E+05 7.59E+05 1.47E+06 1.46E+06 1.60E+06 +V10 1.76E+04 1.79E+04 1.86E+04 4.00E+04 4.27E+04 3.30E+04 5.34E+04 6.26E+04 6.09E+04 1.04E+05 8.24E+04 7.16E+04 2.09E+05 2.07E+05 2.19E+05 +V11 1.03E+05 9.28E+04 9.34E+04 8.70E+04 5.73E+04 8.05E+04 1.82E+06 1.98E+06 1.76E+06 8.41E+04 7.80E+04 7.31E+04 5.62E+04 5.01E+04 7.48E+04 +V12 4.48E+04 6.71E+04 4.27E+04 3.54E+04 3.04E+04 2.73E+04 2.47E+04 2.85E+04 3.26E+04 2.11E+04 1.55E+04 2.71E+04 3.44E+04 4.14E+04 5.87E+04 +V13 4.48E+04 6.71E+04 4.27E+04 3.54E+04 3.04E+04 2.73E+04 2.47E+04 2.85E+04 3.26E+04 2.11E+04 1.55E+04 2.71E+04 3.44E+04 4.14E+04 5.87E+04 +V14 1.16E+04 6.64E+03 1.29E+04 2.59E+04 1.55E+04 2.21E+04 3.15E+04 4.05E+04 3.10E+04 4.37E+04 2.90E+04 2.67E+04 6.62E+04 1.00E+05 8.75E+04 +V15 3.04E+05 3.05E+05 2.69E+05 2.57E+05 1.99E+05 2.16E+05 1.97E+05 1.97E+05 2.03E+05 2.65E+05 2.71E+05 2.60E+05 1.67E+05 1.89E+05 1.99E+05 +V16 2.69E+05 2.45E+05 2.18E+05 1.23E+05 1.27E+05 1.04E+05 2.91E+05 2.82E+05 2.79E+05 2.37E+05 2.11E+05 2.18E+05 5.60E+05 5.89E+05 6.08E+05 +V17 9.30E+04 8.55E+04 7.01E+04 4.84E+04 4.06E+04 3.14E+04 8.77E+04 1.03E+05 9.48E+04 9.88E+04 6.91E+04 7.81E+04 3.19E+05 3.25E+05 3.76E+05 +V18 2.30E+04 3.87E+04 2.01E+04 9.56E+03 1.27E+04 1.69E+04 2.38E+04 2.66E+04 2.99E+04 5.98E+04 4.93E+04 5.13E+04 1.08E+05 1.30E+05 1.36E+05 +V19 6.25E+04 7.43E+04 4.71E+04 1.62E+04 1.05E+04 1.57E+04 3.73E+04 3.08E+04 3.67E+04 3.50E+04 3.90E+04 2.78E+04 4.64E+05 1.60E+05 1.75E+05 +V20 1.72E+04 1.58E+04 1.35E+04 1.20E+04 1.02E+04 8.85E+03 1.35E+04 1.71E+04 1.62E+04 1.64E+04 8.41E+03 6.02E+03 5.07E+04 7.33E+04 6.57E+04 +V21 1.86E+06 1.90E+06 1.90E+06 2.05E+06 2.06E+06 2.07E+06 1.97E+06 1.94E+06 1.90E+06 1.64E+06 1.67E+06 1.78E+06 1.65E+06 1.73E+06 1.77E+06 +V22 1.52E+04 8.17E+03 1.37E+04 1.39E+04 1.34E+04 1.36E+04 1.40E+04 1.67E+04 1.60E+04 1.33E+04 1.69E+04 1.20E+04 1.05E+04 1.15E+04 1.11E+04 diff -r 000000000000 -r a4d89d47646f test-data/OneFactor_sampleMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/OneFactor_sampleMetadata.tsv Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,16 @@ +sampleMetadata time subject +s1D2 D2 s1 +s2D2 D2 s2 +s3D2 D2 s3 +s1D3 D3 s1 +s2D3 D3 s2 +s3D3 D3 s3 +s1D4 D4 s1 +s2D4 D4 s2 +s3D4 D4 s3 +s1D5 D5 s1 +s2D5 D5 s2 +s3D5 D5 s3 +s1D6 D6 s1 +s2D6 D6 s2 +s3D6 D6 s3 diff -r 000000000000 -r a4d89d47646f test-data/OneFactor_variableMetadata.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/OneFactor_variableMetadata.tsv Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,23 @@ +variableMetadata name +V1 name1 +V2 name2 +V3 name3 +V4 name4 +V5 name5 +V6 name6 +V7 name7 +V8 name8 +V9 name9 +V10 name10 +V11 name11 +V12 name12 +V13 name13 +V14 name14 +V15 name15 +V16 name16 +V17 name17 +V18 name18 +V19 name19 +V20 name20 +V21 name21 +V22 name22 diff -r 000000000000 -r a4d89d47646f test-data/TwoFactor_dataMatrix.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TwoFactor_dataMatrix.txt Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,6 @@ +variable S01T0 S01T1 S01T2 S01T3 S02T0 S02T1 S02T2 S02T3 S03T0 S03T1 S03T2 S03T3 S04T0 S04T1 S04T2 S04T3 S05T0 S05T1 S05T2 S05T3 S06T0 S06T1 S06T2 S06T3 S07T0 S07T1 S07T2 S07T3 S08T0 S08T1 S08T2 S08T3 S09T0 S09T1 S09T2 S09T3 S10T0 S10T1 S10T2 S10T3 S11T0 S11T1 S11T2 S11T3 S12T0 S12T1 S12T2 S12T3 S13T0 S13T1 S13T2 S13T3 S14T0 S14T1 S14T2 S14T3 S15T0 S15T1 S15T2 S15T3 S16T0 S16T1 S16T2 S16T3 S17T0 S17T1 S17T2 S17T3 S18T0 S18T1 S18T2 S18T3 +v0.4425_385 126.39 234.11 162.68 115.29 172.44 171.53 180.31 114.94 224.14 195.09 189.45 122.79 212.02 243.48 188.42 138.91 241.59 192.95 214.74 98.69 270.03 176.79 208.73 78.04 247.82 176.51 175.78 98.94 260.63 181.93 178.06 111.58 218.95 176.24 200.29 112.77 140.23 157.38 124.62 114.34 147.23 165.29 125.85 128.41 126.35 139.75 127.95 74.86 124.92 155.12 109.4 86.88 145.57 142 135.29 83.08 116.95 142.28 113.43 118.26 80.81 146.03 130.52 103.4 79.58 168 109.35 88.19 138.12 166.61 115.05 83.05 +v0.4498_625 108.9 133.53 128.66 205.38 121.48 112.23 142.97 199.35 127.44 102.45 173.31 252.14 123.96 96.93 118.7 234.54 146.33 109.1 178.69 233.61 154.27 138.81 157.97 213.18 176.06 108.79 142.11 116.14 138.78 131.42 172.65 214.14 159.19 102.17 137.67 211.04 120.1 69.85 65.75 92.68 121.22 70.04 76.06 91.89 81.18 73.14 80.93 92.69 110.68 66.84 79.72 92.89 93.5 62.98 87.39 98.32 84.59 65.18 99.23 36.94 85.1 80.49 95.17 115.22 113.47 65.32 53.29 113.25 106.68 74.04 66.5 88.96 +v0.4711_463 124.1 166.84 156.18 82.57 158.4 190.09 152.89 100.76 155.75 174.85 153.14 112.76 171.54 150.37 163.49 116.06 198.78 161.53 247.61 122.94 167.38 186.12 194.59 93.05 208.24 150.15 140.97 55.62 200.32 138.59 145.99 89.44 192.49 131.44 145.89 93.38 102.94 67.11 83.99 86.86 110.09 98.61 77.91 100.75 136.32 92.56 86.79 75.62 144.39 65.27 110.24 94.26 136.48 77.62 95.43 60.3 135.98 87.54 65.37 92.04 112.09 58.9 92.35 72.92 132.84 97.12 80.71 79.01 144.75 85.78 73.61 83.68 +v0.4715_659 688.13 739.41 529.62 354.46 828.48 660.76 545.32 318.51 798.21 714.6 617.95 400.94 815.2 696.85 611.31 354.28 909.08 792.05 650.85 386.48 897.32 729.1 638.99 420.19 967.92 765.14 579.41 369.82 929.8 759.62 617.5 339.81 989.48 781.37 606.77 386.24 289.67 359.43 240.13 294.74 314.71 338.43 203.29 264.08 349.01 365.76 245.06 283.41 327.8 354.82 228.03 263.83 300.57 362.51 242.41 282.21 280.17 344.99 253.88 273.4 304.37 325.89 292.93 292.98 317.8 318.16 205.93 245.92 310.23 330.51 230.27 270.3 +v0.4766_757 117.8 110.31 114.56 99.99 144.39 155.33 114.47 122 147.16 132.82 129.78 94.86 128.26 161.38 119.12 96.73 177.26 134.94 149.39 134.12 163.22 146.12 148.26 108.68 159.39 140.75 119.11 65.83 170.45 131.79 163.58 104.63 167.33 112.88 142.58 85.38 80.53 84.3 42.33 62.04 73.74 97.78 58 79.33 91.7 82.39 69.71 83.58 77.86 65.56 74.11 82.9 90.53 75.27 80.46 81.88 92.77 52.94 67.99 84.21 91.27 69.54 77.13 85.27 93.18 83.68 57.01 65.09 92.12 82.63 70.9 75.53 diff -r 000000000000 -r a4d89d47646f test-data/TwoFactor_sampleMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TwoFactor_sampleMetadata.txt Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,73 @@ +sample sujet phenotype time +S01T0 S01 WT T0 +S01T1 S01 WT T1 +S01T2 S01 WT T2 +S01T3 S01 WT T3 +S02T0 S02 WT T0 +S02T1 S02 WT T1 +S02T2 S02 WT T2 +S02T3 S02 WT T3 +S03T0 S03 WT T0 +S03T1 S03 WT T1 +S03T2 S03 WT T2 +S03T3 S03 WT T3 +S04T0 S04 WT T0 +S04T1 S04 WT T1 +S04T2 S04 WT T2 +S04T3 S04 WT T3 +S05T0 S05 WT T0 +S05T1 S05 WT T1 +S05T2 S05 WT T2 +S05T3 S05 WT T3 +S06T0 S06 WT T0 +S06T1 S06 WT T1 +S06T2 S06 WT T2 +S06T3 S06 WT T3 +S07T0 S07 WT T0 +S07T1 S07 WT T1 +S07T2 S07 WT T2 +S07T3 S07 WT T3 +S08T0 S08 WT T0 +S08T1 S08 WT T1 +S08T2 S08 WT T2 +S08T3 S08 WT T3 +S09T0 S09 WT T0 +S09T1 S09 WT T1 +S09T2 S09 WT T2 +S09T3 S09 WT T3 +S10T0 S10 MT T0 +S10T1 S10 MT T1 +S10T2 S10 MT T2 +S10T3 S10 MT T3 +S11T0 S11 MT T0 +S11T1 S11 MT T1 +S11T2 S11 MT T2 +S11T3 S11 MT T3 +S12T0 S12 MT T0 +S12T1 S12 MT T1 +S12T2 S12 MT T2 +S12T3 S12 MT T3 +S13T0 S13 MT T0 +S13T1 S13 MT T1 +S13T2 S13 MT T2 +S13T3 S13 MT T3 +S14T0 S14 MT T0 +S14T1 S14 MT T1 +S14T2 S14 MT T2 +S14T3 S14 MT T3 +S15T0 S15 MT T0 +S15T1 S15 MT T1 +S15T2 S15 MT T2 +S15T3 S15 MT T3 +S16T0 S16 MT T0 +S16T1 S16 MT T1 +S16T2 S16 MT T2 +S16T3 S16 MT T3 +S17T0 S17 MT T0 +S17T1 S17 MT T1 +S17T2 S17 MT T2 +S17T3 S17 MT T3 +S18T0 S18 MT T0 +S18T1 S18 MT T1 +S18T2 S18 MT T2 +S18T3 S18 MT T3 diff -r 000000000000 -r a4d89d47646f test-data/TwoFactor_variableMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/TwoFactor_variableMetadata.txt Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,6 @@ +variable num +v0.4425_385 1 +v0.4498_625 2 +v0.4711_463 3 +v0.4715_659 4 +v0.4766_757 5 diff -r 000000000000 -r a4d89d47646f test-data/mixmodel_TwoFactor_variableMetadata.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mixmodel_TwoFactor_variableMetadata.txt Mon May 16 09:25:01 2022 +0000 @@ -0,0 +1,6 @@ +variableMetadata num Shapiro.pvalue.residuals Subject.Variance REML pval_time pval_trt pval_inter pvalue.timeT0...timeT1 pvalue.timeT0...timeT2 pvalue.timeT0...timeT3 pvalue.timeT1...timeT2 pvalue.timeT1...timeT3 pvalue.timeT2...timeT3 pvalue.fixfactMT...fixfactWT pvalue..timeT0.fixfactMT...timeT1.fixfactMT pvalue..timeT0.fixfactMT...timeT2.fixfactMT pvalue..timeT0.fixfactMT...timeT3.fixfactMT pvalue..timeT0.fixfactMT...timeT0.fixfactWT pvalue..timeT0.fixfactMT...timeT1.fixfactWT pvalue..timeT0.fixfactMT...timeT2.fixfactWT pvalue..timeT0.fixfactMT...timeT3.fixfactWT pvalue..timeT1.fixfactMT...timeT2.fixfactMT pvalue..timeT1.fixfactMT...timeT3.fixfactMT pvalue..timeT1.fixfactMT...timeT0.fixfactWT pvalue..timeT1.fixfactMT...timeT1.fixfactWT pvalue..timeT1.fixfactMT...timeT2.fixfactWT pvalue..timeT1.fixfactMT...timeT3.fixfactWT pvalue..timeT2.fixfactMT...timeT3.fixfactMT pvalue..timeT2.fixfactMT...timeT0.fixfactWT pvalue..timeT2.fixfactMT...timeT1.fixfactWT pvalue..timeT2.fixfactMT...timeT2.fixfactWT pvalue..timeT2.fixfactMT...timeT3.fixfactWT pvalue..timeT3.fixfactMT...timeT0.fixfactWT pvalue..timeT3.fixfactMT...timeT1.fixfactWT pvalue..timeT3.fixfactMT...timeT2.fixfactWT pvalue..timeT3.fixfactMT...timeT3.fixfactWT pvalue..timeT0.fixfactWT...timeT1.fixfactWT pvalue..timeT0.fixfactWT...timeT2.fixfactWT pvalue..timeT0.fixfactWT...timeT3.fixfactWT pvalue..timeT1.fixfactWT...timeT2.fixfactWT pvalue..timeT1.fixfactWT...timeT3.fixfactWT pvalue..timeT2.fixfactWT...timeT3.fixfactWT estimate.timeT0...timeT1 estimate.timeT0...timeT2 estimate.timeT0...timeT3 estimate.timeT1...timeT2 estimate.timeT1...timeT3 estimate.timeT2...timeT3 estimate.fixfactMT...fixfactWT estimate..timeT0.fixfactMT...timeT1.fixfactMT estimate..timeT0.fixfactMT...timeT2.fixfactMT estimate..timeT0.fixfactMT...timeT3.fixfactMT estimate..timeT0.fixfactMT...timeT0.fixfactWT estimate..timeT0.fixfactMT...timeT1.fixfactWT estimate..timeT0.fixfactMT...timeT2.fixfactWT estimate..timeT0.fixfactMT...timeT3.fixfactWT estimate..timeT1.fixfactMT...timeT2.fixfactMT estimate..timeT1.fixfactMT...timeT3.fixfactMT estimate..timeT1.fixfactMT...timeT0.fixfactWT estimate..timeT1.fixfactMT...timeT1.fixfactWT estimate..timeT1.fixfactMT...timeT2.fixfactWT estimate..timeT1.fixfactMT...timeT3.fixfactWT estimate..timeT2.fixfactMT...timeT3.fixfactMT estimate..timeT2.fixfactMT...timeT0.fixfactWT estimate..timeT2.fixfactMT...timeT1.fixfactWT estimate..timeT2.fixfactMT...timeT2.fixfactWT estimate..timeT2.fixfactMT...timeT3.fixfactWT estimate..timeT3.fixfactMT...timeT0.fixfactWT estimate..timeT3.fixfactMT...timeT1.fixfactWT estimate..timeT3.fixfactMT...timeT2.fixfactWT estimate..timeT3.fixfactMT...timeT3.fixfactWT estimate..timeT0.fixfactWT...timeT1.fixfactWT estimate..timeT0.fixfactWT...timeT2.fixfactWT estimate..timeT0.fixfactWT...timeT3.fixfactWT estimate..timeT1.fixfactWT...timeT2.fixfactWT estimate..timeT1.fixfactWT...timeT3.fixfactWT estimate..timeT2.fixfactWT...timeT3.fixfactWT +v0.4425_385 1 0.004001 0 -140.931331 0 0 0.000264 0.191909 0.277993 0 0.018695 0 0 0 0.001835 0.869646 0.006728 0 0 0 0.238471 0.002992 0 0.000044 0.003836 0.009486 0.000036 0.004238 0 0 0 0.180279 0 0 0 0.112058 0.170546 0.091717 0 0.745427 0 0 -0.030835 0.02558 0.192247 0.056415 0.223081 0.166666 -0.148875 -0.107496 -0.005448 0.092614 -0.252536 -0.206709 -0.195927 0.039343 0.102048 0.200111 -0.14504 -0.099213 -0.088431 0.146839 0.098062 -0.247088 -0.201261 -0.19048 0.044791 -0.34515 -0.299323 -0.288542 -0.053272 0.045827 0.056608 0.291879 0.010781 0.246052 0.23527 +v0.4498_625 2 0.000002 0 -120.084702 0 0 0.000801 0.000045 0.135374 0.047823 0.005607 0 0.000776 0 0.000128 0.004174 0.135488 0.000855 0.170794 0.000053 0 0.272071 0.012599 0 0.000001 0 0 0.149351 0 0.000049 0 0 0.000005 0.00515 0 0 0.038427 0.408218 0.000047 0.00448 0 0.000769 0.120489 0.041611 -0.055515 -0.078879 -0.176005 -0.097126 -0.249448 0.158724 0.115616 0.058829 -0.136156 -0.053901 -0.16855 -0.306015 -0.043108 -0.099895 -0.29488 -0.212625 -0.327274 -0.46474 -0.056787 -0.251772 -0.169517 -0.284166 -0.421632 -0.194985 -0.11273 -0.227379 -0.364845 0.082255 -0.032394 -0.16986 -0.114649 -0.252114 -0.137466 +v0.4711_463 3 0.474695 0.000093 -135.57425 0 0 0.000006 0.000013 0.000128 0 0.488485 0.000043 0.000004 0 0 0.000004 0.000001 0.000295 0.006152 0.002433 0.000335 0.51173 0.755875 0 0 0 0.046325 0.729076 0 0 0 0.173445 0 0 0 0.089918 0.319884 0.50045 0 0.745698 0 0 0.11757 0.100696 0.22644 -0.016874 0.108869 0.125743 -0.194248 0.200783 0.178185 0.190094 -0.13207 -0.097712 -0.108863 0.130715 -0.022598 -0.010689 -0.332853 -0.298495 -0.309645 -0.070068 0.011909 -0.310255 -0.275897 -0.287048 -0.04747 -0.322164 -0.287806 -0.298957 -0.059379 0.034358 0.023208 0.262785 -0.01115 0.228427 0.239578 +v0.4715_659 4 0.623926 0.000349 -238.084202 0 0 0 0.219999 0 0 0 0 0 0 0.001773 0 0.000326 0 0 0 0.000025 0 0 0 0 0 0.070602 0.000027 0 0 0 0 0 0 0 0 0.000006 0 0 0 0 0 0.012039 0.138015 0.211119 0.125976 0.19908 0.073104 -0.326171 -0.045354 0.116577 0.053052 -0.44462 -0.375188 -0.285168 -0.075434 0.161931 0.098406 -0.399266 -0.329834 -0.239814 -0.03008 -0.063525 -0.561197 -0.491765 -0.401744 -0.192011 -0.497672 -0.42824 -0.33822 -0.128486 0.069432 0.159453 0.369186 0.09002 0.299754 0.209734 +v0.4766_757 5 0.005552 0.000365 -149.179654 0.00001 0 0.00028 0.015826 0.000092 0.000002 0.083272 0.004665 0.23638 0 0.064552 0.000169 0.102895 0 0 0 0.062636 0.033569 0.819707 0 0 0 0.000447 0.019489 0 0 0 0 0 0 0 0.000898 0.106307 0.055879 0 0.755242 0.000055 0.000152 0.052746 0.090042 0.115324 0.037296 0.062579 0.025283 -0.224878 0.056412 0.121655 0.049578 -0.240112 -0.191032 -0.181683 -0.059041 0.065243 -0.006834 -0.296524 -0.247444 -0.238095 -0.115453 -0.072077 -0.361767 -0.312687 -0.303338 -0.180696 -0.28969 -0.24061 -0.231262 -0.10862 0.04908 0.058428 0.181071 0.009349 0.131991 0.122642