mixmodel4repeated_measures: diagmfl.R comparison

comparison diagmfl.R @ 0:a4d89d47646f draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8

author	workflow4metabolomics
date	Mon, 16 May 2022 09:25:01 +0000
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:a4d89d47646f
+#' Compute the diagnostic quantities of a linear mixed model
+#'
+#'  Script adapted from http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R so that it works
+#'  with an lmer model (rather than lme), with subjects whose identifiers are not numeric,
+#'  and with observations that are not ordered subject by subject (last point still to be checked).
+#'
+#' @details The plots, the associated computations and the notation used in this script follow
+#'   Singer et al (2016) Graphical Tools for detecting departures from linear
+#'   mixed model assumptions and some remedial measures, International Statistical Review
+#'   (doi:10.1111/insr.12178)
+#'
+#' @param mfl A linear mixed model fitted via lmer, with a single grouping factor for the
+#'   random effects (an error is raised otherwise)
+#' @return A list with elements \code{data}, \code{q}, \code{eblue}, \code{eblup},
+#'   \code{marginal.prediction}, \code{conditional.prediction}, \code{std.marginal.residuals},
+#'   \code{std.conditional.residuals}, \code{mahalanobis.distance},
+#'   \code{std.mahalanobis.distance} and \code{std.lesaffreverbeke.measure}
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export lmer.computeDiag
+lmer.computeDiag <- function(mfl) {
+  ## Check arguments ---------------------------------------------------------
+  if (length(mfl@flist) > 1) {
+    stop("Several 'grouping level' for random effect not implemented yet.")
+  }
+  ## Extracting information from the fitted model ----------------------------
+  # data: the response is taken to be the first column of the model frame
+  df <- mfl@frame
+  responseC <- names(df)[1]
+  unitC <- names(mfl@flist)[1]
+  # observations
+  yVn <- df[, responseC]
+  nobsN <- length(yVn)
+  # units
+  idunitVc <- levels(mfl@flist[[1]])
+  nunitN <- length(unique(idunitVc))
+  # X (fixed-effect design matrix)
+  xMN <- mfl@pp$X
+  # Z (random-effect design matrix)
+  zMN <- t(as.matrix(mfl@pp$Zt))
+  # Estimated covariance matrix of random effects (Gam)
+  aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping
+  aux2 <- attr(aux, "stddev")
+  gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2))
+  gammaMN <- as.matrix(kronecker(diag(nunitN), gMN))
+  q <- dim(gMN)[1]
+  # Estimated covariance matrix of conditional error
+  # (homoskedastic conditional independence model)
+  sigsqN <- attr(VarCorr(mfl), "sc")^2
+  rMN <- sigsqN * diag(nobsN)
+  # Estimated covariance matrix of Y
+  vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN
+  invvMN <- MASS::ginv(vMN)
+  # H and Q matrix
+  hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN)
+  qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN
+  # eblue and eblup
+  eblueVn <- mfl@beta
+  ## equivalent to ranef(mfl)
+  eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn)
+  rownames(eblupVn) <- colnames(zMN)
+  ## Matrices and vectors used in the diagnostic plots -----------------------
+  ## Marginal and individual predictions, residuals and variances
+  marpredVn <- xMN %*% eblueVn
+  marresVn <- yVn - marpredVn
+  marvarMN <- vMN - xMN %*% hMN %*% t(xMN)
+  condpredVn <- marpredVn + zMN %*% eblupVn
+  condresVn <- yVn - condpredVn
+  condvarMN <- rMN %*% qMN %*% rMN
+  ## Analysis of marginal and conditional residuals
+  stmarresVn <- stcondresVn <- rep(0, nobsN)
+  lesverVn <- rep(0, nunitN)
+  names(lesverVn) <- idunitVc
+  ## seq_len() keeps the loop empty when there is no unit (1:0 would run twice)
+  for (i in seq_len(nunitN)) {
+    ## positions of the observations of subject i
+    idxiVn <- which(df[, unitC] == idunitVc[i])
+    miN <- length(idxiVn)
+    ## standardization of marginal residual
+    stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn])
+    ## Standardized Lesaffre and Verbeke's measure
+    auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn])
+    lesverVn[i] <- sum(diag(auxMN %*% t(auxMN)))
+    ## standardization of conditional residual
+    stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn])
+  }
+  lesverVn <- lesverVn / sum(lesverVn)
+  ## EBLUP analysis (Mahalanobis' distance)
+  varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN
+  mdistVn <- rep(0, nunitN)
+  qm <- q - 1
+  for (j in seq_len(nunitN)) {
+    ## the q random effects of unit j are stored contiguously
+    idxjVn <- (q * j - qm):(q * j)
+    gbi <- varbMN[idxjVn, idxjVn]
+    eblupi <- eblupVn[idxjVn]
+    ## MASS:: prefix, as for the other ginv() calls: MASS may not be attached
+    mdistVn[j] <- t(eblupi) %*% MASS::ginv(gbi) %*% eblupi
+  }
+  names(mdistVn) <- idunitVc
+  ## output ------------------------------------------------------------------
+  return(list(
+    data = df,
+    q = q,
+    eblue = eblueVn,
+    eblup = eblupVn,
+    marginal.prediction = marpredVn,
+    conditional.prediction = condpredVn,
+    std.marginal.residuals = stmarresVn,
+    std.conditional.residuals = stcondresVn,
+    mahalanobis.distance = mdistVn,
+    std.mahalanobis.distance = mdistVn / sum(mdistVn),
+    std.lesaffreverbeke.measure = lesverVn
+  ))
+}
+#' Wrapper function for diagnostic plots of 'lmer' linear mixed models
+#'
+#' (W4M mixmod) Computes the diagnostic quantities of the model and arranges the
+#' seven diagnostic plots on a single page.
+#'
+#' @param mfl A linear mixed model fitted via lmer
+#' @param title Title displayed at the top of the page
+#' @param outlier.limit Limit on standardized residuals beyond which observations are labelled
+#' @param pvalCutof Not used in this function.
+#' @param resC Not used in this function.
+#' @param uniC Name of the column used first to label outlying observations
+#' @param timC Name of the column used third to label outlying observations
+#' @param fixC Name of the column used second to label outlying observations
+#' @param least.confounded Not used yet.
+#' @return The value of the grid.arrange call
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export diagmflF
+diagmflF <- function(mfl,
+                     title = "",
+                     outlier.limit = 3,
+                     pvalCutof = 0.05,
+                     resC = "vd",
+                     uniC = "subject",
+                     timC = "time",
+                     fixC = "fixfact",
+                     least.confounded = FALSE) {
+  ## diagnostics
+  diagnostics <- lmer.computeDiag(mfl)
+  labelVc <- c(uniC, fixC, timC)
+  ## individual panels (built without being drawn)
+  linearityP <- plot_linearity(diagnostics, hlimitN = outlier.limit, plotL = FALSE,
+                               label_factor = labelVc)
+  condResP <- plot_conditionalResiduals(diagnostics, hlimitN = outlier.limit, plotL = FALSE,
+                                        label_factor = labelVc)
+  qqP <- plot_condresQQplot(diagnostics,  plotL = FALSE)
+  lesaffreP <- plot_lesaffreVeerbeke(diagnostics,  plotL = FALSE)
+  ranefP <- plot_randomEffect(mfl, plotL = FALSE)[[1]]
+  khi2P <- plot_mahalanobisKhi2(diagnostics,  plotL = FALSE)
+  mahalP <- plot_mahalanobis(diagnostics,  plotL = FALSE)
+  ## page layout: each cell holds the position of a grob in the call below
+  spacer <- rectGrob(gp = gpar(col = "white"))
+  layoutMN <- rbind(rep(1, 7),
+                    c(2, 3, rep(4, 3), 20, 21),
+                    rep(5, 7),
+                    6:12,
+                    rep(13, 7),
+                    c(14:18, rep(19, 2)))
+  grid.arrange(spacer, linearityP,
+               spacer, condResP,
+               spacer, qqP,
+               spacer, lesaffreP,
+               spacer, ranefP,
+               spacer, khi2P,
+               spacer, mahalP,
+               spacer, spacer, spacer,
+               top = textGrob(title, gp = gpar(fontsize = 40, font = 4)),
+               layout_matrix = layoutMN,
+               heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3),
+               widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22))
+}
+#######################################################################################################
+## Raw data time courses
+#######################################################################################################
+#' Visualization of raw time course
+#'
+#' Plots every observation as a point and joins, for each subject (and fixed-factor
+#' level when given), the mean response at each time point.
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param responseC Name of the 'response' variable
+#' @param timeC Name of the 'time' variable
+#' @param subjectC  Name of the 'subject' variable
+#' @param fixfactC  Name of the 'fixed factor' variable (e.g.treatment)
+#' @param offset_subject Boolean indicating if an offset value (subject's mean) should
+#'   be subtracted from each data point. Default is FALSE
+#' @param plotL Boolean: should the plot be drawn?
+#' @param colorType One of NA (automatic), "none", FIXFACT or SUBJECT
+#' @param shapeType One of NA (automatic), "none", FIXFACT or SUBJECT
+#' @param lineType One of NA (automatic), "none", FIXFACT or SUBJECT
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_timeCourse
+plot_timeCourse <- function(mfl,
+                            responseC,
+                            timeC,
+                            subjectC,
+                            fixfactC = NULL,
+                            offset_subject = FALSE,
+                            plotL = TRUE,
+                            colorType = NA, ## subject, fixfact, none or NA
+                            shapeType = NA, ## subject, fixfact, none or NA
+                            lineType = NA ## subject, fixfact, none or NA
+) {
+  ## Data -----
+  ## inherits() instead of comparing class(): class() can have several elements
+  ## (e.g. a tibble), which makes an `if` condition fail.
+  if (inherits(mfl, c("merModLmerTest", "lmerMod", "lmerModLmerTest"))) {
+    DF <- mfl@frame
+  } else if (inherits(mfl, "data.frame")) {
+    DF <- mfl
+  } else {
+    stop("'mfl' argument must be a linear mixed effect model or a data frame.")
+  }
+  ## Format data -----
+  if (is.null(fixfactC)) {
+    DF <- DF[, c(responseC,  timeC, subjectC)]
+    colnames(DF) <- c("DV", "TIME", "SUBJECT")
+    meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "DV")
+    meanDF$GROUP <- meanDF$SUBJECT
+  } else {
+    DF <- DF[, c(responseC, fixfactC, timeC, subjectC)]
+    colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT")
+    meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME,
+                                  FIXFACT = DF$FIXFACT),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV")
+    meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_")
+  }
+  ## Offset -----
+  if (offset_subject) {
+    offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE)
+    offsetVn <- offsetMN[, 2]
+    names(offsetVn) <- offsetMN[, 1]
+    rm(offsetMN)
+    ## Look the offset up by subject NAME: indexing with a factor would use its
+    ## integer codes, which need not match the positions in offsetVn.
+    DF$DV <- DF$DV - offsetVn[as.character(DF$SUBJECT)]
+    meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)]
+  }
+  ## Graphical parameters -----
+  xlabC <-  timeC
+  ylabC <- responseC
+  titC <- "Individual time-courses"
+  if (offset_subject) {
+    ylabC <- paste(ylabC, "minus 'within-subject' empirical mean")
+    titC <- paste(titC, "('within-subject' empirical mean offset)")
+  }
+  ## colour: NA triggers the automatic choice
+  if (is.na(colorType)) {
+    colorType <- if (is.null(fixfactC)) "SUBJECT" else "FIXFACT"
+  }
+  colTxt <- if (colorType == "none") "" else paste(", colour=", colorType)
+  ## variable used by the automatic line type / shape: the one not used for colour
+  otherC <- if (colorType == "SUBJECT") "FIXFACT" else "SUBJECT"
+  ## lineType
+  if (is.na(lineType)) {
+    linTxt <- if (is.null(fixfactC)) "" else paste(", linetype=", otherC)
+  } else if (lineType == "none") {
+    linTxt  <- ""
+  } else {
+    linTxt  <-  paste(", linetype=", lineType)
+  }
+  ## shapeType
+  if (is.na(shapeType)) {
+    shaTxt <- if (is.null(fixfactC)) "" else paste(", shape=", otherC)
+  } else if (shapeType == "none") {
+    shaTxt  <- ""
+  } else {
+    shaTxt  <-  paste(", shape=", shapeType)
+  }
+  ## aes mapping, assembled as text because the aesthetics are chosen at run time
+  txtMap <- paste("aes(x = TIME, y = DV",
+                  colTxt, shaTxt, ")", sep = "")
+  txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ",
+                      colTxt, linTxt,  ")", sep = "")
+  ## plot and output
+  ## theme_bw() is a complete theme: it has to come BEFORE theme(), otherwise it
+  ## resets the legend position and the title formatting.
+  p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) +
+    ggtitle(titC) +
+    xlab(xlabC) + ylab(ylabC) +
+    theme_bw() +
+    theme(legend.position = "left",
+          plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_point() +
+    geom_line(eval(parse(text = txtLineMap)), data = meanDF)
+  if (plotL) plot(p)
+  invisible(p)
+}
+#######################################################################################################
+## Post-hoc estimate
+#######################################################################################################
+#' Visualization of fixed effects (post-hoc estimates)
+#'
+#' Bar plot of the differences of least-squares means returned by difflsmeans,
+#' with error bars and a fill colour reflecting the p-value.
+#'
+#' @param mfl A linear mixed model, passed to difflsmeans
+#' @param pvalCutof User p-value cut-off; bars are also graded at cut-off / 5 and cut-off / 10
+#' @param plotL Boolean: should the plot be drawn?
+#' @param titC Title of the plot
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_posthoc
+plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") {
+  estDF <- as.data.frame(difflsmeans(mfl, test.effs = NULL))
+  ## the last column is taken to be the p-value
+  colnames(estDF)[ncol(estDF)] <- "pvalue"
+  ## thresholds derived from the user-defined cut-off, from loosest to strictest
+  cutVn <- c(pvalCutof, pvalCutof / 5, pvalCutof / 10)
+  labVc <- paste0("p-value < ", cutVn)
+  estDF$Significance <- rep("NS", nrow(estDF))
+  for (k in seq_along(cutVn)) {
+    ## stricter thresholds overwrite the looser labels
+    estDF$Significance[which(estDF$pvalue < cutVn[k])] <- labVc[k]
+  }
+  estDF$levels <- rownames(estDF)
+  ## term = part of the row name before the first space
+  estDF$term <- sapply(rownames(estDF), function(rowC) {
+    strsplit(rowC, split = " ", fixed = TRUE)[[1]][1]
+  })
+  colValue <- c("grey", "yellow", "orange", "red")
+  names(colValue) <- c("NS", labVc)
+  p <- ggplot(estDF, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, estDF, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(values = colValue) +
+    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) +
+    ggtitle(titC) + xlab("") +
+    NULL
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#######################################################################################################
+## Visualization of random effects
+#######################################################################################################
+#' Visualization of random effects
+#'
+#' Equivalent of dotplot(ranef): one dot plot per grouping factor, each dot
+#' shown with a segment of +/- 1.96 conditional standard errors.
+#'
+#' @param mfl A linear mixed model fitted via lmer
+#' @param  plotL Logical: should each plot be drawn?
+#' @return A list of ggplot objects (one per grouping factor), invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_randomEffect
+plot_randomEffect <- function(mfl, plotL = TRUE) {
+  ## Estimation and formatting of the random effects
+  ranefLs <- ranef(mfl, condVar = TRUE)
+  sizeVn <- sapply(seq_along(ranefLs),
+                   function(k) {
+                     length(unlist(ranefLs[[k]]))
+                   })
+  DF <- data.frame(randomEffect = rep(names(ranefLs), times = sizeVn))
+  naVl <- rep(NA, nrow(DF))
+  DF$x1 <- naVl
+  DF$x2 <- naVl
+  DF$estimate <- naVl
+  DF$condVar <- naVl
+  for (rafC in names(ranefLs)) {
+    eff <- ranefLs[[rafC]]
+    rowVn <- which(DF$randomEffect == rafC)
+    ## x1 = name of the random-effect term, x2 = level of the grouping factor
+    DF$x1[rowVn] <- rep(colnames(eff), each = nrow(eff))
+    DF$x2[rowVn] <- rep(rownames(eff), ncol(eff))
+    DF$estimate[rowVn] <- unlist(eff)
+    ## conditional variances: diagonal entries of "postVar", term by term
+    postVarAn <- attr(eff, "postVar")
+    DF$condVar[rowVn] <- unlist(lapply(seq_len(ncol(eff)),
+                                       function(coli) {
+                                         postVarAn[coli, coli, seq_len(nrow(eff))]
+                                       }))
+  }
+  DF$se <- sqrt(DF$condVar)
+  DF$lower <- DF$estimate - 1.96 * DF$se
+  DF$upper <- DF$estimate + 1.96 * DF$se
+  ## Plot
+  plotLs <- vector("list", length(ranefLs))
+  names(plotLs) <- names(ranefLs)
+  for (k in seq_along(plotLs)) {
+    facC <- names(plotLs)[k]
+    subDF <- DF[DF$randomEffect == facC, ]
+    subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ]
+    ## NOTE(review): theme_bw() comes after theme() here; confirm whether the
+    ## bold title set in theme() is meant to survive it.
+    p <- ggplot(data = subDF,
+                mapping = aes(x = estimate, y = reorder(x2, estimate))) +
+      geom_point(size = 3) +
+      geom_segment(aes(xend = lower, yend = x2)) +
+      geom_segment(aes(xend = upper, yend = x2)) +
+      facet_wrap(~x1, ncol = length(unique(subDF$x1))) +
+      ylab("") + xlab("") +
+      ggtitle(paste("Random effect - ", facC, sep = "")) +
+      theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+      geom_vline(xintercept = 0, linetype = "dashed") +
+      theme_bw()
+    plotLs[[k]] <- p
+    if (plotL) {
+      plot(p)
+    }
+  }
+  invisible(plotLs)
+}
+#######################################################################################################
+## Linearity of effects and outlying observations
+#######################################################################################################
+#' Linearity of the fixed effect with regard to the continuous time
+#'
+#' Standardized marginal residuals against marginal predictions, with a loess
+#' smoother and labels on the observations lying beyond the outlier limit.
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean: should the plot be drawn?
+#' @param label_factor Column of observation names used to label outlying values
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_linearity
+#'
+plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         marginal.prediction = diagLs$marginal.prediction,
+                         standardized.marginal.residuals = diagLs$std.marginal.residuals)
+  ## outlier annotation: row number, then "_<first label>", then ".<label>" for the others
+  outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN)
+  outlabVc <- as.character(outidx)
+  for (li in seq_along(label_factor)) {
+    sepC <- if (li == 1) "_" else "."
+    outlabVc <- paste(outlabVc, df[outidx, label_factor[li]], sep = sepC)
+  }
+  df$outliers <- rep("", nrow(df))
+  df[outidx, "outliers"] <- outlabVc
+  p <- ggplot(data = df,
+              aes(x = marginal.prediction,
+                  y = standardized.marginal.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = marginal.prediction,
+                    y = standardized.marginal.residuals),
+                data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Linearity of effects/outlying obervations") +
+    xlab("Marginal predictions") +
+    ylab("Standardized marginal residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#######################################################################################################
+## EBLUP
+#######################################################################################################
+#' Mahalanobis distance
+#'
+#' Standardized Mahalanobis distance of each unit; units above twice the mean
+#' distance are labelled.
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean: should the plot be drawn?
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobis
+#'
+plot_mahalanobis <- function(diagLs,  plotL = TRUE) {
+  distVn <- diagLs$std.mahalanobis.distance
+  unitDf <- data.frame(unit = names(distVn),
+                       mal = distVn)
+  ## Outlying subjects: above twice the mean standardized distance
+  cutoffN <- 2 * mean(unitDf$mal)
+  outDf <- unitDf[unitDf$mal > cutoffN, ]
+  p <- ggplot(data = unitDf, mapping = aes(y = mal, x = unit)) +
+    geom_point(size = 3) +
+    ylab("Standardized Mahalanobis distance") +
+    geom_vline(xintercept = 0, linetype = "dashed") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = cutoffN, linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = outDf,
+              hjust = 1, vjust = 0) +
+    ggtitle("Outlying unit") +
+    xlab("unit")
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#' Mahalanobis distance (Chi2)
+#'
+#' QQ-plot of the Mahalanobis distances against the chi-squared distribution
+#' with diagLs$q degrees of freedom.
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean: should the plot be drawn?
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobisKhi2
+#'
+plot_mahalanobisKhi2 <- function(diagLs,  plotL = TRUE) {
+  distDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
+                       mal = diagLs$mahalanobis.distance)
+  titleTheme <- theme(legend.position = "none",
+                      plot.title = element_text(size = rel(1.2), face = "bold"))
+  qqP <- qqplotF(x = distDf$mal,
+                 distribution = "chisq",
+                 df = diagLs$q,
+                 line.estimate = NULL,
+                 conf = 0.95)
+  p <- qqP +
+    xlab("Chi-squared quantiles") +
+    ylab("Mahalanobis distance") +
+    ggtitle("Normality of random effect") +
+    titleTheme
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#######################################################################################################
+## Conditional residuals
+#######################################################################################################
+## Presence of outlying observations and homoscedasticity of residuals
+#' Homoscedasticity of conditional residuals
+#'
+#' Standardized conditional residuals against conditional predictions, with a loess
+#' smoother and labels on the observations lying beyond the outlier limit.
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean: should the plot be drawn?
+#' @param label_factor Column of observation names used to label outlying values
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_conditionalResiduals
+#'
+plot_conditionalResiduals <-  function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         conditional.prediction = diagLs$conditional.prediction,
+                         standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  ## outlier annotation: row number, then "_<first label>", then ".<label>" for the others
+  outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN)
+  outlabVc <- as.character(outidx)
+  for (li in seq_along(label_factor)) {
+    sepC <- if (li == 1) "_" else "."
+    outlabVc <- paste(outlabVc, df[outidx, label_factor[li]], sep = sepC)
+  }
+  df$outliers <- rep("", nrow(df))
+  df[outidx, "outliers"] <- outlabVc
+  p <- ggplot(data = df,
+              aes(x = conditional.prediction,
+                  y = standardized.conditional.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = conditional.prediction,
+                    y = standardized.conditional.residuals),
+                data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Homoscedasticity of conditional residuals/outlying observations") +
+    xlab("Individual predictions") +
+    ylab("Standardized conditional residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#' Normality of conditional residuals
+#'
+#' Normal QQ-plot of the standardized conditional residuals.
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean: should the plot be drawn?
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_condresQQplot
+#'
+plot_condresQQplot <-  function(diagLs, plotL = TRUE) {
+  resDf <- cbind.data.frame(diagLs$data,
+                            conditional.prediction = diagLs$conditional.prediction,
+                            standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  titleTheme <- theme(legend.position = "none",
+                      plot.title = element_text(size = rel(1.2), face = "bold"))
+  qqP <- qqplotF(x = resDf$standardized.conditional.residuals,
+                 distribution = "norm",
+                 line.estimate = NULL,
+                 conf = 0.95)
+  p <- qqP +
+    xlab("Standard normal quantiles") +
+    ylab("Standardized conditional residual quantiles") +
+    ggtitle("Normality of conditional error") +
+    titleTheme
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+#######################################################################################################
+## Within-units covariance structure
+#######################################################################################################
+#' Lesaffre-Verbeke measure
+#'
+#' Standardized Lesaffre-Verbeke measure of each unit; units above twice the
+#' mean value are labelled.
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean: should the plot be drawn?
+#' @return The ggplot object, invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_lesaffreVeerbeke
+#'
+plot_lesaffreVeerbeke <- function(diagLs,  plotL = TRUE) {
+  lvmVn <- diagLs$std.lesaffreverbeke.measure
+  unitDf <- data.frame(unit = names(lvmVn),
+                       lvm = lvmVn)
+  ## units above twice the mean measure get a text label
+  cutoffN <- 2 * mean(unitDf$lvm)
+  outDf <- unitDf[unitDf$lvm > cutoffN, ]
+  p <- ggplot(data = unitDf, aes(x = unit, y = lvm)) +
+    geom_point(size = 2) +
+    theme(legend.position = "none") +
+    xlab("units") +
+    ylab("Standardized Lesaffre-Verbeke measure") +
+    geom_hline(yintercept = cutoffN, linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = outDf,
+              hjust = 0, vjust = 0) +
+    ggtitle("Within-units covariance matrice") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) {
+    plot(p)
+  }
+  invisible(p)
+}
+##-------------------------------------------------------------------------------------------------##
+## Helpers
+##-------------------------------------------------------------------------------------------------##
+## square root of a matrix
+## From Rocha, Singer and Nobre
+#' square root of a matrix (Rocha)
+#'
+#' Computes U sqrt(D) V' from the singular value decomposition mat = U D V'.
+#'
+#' @param mat Matrix (or object coercible to a matrix)
+#' @return A matrix, returned invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrt.matrix
+sqrt.matrix <- function(mat) {
+  mat <- as.matrix(mat)  # new line of code
+  ## The LINPACK argument of svd() is defunct in current R: do not pass it.
+  singular_dec <- svd(mat)
+  U <- singular_dec$u
+  V <- singular_dec$v
+  ## nrow is given explicitly: with a single singular value, diag(d) would
+  ## build an identity matrix of size d instead of a 1 x 1 matrix.
+  sqrtD <- diag(sqrt(singular_dec$d), nrow = length(singular_dec$d))
+  invisible(U %*% sqrtD %*% t(V))
+}
+## square root of a matrix
+## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6)
+## (for matMN a n x n matrix that is symmetric and non-negative definite)
+#' square root of a matrix (SVD)
+#'
+#' Computes U sqrt(D) V' from the singular value decomposition matMN = U D V'.
+#'
+#' @param matMN Matrix (or object coercible to a matrix, e.g. a single number)
+#' @return A matrix, returned invisibly
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrtmF
+sqrtmF <- function(matMN) {
+  matMN <- as.matrix(matMN)
+  ## check that matMN is symmetric: if (!all(t(matMN) == matMN)) stop("matMN must be symmetric.")
+  svd_dec <- svd(matMN)
+  ## nrow is given explicitly: for a 1 x 1 input (a unit with one observation),
+  ## diag(d) would build an identity matrix of size d instead of a 1 x 1 matrix.
+  sqrtdMN <- diag(sqrt(svd_dec$d), nrow = length(svd_dec$d))
+  invisible(svd_dec$u %*% sqrtdMN %*% t(svd_dec$v))
+}
+## qqplotF
+## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c
+## QQ-plot of x against a theoretical distribution, with a reference line and a
+## pointwise confidence band; points outside the band are labelled.
+##   x             numeric vector (NA values are dropped)
+##   distribution  suffix of the q<distribution> / d<distribution> functions
+##   ...           extra arguments passed to those functions (e.g. df)
+##   line.estimate NULL (line through the quartiles) or a fitting function
+##                 accepting a formula and a 'data' argument (e.g. lm)
+##   conf          confidence level of the band
+##   labels        labels of the observations, or NULL for no label
+## Returns a ggplot object.
+qqplotF <- function(x,
+                    distribution = "norm", ...,
+                    line.estimate = NULL,
+                    conf = 0.95,
+                    labels = names(x)) {
+  ## plain function lookup instead of eval(parse())
+  q.function <- get(paste0("q", distribution), mode = "function")
+  d.function <- get(paste0("d", distribution), mode = "function")
+  ## Explicit labels refer to the raw x: drop those of the removed NA values.
+  ## (The default, names(x), is evaluated lazily after na.omit() and is
+  ## therefore already aligned; it must not be forced before this point.)
+  labelsGivenL <- !missing(labels)
+  nrawN <- length(x)
+  x <- na.omit(x)
+  droppedVi <- as.integer(attr(x, "na.action"))
+  if (labelsGivenL && length(droppedVi) > 0 && length(labels) == nrawN) {
+    labels <- labels[-droppedVi]
+  }
+  ord <- order(x)
+  n <- length(x)
+  P <- ppoints(n)
+  daf <- data.frame(ord.x = x[ord], z = q.function(P, ...))
+  if (is.null(line.estimate)) {
+    ## line through the first and third quartiles
+    Q.x <- quantile(daf$ord.x, c(0.25, 0.75))
+    Q.z <- q.function(c(0.25, 0.75), ...)
+    b <- diff(Q.x) / diff(Q.z)
+    lineCoefVn <- c(Q.x[1] - b * Q.z[1], b)
+  } else {
+    ## ord.x and z are columns of daf: they must be supplied through 'data'
+    lineCoefVn <- coef(line.estimate(ord.x ~ z, data = daf))
+  }
+  zz <- qnorm(1 - (1 - conf) / 2)
+  SE <- (lineCoefVn[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n)
+  fit.value <- lineCoefVn[1] + lineCoefVn[2] * daf$z
+  daf$upper <- fit.value + zz * SE
+  daf$lower <- fit.value - zz * SE
+  if (!is.null(labels)) {
+    daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "")
+  }
+  p <- ggplot(daf, aes(x = z, y = ord.x)) +
+    geom_point() +
+    geom_abline(intercept = lineCoefVn[1], slope = lineCoefVn[2], col = "red") +
+    geom_line(aes(x = z, y = lower), daf,  col = "red", linetype = "dashed") +
+    geom_line(aes(x = z, y = upper), daf,  col = "red", linetype = "dashed") +
+    xlab("") + ylab("")
+  if (!is.null(labels)) p <- p + geom_text(aes(label = label))
+  return(p)
+}
+## histogram
+## Density histogram of x with a kernel density estimate and the density of a
+## centred normal distribution of standard deviation sd_x.
+##   x       numeric vector
+##   sd_x    standard deviation of the reference normal curve (default: sd(x))
+##   breaks  rule used for the bin width: "sqrt", "sturges", "rice", "scott"
+##           or "fd"; any other value falls back to "scott"
+## Returns a ggplot object.
+histF <- function(x, sd_x = NULL, breaks = "scott") {
+  if (is.null(sd_x)) {
+    sd_x <- sd(x)
+  }
+  ## Bandwidth estimation (default is Scott)
+  if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) {
+    breaks <- "scott"
+  }
+  nN <- length(x)
+  if (breaks %in% c("sqrt", "sturges", "rice")) {
+    ## rules giving a number of bins k
+    k <- switch(breaks,
+                sqrt = sqrt(nN),
+                ## Sturges' rule depends on the sample SIZE, not on the values
+                sturges = ceiling(log2(nN)) + 1,
+                rice = floor(2 * nN ^ (1 / 3))
+    )
+    bw <- diff(range(x)) / k
+  } else {
+    ## rules giving the bin width directly
+    bw <- switch(breaks,
+                 scott = 3.5 * sd_x / nN ^ (1 / 3),
+                 ## Freedman-Diaconis bin width is 2 * IQR / n^(1/3)
+                 fd = 2 * IQR(x) / nN ^ (1 / 3)
+    )
+  }
+  daf <- data.frame(x = x)
+  ## graph
+  return(ggplot(data = daf, aes(x)) +
+           geom_histogram(aes(y = ..density..),
+                          col = "black", fill = "grey", binwidth = bw) +
+           geom_density(size = 1.2,
+                        col = "blue",
+                        linetype = "blank",
+                        fill = rgb(0, 0, 1, 0.1)) +
+           stat_function(fun = dnorm,
+                         args = list(mean = 0, sd = sd_x),
+                         col = "blue", size = 1.2) +
+           theme(legend.position = "none") +
+           xlab(""))
+}
+## Summary page: individual time-courses, boxplot by fixed factor and post-hoc estimates.
+##   mfl        table of post-hoc estimates; the columns Estimate, p.value,
+##              Lower.CI and Upper.CI and the row names are used
+##   df         data; columns are read by position: (fixed factor, time, subject,
+##              response) when there are more than 3 columns, (time, subject,
+##              response) otherwise
+##   title      title displayed at the top of the page
+##   pvalCutof  user-defined p-value cut-off
+plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) {
+  ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not
+  if (ncol(df) > 3) {
+    varidx <- 4
+    ffidx <- 1
+    timidx <- 2
+    individx <- 3
+  } else {
+    varidx <- 3
+    ffidx <- 1
+    timidx <- 1
+    individx <- 2
+  }
+  nameVar <- colnames(df)[varidx]
+  fflab <- colnames(df)[ffidx]
+  ## Individual time-course
+  ## x-axis label taken from the time column itself (it was hard-coded to column 2,
+  ## which is the subject column when df has only 3 columns)
+  rawPlot <-
+    ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) +
+    geom_point() +
+    geom_line() +  ggtitle("Individual time-courses (raw data)") +
+    ylab(nameVar) +
+    xlab(label = colnames(df)[timidx]) +
+    theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Boxplot of fixed factor
+  bPlot <-
+    ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) +
+    geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) +
+    ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") +
+    theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Post-hoc estimates
+  ddlsm1  <- mfl
+  ddlsm1$name <- rownames(ddlsm1)
+  ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
+  ## change by JF to take the user-defined p-value threshold into account
+  ## scipen/digits only matter for the labels and the drawing done here, so the
+  ## caller's options are restored on exit.
+  ## NOTE(review): confirm that no caller relies on these options staying set.
+  oldOptLs <- options("scipen" = 100, "digits" = 5)
+  on.exit(options(oldOptLs), add = TRUE)
+  pvalCutof <- as.numeric(pvalCutof)
+  ## thresholds (loosest bs, medium bm, strictest bi), capped by the user's cut-off;
+  ## a cut-off equal to one of the defaults is honoured too
+  bs <- 0.05
+  bm <- 0.01
+  bi <- 0.005
+  if (pvalCutof > bm) {
+    bs <- pvalCutof
+  } else if (pvalCutof > bi) {
+    bm <- pvalCutof
+    bs <- pvalCutof
+  } else {
+    bi <- pvalCutof
+    bm <- pvalCutof
+    bs <- pvalCutof
+  }
+  lbs <- paste("p-value < ", bs, sep = "")
+  lbm <- paste("p-value < ", bm, sep = "")
+  lbi <- paste("p-value < ", bi, sep = "")
+  valcol <- c("grey", "yellow", "orange", "red")
+  names(valcol) <- c("NS", lbs, lbm, lbi)
+  ## each label is applied to the p-values below ITS OWN threshold; stricter
+  ## thresholds come last and overwrite the looser labels
+  ddlsm1$Significance[which(ddlsm1$p.value < bs)] <- lbs
+  ddlsm1$Significance[which(ddlsm1$p.value < bm)] <- lbm
+  ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi
+  ddlsm1$levels <- rownames(ddlsm1)
+  ## term = part of the row name before the first space
+  ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
+    strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
+  })
+  phPlot <-
+    ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(
+      values = valcol) +
+    geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) +
+    ggtitle("Post-hoc estimates ") + xlab("") +
+    theme(plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Final plotting
+  grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2),
+               phPlot, nrow = 2,
+               top = textGrob(title, gp = gpar(fontsize = 32, font = 4))
+  )
+}

Mercurial > repos > workflow4metabolomics > mixmodel4repeated_measures

comparison diagmfl.R @ 0:a4d89d47646f draft default tip