Mercurial > repos > workflow4metabolomics > mixmodel4repeated_measures
comparison diagmfl.R @ 0:a4d89d47646f draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
| author | workflow4metabolomics |
|---|---|
| date | Mon, 16 May 2022 09:25:01 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a4d89d47646f |
|---|---|
| 1 #' Calcul des grandeurs "diagnostiques" | |
| 2 #' | |
| 3 #' Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner | |
| 4 #' avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques, | |
| 5 #' et des observations non ordonnees sujet par sujet (dernier point a verifier.) | |
| 6 #' | |
| 7 #' @detail Les graphiques, les calculs associƩs et les notations utilisees dans le script suivent | |
| 8 #' l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear | |
| 9 #' mixed model assumptions and some remedial measures, International Statistical Review | |
| 10 #' (doi:10.1111/insr.12178) | |
| 11 #' | |
| 12 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
| 13 #' @return A list | |
| 14 #' @author Natacha Lenuzza | |
| 15 #' @examples | |
| 16 #' print("hello !") | |
| 17 #' | |
| 18 #' @export lmer.computeDiag | |
| 19 | |
| 20 | |
| 21 | |
| 22 lmer.computeDiag <- function(mfl) { | |
| 23 | |
| 24 ## Check arguments --------------------------------------------------------- | |
| 25 | |
| 26 if (length(mfl@flist) > 1) | |
| 27 stop("Several 'grouping level' for random effect not implemented yet.") | |
| 28 | |
| 29 | |
| 30 ## extracting information from mfl models ------------------------------------------------------------- | |
| 31 # data | |
| 32 df <- mfl@frame | |
| 33 responseC <- names(df)[1] | |
| 34 unitC <- names(mfl@flist)[1] | |
| 35 # observations | |
| 36 yVn <- df[, responseC] | |
| 37 nobsN <- length(yVn) | |
| 38 # units | |
| 39 idunitVc <- levels(mfl@flist[[1]]) | |
| 40 nunitN <- length(unique(idunitVc)) | |
| 41 #X | |
| 42 xMN <- mfl@pp$X | |
| 43 pN <- ncol(xMN) | |
| 44 #Z | |
| 45 zMN <- t(as.matrix(mfl@pp$Zt)) | |
| 46 # Estimated covariance matrix of random effects (Gam) | |
| 47 aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping | |
| 48 aux2 <- attr(aux, "stddev") | |
| 49 gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2)) | |
| 50 gammaMN <- as.matrix(kronecker(diag(nunitN), gMN)) | |
| 51 q <- dim(gMN)[1] | |
| 52 # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model) | |
| 53 sigsqN <- attr(VarCorr(mfl), "sc")^2 | |
| 54 rMN <- sigsqN * diag(nobsN) | |
| 55 # Estimated covariance matrix of Y | |
| 56 vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN | |
| 57 invvMN <- MASS::ginv(vMN) | |
| 58 # H and Q matrix | |
| 59 hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN) | |
| 60 qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN | |
| 61 # eblue and eblup | |
| 62 eblueVn <- mfl@beta | |
| 63 eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl) | |
| 64 rownames(eblupVn) <- colnames(zMN) | |
| 65 ## Calculs of matrices and vectors used in graph diagnosics --------------------------------------------- | |
| 66 ## Marginal and individual predictions, residuals and variances | |
| 67 marpredVn <- xMN %*% eblueVn | |
| 68 marresVn <- yVn - marpredVn | |
| 69 marvarMN <- vMN - xMN %*% hMN %*% t(xMN) | |
| 70 condpredVn <- marpredVn + zMN %*% eblupVn | |
| 71 condresVn <- yVn - condpredVn | |
| 72 condvarMN <- rMN %*% qMN %*% rMN | |
| 73 ## Analysis of marginal and conditional residuals | |
| 74 stmarresVn <- stcondresVn <- rep(0, nobsN) | |
| 75 lesverVn <- rep(0, nunitN) | |
| 76 names(lesverVn) <- idunitVc | |
| 77 for (i in 1:nunitN) { | |
| 78 idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i | |
| 79 miN <- length(idxiVn) | |
| 80 ## standardization of marginal residual | |
| 81 stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn]) | |
| 82 ##Standardized Lessafre and Verbeke's measure | |
| 83 auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn]) | |
| 84 lesverVn[i] <- sum(diag(auxMN %*% t(auxMN))) | |
| 85 ## standardization of conditional residual | |
| 86 stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn]) | |
| 87 } | |
| 88 lesverVn <- lesverVn / sum(lesverVn) | |
| 89 ## Least confounded conditional residuals | |
| 90 ## EBLUP analysis (Mahalanobis' distance) | |
| 91 varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN | |
| 92 mdistVn <- rep(0, nunitN) | |
| 93 qm <- q - 1 | |
| 94 for (j in 1:nunitN) { | |
| 95 gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)] | |
| 96 eblupi <- eblupVn[(q * j - qm):(q * j)] | |
| 97 mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi | |
| 98 } | |
| 99 names(mdistVn) <- levels(mfl@flist[[1]]) | |
| 100 ## output ---------------------------------------------- | |
| 101 return(list( | |
| 102 data = df, | |
| 103 q = q, | |
| 104 eblue = eblueVn, | |
| 105 eblup = eblupVn, | |
| 106 marginal.prediction = marpredVn, | |
| 107 conditional.prediction = condpredVn, | |
| 108 std.marginal.residuals = stmarresVn, | |
| 109 std.conditional.residuals = stcondresVn, | |
| 110 mahalanobis.distance = mdistVn, | |
| 111 std.mahalanobis.distance = mdistVn / sum(mdistVn), | |
| 112 std.lesaffreverbeke.measure = lesverVn | |
| 113 )) | |
| 114 } | |
| 115 | |
| 116 | |
| 117 #' Wrapper function for diagnostic plots of 'lmer' linear mixed models | |
| 118 #' | |
| 119 #' (W4M mixmod) | |
| 120 #' | |
| 121 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
| 122 #' @param title aa | |
| 123 #' @param outlier.limit aa | |
| 124 #' @param pvalCutof aa | |
| 125 #' @param resC aa | |
| 126 #' @param uniC aa | |
| 127 #' @param fixC aa | |
| 128 #' @param lest.confounded Not used yet. | |
| 129 #' @return NULL | |
| 130 #' @author Natacha Lenuzza | |
| 131 #' @examples | |
| 132 #' print("hello !") | |
| 133 #' | |
| 134 #' @export diagmflF | |
| 135 | |
| 136 | |
| 137 diagmflF <- function(mfl, | |
| 138 title = "", | |
| 139 outlier.limit = 3, | |
| 140 pvalCutof = 0.05, | |
| 141 resC = "vd", | |
| 142 uniC = "subject", | |
| 143 timC = "time", | |
| 144 fixC = "fixfact", | |
| 145 least.confounded = FALSE) { | |
| 146 ## diagnostics | |
| 147 diagLs <- lmer.computeDiag(mfl) | |
| 148 ## plots | |
| 149 blank <- rectGrob(gp = gpar(col = "white")) | |
| 150 rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey")) | |
| 151 grid.arrange(blank, | |
| 152 plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE, | |
| 153 label_factor = c(uniC, fixC, timC)), | |
| 154 blank, | |
| 155 plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE, | |
| 156 label_factor = c(uniC, fixC, timC)), | |
| 157 blank, | |
| 158 plot_condresQQplot(diagLs, plotL = FALSE), | |
| 159 blank, | |
| 160 plot_lesaffreVeerbeke(diagLs, plotL = FALSE), | |
| 161 blank, | |
| 162 plot_randomEffect(mfl, plotL = FALSE)[[1]], | |
| 163 blank, | |
| 164 plot_mahalanobisKhi2(diagLs, plotL = FALSE), | |
| 165 blank, | |
| 166 plot_mahalanobis(diagLs, plotL = FALSE), | |
| 167 blank, | |
| 168 blank, | |
| 169 blank, | |
| 170 top = textGrob(title, gp = gpar(fontsize = 40, font = 4)), | |
| 171 layout_matrix = matrix(c(rep(1, 7), | |
| 172 2, 3, rep(4, 3), 20, 21, | |
| 173 rep(5, 7), | |
| 174 6:12, | |
| 175 rep(13, 7), | |
| 176 14:18, rep(19, 2)), | |
| 177 ncol = 7, nrow = 6, byrow = TRUE), | |
| 178 heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3), | |
| 179 widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22)) | |
| 180 } | |
| 181 | |
| 182 ####################################################################################################### | |
| 183 ## Raw data time courses | |
| 184 ####################################################################################################### | |
| 185 | |
| 186 #' Visualization of raw time course | |
| 187 #' | |
| 188 #' Une | |
| 189 #' | |
| 190 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
| 191 #' @param responseC Name of the 'response' variable | |
| 192 #' @param timeC Name of the 'time' variable | |
| 193 #' @param subjectC Name of the 'subject' variable | |
| 194 #' @param fixfactC Name of the 'fixed factor' variable (e.g.treatment) | |
| 195 #' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE | |
| 196 #' @param plotL Boolean | |
| 197 #' @param colorType One of NA, FIXFACT or SUBJECT | |
| 198 #' @param shapeType One of NA, FIXFACT or SUBJECT | |
| 199 #' @param lineType One of NA, FIXFACT or SUBJECT | |
| 200 #' @return A plot | |
| 201 #' @author Natacha Lenuzza | |
| 202 #' @examples | |
| 203 #' print("hello !") | |
| 204 #' | |
| 205 #' @export plot_timeCourse | |
| 206 | |
| 207 | |
| 208 | |
| 209 plot_timeCourse <- function(mfl, | |
| 210 responseC, | |
| 211 timeC, | |
| 212 subjectC, | |
| 213 fixfactC = NULL, | |
| 214 offset_subject = FALSE, | |
| 215 plotL = TRUE, | |
| 216 colorType = NA, ## subject, fixfact, none or NA | |
| 217 shapeType = NA, ## subject, fixfact, none or NA | |
| 218 lineType = NA ## subject, fixfact, none or NA | |
| 219 ) { | |
| 220 ## Data ----- | |
| 221 if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) { | |
| 222 DF <- mfl@frame | |
| 223 } else if (class(mfl) == "data.frame") { | |
| 224 DF <- mfl | |
| 225 } else { | |
| 226 stop("'mfl' argument must be a linear mixed effect model or a data frame.") | |
| 227 } | |
| 228 ## Format data ----- | |
| 229 if (is.null(fixfactC)) { | |
| 230 DF <- DF[, c(responseC, timeC, subjectC)] | |
| 231 colnames(DF) <- c("DV", "TIME", "SUBJECT") | |
| 232 meanDF <- aggregate(DF$DV, | |
| 233 by = list(SUBJECT = DF$SUBJECT, | |
| 234 TIME = DF$TIME), | |
| 235 FUN = mean, | |
| 236 na.rm = TRUE) | |
| 237 colnames(meanDF) <- c("SUBJECT", "TIME", "DV") | |
| 238 meanDF$GROUP <- meanDF$SUBJECT | |
| 239 } else{ | |
| 240 DF <- DF[, c(responseC, fixfactC, timeC, subjectC)] | |
| 241 colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT") | |
| 242 meanDF <- aggregate(DF$DV, | |
| 243 by = list(SUBJECT = DF$SUBJECT, | |
| 244 TIME = DF$TIME, | |
| 245 FIXFACT = DF$FIXFACT), | |
| 246 FUN = mean, | |
| 247 na.rm = TRUE) | |
| 248 colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV") | |
| 249 meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_") | |
| 250 } | |
| 251 ## Offset ----- | |
| 252 if (offset_subject) { | |
| 253 offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE) | |
| 254 offsetVn <- offsetMN[, 2] | |
| 255 names(offsetVn) <- offsetMN[, 1] | |
| 256 rm(offsetMN) | |
| 257 DF$DV <- DF$DV - offsetVn[DF$SUBJECT] | |
| 258 meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)] | |
| 259 } | |
| 260 ## Graphical parameters ----- | |
| 261 xlabC <- timeC | |
| 262 ylabC <- responseC | |
| 263 titC <- "Individual time-courses" | |
| 264 if (offset_subject) { | |
| 265 ylabC <- paste(ylabC, "minus 'within-subject' empirical mean") | |
| 266 titC <- paste(titC, "('within-subject' empirical mean offset)") | |
| 267 } | |
| 268 ## color | |
| 269 if (is.na(colorType)) { ## automaticatical attribution | |
| 270 if (is.null(fixfactC)) { | |
| 271 colorType <- "SUBJECT" | |
| 272 } else { | |
| 273 colorType <- "FIXFACT" | |
| 274 } | |
| 275 colTxt <- paste(", colour=", colorType) | |
| 276 } else if (colorType == "none") { | |
| 277 colTxt <- "" | |
| 278 } else { | |
| 279 colTxt <- paste(", colour=", colorType) | |
| 280 } | |
| 281 ## lineType | |
| 282 if (is.na(lineType)) { ## automaticatical attribution | |
| 283 if (is.null(fixfactC)) { | |
| 284 linTxt <- "" | |
| 285 } else { | |
| 286 linTxt <- paste(", linetype=", | |
| 287 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) | |
| 288 } | |
| 289 } else if (lineType == "none") { | |
| 290 linTxt <- "" | |
| 291 } else { | |
| 292 linTxt <- paste(", linetype=", lineType) | |
| 293 } | |
| 294 ## shapeType | |
| 295 if (is.na(shapeType)) { ## automaticatical attribution | |
| 296 if (is.null(fixfactC)) { | |
| 297 shaTxt <- "" | |
| 298 } else { | |
| 299 shaTxt <- paste(", shape=", | |
| 300 ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT")) | |
| 301 } | |
| 302 } else if (shapeType == "none") { | |
| 303 shaTxt <- "" | |
| 304 } else { | |
| 305 shaTxt <- paste(", shape=", shapeType) | |
| 306 } | |
| 307 ## aes mapping | |
| 308 txtMap <- paste("aes(x = TIME, y = DV", | |
| 309 colTxt, shaTxt, ")", sep = "") | |
| 310 txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ", | |
| 311 colTxt, linTxt, ")", sep = "") | |
| 312 ## plot and output | |
| 313 p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) + | |
| 314 ggtitle(titC) + | |
| 315 xlab(xlabC) + ylab(ylabC) + | |
| 316 theme(legend.position = "left", | |
| 317 plot.title = element_text(size = rel(1.2), face = "bold")) + | |
| 318 geom_point() + | |
| 319 geom_line(eval(parse(text = txtLineMap)), data = meanDF) + | |
| 320 theme_bw() + | |
| 321 NULL | |
| 322 if (plotL) plot(p) | |
| 323 invisible(p) | |
| 324 } | |
| 325 | |
| 326 ####################################################################################################### | |
| 327 ## Post-hoc estimate | |
| 328 ####################################################################################################### | |
| 329 #' Visualization of fixed effects (post-hoc estimates) | |
| 330 #' | |
| 331 #' Description | |
| 332 #' | |
| 333 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
| 334 #' @param pvalCutof User pvalue cut of | |
| 335 #' @param plotL Boolean | |
| 336 #' @param titC Title of the plot | |
| 337 #' @return A plot | |
| 338 #' @author Natacha Lenuzza | |
| 339 #' @examples | |
| 340 #' print("hello !") | |
| 341 #' | |
| 342 #' @export plot_posthoc | |
| 343 plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") { | |
| 344 ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL)) | |
| 345 colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue" | |
| 346 ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) | |
| 347 ## modif JF pour tenir compte du seuil de pvalues defini par le user | |
| 348 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "") | |
| 349 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "") | |
| 350 ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "") | |
| 351 ddlsm1$levels <- rownames(ddlsm1) | |
| 352 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { | |
| 353 strsplit(namC, split = " ", fixed = TRUE)[[1]][1] | |
| 354 }) | |
| 355 colValue <- c("grey", "yellow", "orange", "red") | |
| 356 names(colValue) <- c("NS", | |
| 357 paste("p-value < ", pvalCutof, sep = ""), | |
| 358 paste("p-value < ", pvalCutof / 5, sep = ""), | |
| 359 paste("p-value < ", pvalCutof / 10, sep = "")) | |
| 360 p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) + | |
| 361 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + | |
| 362 geom_bar(aes(fill = Significance), stat = "identity") + | |
| 363 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
| 364 scale_fill_manual(values = colValue) + | |
| 365 geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) + | |
| 366 ggtitle(titC) + xlab("") + | |
| 367 NULL | |
| 368 if (plotL) plot(p) | |
| 369 invisible(p) | |
| 370 } | |
| 371 | |
| 372 ####################################################################################################### | |
| 373 ## Visualisation des effets alƩatoires | |
| 374 ####################################################################################################### | |
| 375 #' Visualization of random effects | |
| 376 #' | |
| 377 #' Equivalent of dotplot(ranef) | |
| 378 #' | |
| 379 #' @param mfl A linear mixed model fitted via lmer or a data frame containing data | |
| 380 #' @param plotL Logical | |
| 381 #' @return A plot | |
| 382 #' @author Natacha Lenuzza | |
| 383 #' @examples | |
| 384 #' print("hello !") | |
| 385 #' | |
| 386 #' @export plot_randomEffect | |
| 387 | |
| 388 | |
| 389 plot_randomEffect <- function(mfl, plotL = TRUE) { | |
| 390 ## Estimation et format des effets alƩatoires | |
| 391 randomEffect <- ranef(mfl, condVar = TRUE) | |
| 392 DF <- data.frame(randomEffect = rep(names(randomEffect), | |
| 393 times = sapply(seq_len(length(randomEffect)), | |
| 394 function(lsi) { | |
| 395 return(length(unlist(randomEffect[[lsi]])))}))) | |
| 396 DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF)) | |
| 397 for (rafC in names(randomEffect)) { | |
| 398 eff <- randomEffect[[rafC]] | |
| 399 DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff)) | |
| 400 DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff)) | |
| 401 DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff) | |
| 402 condvar <- attr(randomEffect[[rafC]], "postVar") | |
| 403 se <- NULL | |
| 404 for (coli in seq_len(ncol(eff))) { | |
| 405 se <- c(se, | |
| 406 sapply(seq_len(nrow(eff)), | |
| 407 function(i) { | |
| 408 return(condvar[coli, coli, i])})) | |
| 409 } | |
| 410 DF$condVar[which(DF$randomEffect == rafC)] <- se | |
| 411 } | |
| 412 DF$se <- sqrt(DF$condVar) | |
| 413 DF$lower <- DF$estimate - 1.96 * DF$se | |
| 414 DF$upper <- DF$estimate + 1.96 * DF$se | |
| 415 ## Plot | |
| 416 plotLs <- vector("list", length(randomEffect)) | |
| 417 names(plotLs) <- names(randomEffect) | |
| 418 for (pi in seq_len(length(plotLs))) { | |
| 419 subDF <- DF[DF$randomEffect == names(plotLs)[pi], ] | |
| 420 subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ] | |
| 421 p <- ggplot(data = subDF, | |
| 422 mapping = aes(x = estimate, y = reorder(x2, estimate)) | |
| 423 ) + | |
| 424 geom_point(size = 3) + | |
| 425 geom_segment(aes(xend = lower, yend = x2)) + | |
| 426 geom_segment(aes(xend = upper, yend = x2)) + | |
| 427 facet_wrap(~x1, ncol = length(unique(subDF$x1))) + | |
| 428 ylab("") + xlab("") + | |
| 429 ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) + | |
| 430 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
| 431 geom_vline(xintercept = 0, linetype = "dashed") + | |
| 432 theme_bw() | |
| 433 plotLs[[pi]] <- p | |
| 434 if (plotL) plot(p) | |
| 435 } | |
| 436 invisible(plotLs) | |
| 437 } | |
| 438 | |
| 439 ####################################################################################################### | |
| 440 ## LinearitƩ des effets et outlying observations | |
| 441 ####################################################################################################### | |
| 442 #' Linarity of the fixed effect with regard to the continuous time | |
| 443 #' | |
| 444 #' @param diagLs diagnostic list | |
| 445 #' @param hlimitN Limit value for outliers (e.g.2 or 3) | |
| 446 #' @param plotL Boolean | |
| 447 #' @param label_factor Column of observation names used to label outlying values | |
| 448 #' @return A plot | |
| 449 #' @author Natacha Lenuzza | |
| 450 #' @examples | |
| 451 #' print("hello !") | |
| 452 #' | |
| 453 #' @export plot_linearity | |
| 454 #' | |
| 455 | |
| 456 plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { | |
| 457 df <- cbind.data.frame(diagLs$data, | |
| 458 marginal.prediction = diagLs$marginal.prediction, | |
| 459 standardized.marginal.residuals = diagLs$std.marginal.residuals) | |
| 460 # outlier annotation | |
| 461 df$outliers <- rep("", nrow(df)) | |
| 462 outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN) | |
| 463 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] | |
| 464 if (length(label_factor) >= 1) { | |
| 465 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
| 466 df[outidx, label_factor[1]], | |
| 467 sep = "_") | |
| 468 if (length(label_factor) > 1) { | |
| 469 for (li in 2:length(label_factor)) { | |
| 470 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
| 471 df[outidx, label_factor[li]], | |
| 472 sep = ".") | |
| 473 } | |
| 474 } | |
| 475 } | |
| 476 p <- ggplot(data = df, | |
| 477 aes(x = marginal.prediction, | |
| 478 y = standardized.marginal.residuals)) + | |
| 479 geom_point(size = 2) + | |
| 480 geom_hline(yintercept = 0, col = "grey") + | |
| 481 geom_smooth(aes(x = marginal.prediction, | |
| 482 y = standardized.marginal.residuals), data = df, se = FALSE, col = "blue", method = "loess") + | |
| 483 ggtitle("Linearity of effects/outlying obervations") + | |
| 484 xlab("Marginal predictions") + | |
| 485 ylab("Standardized marginal residuals") + | |
| 486 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
| 487 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + | |
| 488 geom_text(aes(label = outliers), hjust = 0, vjust = 0) | |
| 489 if (plotL) plot(p) | |
| 490 invisible(p) | |
| 491 } | |
| 492 | |
| 493 | |
| 494 ####################################################################################################### | |
| 495 ## EBLUP | |
| 496 ####################################################################################################### | |
| 497 | |
| 498 | |
| 499 #' Mahalanobis distance | |
| 500 #' | |
| 501 #' @param diagLs diagnostic list | |
| 502 #' @param plotL Boolean | |
| 503 #' @return A plot | |
| 504 #' @author Natacha Lenuzza | |
| 505 #' @examples | |
| 506 #' print("hello !") | |
| 507 #' | |
| 508 #' @export plot_mahalanobis | |
| 509 #' | |
| 510 | |
| 511 | |
| 512 plot_mahalanobis <- function(diagLs, plotL = TRUE) { | |
| 513 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), | |
| 514 mal = diagLs$std.mahalanobis.distance) | |
| 515 ## Outlying subjects | |
| 516 p <- | |
| 517 ggplot(aes(y = mal, x = unit), data = unitDf) + | |
| 518 geom_point(size = 3) + | |
| 519 ylab("Standardized Mahalanobis distance") + | |
| 520 geom_vline(xintercept = 0, linetype = "dashed") + | |
| 521 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
| 522 geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") + | |
| 523 geom_text(aes(label = unit), | |
| 524 data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ], | |
| 525 hjust = 1, vjust = 0) + | |
| 526 ggtitle("Outlying unit") + | |
| 527 xlab("unit") | |
| 528 if (plotL) plot(p) | |
| 529 invisible(p) | |
| 530 } | |
| 531 | |
| 532 | |
| 533 | |
| 534 | |
| 535 | |
| 536 | |
| 537 #' Mahalanobis distance (Chi2) | |
| 538 #' | |
| 539 #' @param diagLs diagnostic list | |
| 540 #' @param plotL aa | |
| 541 #' @return A plot | |
| 542 #' @author Natacha Lenuzza | |
| 543 #' @examples | |
| 544 #' print("hello !") | |
| 545 #' | |
| 546 #' @export plot_mahalanobisKhi2 | |
| 547 #' | |
| 548 | |
| 549 | |
| 550 plot_mahalanobisKhi2 <- function(diagLs, plotL = TRUE) { | |
| 551 unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance), | |
| 552 mal = diagLs$mahalanobis.distance) | |
| 553 p <- qqplotF(x = unitDf$mal, | |
| 554 distribution = "chisq", | |
| 555 df = diagLs$q, | |
| 556 line.estimate = NULL, | |
| 557 conf = 0.95) + | |
| 558 xlab("Chi-squared quantiles") + | |
| 559 ylab("Mahalanobis distance") + | |
| 560 ggtitle("Normality of random effect") + | |
| 561 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 562 if (plotL) plot(p) | |
| 563 invisible(p) | |
| 564 } | |
| 565 | |
| 566 | |
| 567 | |
| 568 | |
| 569 | |
| 570 | |
| 571 | |
| 572 ####################################################################################################### | |
| 573 ## Residus conditionels | |
| 574 ####################################################################################################### | |
| 575 | |
| 576 ## Presence of outlying observations and homoscedacity of residuals | |
| 577 | |
| 578 #' Homoskedacity of conditionalresiduals | |
| 579 #' | |
| 580 #' @param diagLs diagnostic list | |
| 581 #' @param hlimitN Limit value for outliers (e.g.2 or 3) | |
| 582 #' @param plotL Boolean | |
| 583 #' @param label_factor Column of observation names used to label outlying values | |
| 584 #' @return A plot | |
| 585 #' @author Natacha Lenuzza | |
| 586 #' @examples | |
| 587 #' print("hello !") | |
| 588 #' | |
| 589 #' @export plot_conditionalResiduals | |
| 590 #' | |
| 591 | |
| 592 | |
| 593 plot_conditionalResiduals <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) { | |
| 594 df <- cbind.data.frame(diagLs$data, | |
| 595 conditional.prediction = diagLs$conditional.prediction, | |
| 596 standardized.conditional.residuals = diagLs$std.conditional.residuals) | |
| 597 # outlier annotation | |
| 598 df$outliers <- rep("", nrow(df)) | |
| 599 outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN) | |
| 600 df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx] | |
| 601 if (length(label_factor) >= 1) { | |
| 602 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
| 603 df[outidx, label_factor[1]], | |
| 604 sep = "_") | |
| 605 if (length(label_factor) > 1) { | |
| 606 for (li in 2:length(label_factor)) { | |
| 607 df[outidx, "outliers"] <- paste(df[outidx, "outliers"], | |
| 608 df[outidx, label_factor[li]], | |
| 609 sep = ".") | |
| 610 } | |
| 611 } | |
| 612 } | |
| 613 p <- ggplot(data = df, | |
| 614 aes(x = conditional.prediction, | |
| 615 y = standardized.conditional.residuals)) + | |
| 616 geom_point(size = 2) + | |
| 617 geom_hline(yintercept = 0, col = "grey") + | |
| 618 geom_smooth(aes(x = conditional.prediction, | |
| 619 y = standardized.conditional.residuals), | |
| 620 data = df, se = FALSE, col = "blue", method = "loess") + | |
| 621 ggtitle("Homoscedasticity of conditional residuals/outlying observations") + | |
| 622 xlab("Individual predictions") + | |
| 623 ylab("Standardized conditional residuals") + | |
| 624 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) + | |
| 625 geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") + | |
| 626 geom_text(aes(label = outliers), hjust = 0, vjust = 0) | |
| 627 if (plotL) plot(p) | |
| 628 invisible(p) | |
| 629 } | |
| 630 | |
| 631 | |
| 632 | |
| 633 | |
| 634 #' Normality of conditionalresiduals | |
| 635 #' | |
| 636 #' @param diagLs diagnostic list | |
| 637 #' @param plotL aa | |
| 638 #' @return A plot | |
| 639 #' @author Natacha Lenuzza | |
| 640 #' @examples | |
| 641 #' print("hello !") | |
| 642 #' | |
| 643 #' @export plot_condresQQplot | |
| 644 #' | |
| 645 | |
| 646 | |
| 647 plot_condresQQplot <- function(diagLs, plotL = TRUE) { | |
| 648 df <- cbind.data.frame(diagLs$data, | |
| 649 conditional.prediction = diagLs$conditional.prediction, | |
| 650 standardized.conditional.residuals = diagLs$std.conditional.residuals) | |
| 651 p <- qqplotF(x = df$standardized.conditional.residuals, | |
| 652 distribution = "norm", | |
| 653 line.estimate = NULL, | |
| 654 conf = 0.95) + | |
| 655 xlab("Standard normal quantiles") + | |
| 656 ylab("Standardized conditional residual quantiles") + | |
| 657 ggtitle("Normality of conditional error") + | |
| 658 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 659 if (plotL) plot(p) | |
| 660 invisible(p) | |
| 661 } | |
| 662 | |
| 663 | |
| 664 | |
| 665 | |
| 666 | |
| 667 ####################################################################################################### | |
| 668 ## Within-units covariance structure | |
| 669 ####################################################################################################### | |
| 670 | |
| 671 | |
| 672 #' Lesaffre-Veerbeke measure | |
| 673 #' | |
| 674 #' @param diagLs diagnostic list | |
| 675 #' @param plotL aa | |
| 676 #' @return A plot | |
| 677 #' @author Natacha Lenuzza | |
| 678 #' @examples | |
| 679 #' print("hello !") | |
| 680 #' | |
| 681 #' @export plot_lesaffreVeerbeke | |
| 682 #' | |
| 683 | |
| 684 | |
| 685 plot_lesaffreVeerbeke <- function(diagLs, plotL = TRUE) { | |
| 686 unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure), | |
| 687 lvm = diagLs$std.lesaffreverbeke.measure) | |
| 688 p <- ggplot(data = unitDf, | |
| 689 aes(x = unit, | |
| 690 y = lvm)) + | |
| 691 geom_point(size = 2) + | |
| 692 theme(legend.position = "none") + | |
| 693 xlab("units") + | |
| 694 ylab("Standardized Lesaffre-Verbeke measure") + | |
| 695 geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") + | |
| 696 geom_text(aes(label = unit), | |
| 697 data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ], | |
| 698 hjust = 0, vjust = 0) + | |
| 699 ggtitle("Within-units covariance matrice") + | |
| 700 theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 701 if (plotL) plot(p) | |
| 702 invisible(p) | |
| 703 } | |
| 704 | |
| 705 ##-------------------------------------------------------------------------------------------------## | |
| 706 ## Helpers | |
| 707 ##-------------------------------------------------------------------------------------------------## | |
| 708 | |
| 709 | |
| 710 ## square root of a matrix | |
| 711 ## From Rocha, Singer and Nobre | |
| 712 | |
| 713 #' square root of a matrix (Rocha) | |
| 714 #' | |
| 715 #' Description | |
| 716 #' | |
| 717 #' @param mat Matrix | |
| 718 #' @return A list | |
| 719 #' @author Natacha Lenuzza | |
| 720 #' @examples | |
| 721 #' print("hello !") | |
| 722 #' | |
| 723 #' @export sqrt.matrix | |
| 724 | |
| 725 sqrt.matrix <- function(mat) { | |
| 726 mat <- as.matrix(mat) # new line of code | |
| 727 singular_dec <- svd(mat, LINPACK = F) | |
| 728 U <- singular_dec$u | |
| 729 V <- singular_dec$v | |
| 730 D <- diag(singular_dec$d) | |
| 731 sqrtmatrix <- U %*% sqrt(D) %*% t(V) | |
| 732 } | |
| 733 | |
| 734 | |
| 735 ## square root of a matrix | |
| 736 ## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6) | |
| 737 ## (for matMN a n x n matrix that symetric and non-negative definite) | |
| 738 | |
| 739 #' square root of a matrix (Rocha) | |
| 740 #' | |
| 741 #' @param mat Matrix | |
| 742 #' @return A list | |
| 743 #' @author Natacha Lenuzza | |
| 744 #' @examples | |
| 745 #' print("hello !") | |
| 746 #' | |
| 747 #' @export sqrtmF | |
| 748 | |
| 749 sqrtmF <- function(matMN) { | |
| 750 matMN <- as.matrix(matMN) | |
| 751 ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.") | |
| 752 svd_dec <- svd(matMN) | |
| 753 invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v)) | |
| 754 } | |
| 755 | |
| 756 | |
| 757 ## qqplotF | |
| 758 ## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c | |
| 759 | |
| 760 qqplotF <- function(x, | |
| 761 distribution = "norm", ..., | |
| 762 line.estimate = NULL, | |
| 763 conf = 0.95, | |
| 764 labels = names(x)) { | |
| 765 q.function <- eval(parse(text = paste0("q", distribution))) | |
| 766 d.function <- eval(parse(text = paste0("d", distribution))) | |
| 767 x <- na.omit(x) | |
| 768 ord <- order(x) | |
| 769 n <- length(x) | |
| 770 P <- ppoints(length(x)) | |
| 771 daf <- data.frame(ord.x = x[ord], z = q.function(P, ...)) | |
| 772 if (is.null(line.estimate)) { | |
| 773 Q.x <- quantile(daf$ord.x, c(0.25, 0.75)) | |
| 774 Q.z <- q.function(c(0.25, 0.75), ...) | |
| 775 b <- diff(Q.x) / diff(Q.z) | |
| 776 coef <- c(Q.x[1] - b * Q.z[1], b) | |
| 777 } else { | |
| 778 coef <- coef(line.estimate(ord.x ~ z)) | |
| 779 } | |
| 780 zz <- qnorm(1 - (1 - conf) / 2) | |
| 781 SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n) | |
| 782 fit.value <- coef[1] + coef[2] * daf$z | |
| 783 daf$upper <- fit.value + zz * SE | |
| 784 daf$lower <- fit.value - zz * SE | |
| 785 if (!is.null(labels)) { | |
| 786 daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "") | |
| 787 } | |
| 788 p <- ggplot(daf, aes(x = z, y = ord.x)) + | |
| 789 geom_point() + | |
| 790 geom_abline(intercept = coef[1], slope = coef[2], col = "red") + | |
| 791 geom_line(aes(x = z, y = lower), daf, col = "red", linetype = "dashed") + | |
| 792 geom_line(aes(x = z, y = upper), daf, col = "red", linetype = "dashed") + | |
| 793 xlab("") + ylab("") | |
| 794 if (!is.null(labels)) p <- p + geom_text(aes(label = label)) | |
| 795 return(p) | |
| 796 } | |
| 797 | |
| 798 | |
| 799 ## histogramm | |
| 800 histF <- function(x, sd_x = NULL, breaks = "scott") { | |
| 801 if (is.null(sd_x)) | |
| 802 sd_x <- sd(x) | |
| 803 ## Bandwith estimation (default is Scott) | |
| 804 if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd")) | |
| 805 breaks <- "scott" | |
| 806 if (breaks %in% c("sqrt", "sturges", "rice")) { | |
| 807 k <- switch(breaks, | |
| 808 sqrt = sqrt(length(x)), | |
| 809 sturges = floor(log2(x)) + 1, | |
| 810 rice = floor(2 * length(x) ^ (1 / 3)) | |
| 811 ) | |
| 812 bw <- diff(range(x)) / k | |
| 813 }else{ | |
| 814 bw <- switch(breaks, | |
| 815 scott = 3.5 * sd_x / length(x) ^ (1 / 3), | |
| 816 fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3)) | |
| 817 ) | |
| 818 } | |
| 819 | |
| 820 | |
| 821 daf <- data.frame(x = x) | |
| 822 ## graph | |
| 823 return(ggplot(data = daf, aes(x)) + | |
| 824 geom_histogram(aes(y = ..density..), | |
| 825 col = "black", fill = "grey", binwidth = bw) + | |
| 826 geom_density(size = 1.2, | |
| 827 col = "blue", | |
| 828 linetype = "blank", | |
| 829 fill = rgb(0, 0, 1, 0.1)) + | |
| 830 stat_function(fun = dnorm, | |
| 831 args = list(mean = 0, sd = sd_x), | |
| 832 col = "blue", size = 1.2) + | |
| 833 theme(legend.position = "none") + | |
| 834 xlab("")) | |
| 835 } | |
| 836 | |
| 837 | |
| 838 plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) { | |
| 839 | |
| 840 ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not | |
| 841 if (ncol(df) > 3) { | |
| 842 varidx <- 4 | |
| 843 ffidx <- 1 | |
| 844 timidx <- 2 | |
| 845 individx <- 3 | |
| 846 } else { | |
| 847 varidx <- 3 | |
| 848 ffidx <- 1 | |
| 849 timidx <- 1 | |
| 850 individx <- 2 | |
| 851 } | |
| 852 nameVar <- colnames(df)[varidx] | |
| 853 fflab <- colnames(df)[ffidx] | |
| 854 ## Individual time-course | |
| 855 rawPlot <- | |
| 856 ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) + | |
| 857 geom_point() + | |
| 858 geom_line() + ggtitle("Individual time-courses (raw data)") + | |
| 859 ylab(nameVar) + | |
| 860 xlab(label = colnames(df)[2]) + | |
| 861 theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 862 ## Boxplot of fixed factor | |
| 863 bPlot <- | |
| 864 ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) + | |
| 865 geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) + | |
| 866 ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") + | |
| 867 theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 868 ## Post-hoc estimates | |
| 869 ddlsm1 <- mfl | |
| 870 ddlsm1$name <- rownames(ddlsm1) | |
| 871 ddlsm1$Significance <- rep("NS", nrow(ddlsm1)) | |
| 872 ## modif JF pour tenir compte du seuil de pvalues defini par le user | |
| 873 options("scipen" = 100, "digits" = 5) | |
| 874 pvalCutof <- as.numeric(pvalCutof) | |
| 875 bs <- 0.05; bm <- 0.01; bi <- 0.005 | |
| 876 if (pvalCutof > bm) { | |
| 877 bs <- pvalCutof | |
| 878 } else | |
| 879 if (pvalCutof < bm & pvalCutof > bi) { | |
| 880 bm <- pvalCutof; bs <- pvalCutof | |
| 881 } else | |
| 882 if (pvalCutof < bi) { | |
| 883 bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof | |
| 884 } | |
| 885 lbs <- paste("p-value < ", bs, sep = "") | |
| 886 lbm <- paste("p-value < ", bm, sep = "") | |
| 887 lbi <- paste("p-value < ", bi, sep = "") | |
| 888 cols <- paste("p-value < ", bs, sep = "") | |
| 889 colm <- paste("p-value < ", bm, sep = "") | |
| 890 coli <- paste("p-value < ", bi, sep = "") | |
| 891 valcol <- c("grey", "yellow", "orange", "red") | |
| 892 names(valcol) <- c("NS", lbs, lbm, lbi) | |
| 893 ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs | |
| 894 ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm | |
| 895 ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi | |
| 896 ddlsm1$levels <- rownames(ddlsm1) | |
| 897 ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) { | |
| 898 strsplit(namC, split = " ", fixed = TRUE)[[1]][1] | |
| 899 }) | |
| 900 | |
| 901 phPlot <- | |
| 902 ggplot(ddlsm1, aes(x = levels, y = Estimate)) + | |
| 903 facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") + | |
| 904 geom_bar(aes(fill = Significance), stat = "identity") + | |
| 905 theme(axis.text.x = element_text(angle = 90, hjust = 1)) + | |
| 906 scale_fill_manual( | |
| 907 values = valcol) + | |
| 908 geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) + | |
| 909 ggtitle("Post-hoc estimates ") + xlab("") + | |
| 910 theme(plot.title = element_text(size = rel(1.2), face = "bold")) | |
| 911 | |
| 912 ## Final plotting | |
| 913 grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2), | |
| 914 phPlot, nrow = 2, | |
| 915 top = textGrob(title, gp = gpar(fontsize = 32, font = 4)) | |
| 916 ) | |
| 917 | |
| 918 } |
