changeset 0:a4d89d47646f draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/tools-metabolomics commit 8d2ca678d973501b60479a8dc3f212eecd56eab8
author workflow4metabolomics
date Mon, 16 May 2022 09:25:01 +0000
parents
children
files diagmfl.R mixmodel.xml mixmodel_script.R mixmodel_wrapper.R test-data/OneFactor_dataMatrix.tsv test-data/OneFactor_sampleMetadata.tsv test-data/OneFactor_variableMetadata.tsv test-data/TwoFactor_dataMatrix.txt test-data/TwoFactor_sampleMetadata.txt test-data/TwoFactor_variableMetadata.txt test-data/mixmodel_TwoFactor_variableMetadata.txt
diffstat 11 files changed, 2016 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/diagmfl.R	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,918 @@
+#' Calcul des grandeurs "diagnostiques"
+#'
+#'  Script adapte de http://www.ime.unicamp.br/~cnaber/residdiag_nlme_v22.R pour fonctionner
+#'  avec un modele lmer (et non lme), des sujets avec des identifiants non numeriques,
+#'  et des observations non ordonnees sujet par sujet (dernier point a verifier.)
+#'
+#'  @detail Les graphiques, les calculs associés et les notations utilisees dans le script suivent
+#'   l'article de Singer et al (2016) Graphical Tools for detedcting departures from linear
+#'    mixed model assumptions and some remedial measures, International Statistical Review
+#'       (doi:10.1111/insr.12178)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export lmer.computeDiag
+
+
+
+lmer.computeDiag <- function(mfl) {
+
+  ## Check arguments ---------------------------------------------------------
+
+  if (length(mfl@flist) > 1)
+    stop("Several 'grouping level' for random effect not implemented yet.")
+
+
+  ## extracting information from mfl models -------------------------------------------------------------
+  # data
+  df <- mfl@frame
+  responseC <- names(df)[1]
+  unitC <- names(mfl@flist)[1]
+  # observations
+  yVn <- df[, responseC]
+  nobsN <- length(yVn)
+  # units
+  idunitVc <- levels(mfl@flist[[1]])
+  nunitN <- length(unique(idunitVc))
+  #X
+  xMN <- mfl@pp$X
+  pN <- ncol(xMN)
+  #Z
+  zMN <- t(as.matrix(mfl@pp$Zt))
+  # Estimated covariance matrix of random effects (Gam)
+  aux <- VarCorr(mfl)[[1]] ## assuming only one level of grouping
+  aux2 <- attr(aux, "stddev")
+  gMN <- attr(aux, "correlation") * (aux2 %*% t(aux2))
+  gammaMN <- as.matrix(kronecker(diag(nunitN), gMN))
+  q <- dim(gMN)[1]
+  # Estimated covariance matrix of conditonal error (homoskedastic conditional independance model)
+  sigsqN <- attr(VarCorr(mfl), "sc")^2
+  rMN <- sigsqN * diag(nobsN)
+  # Estimated covariance matrix of Y
+  vMN <- (zMN %*% gammaMN %*% t(zMN)) + rMN
+  invvMN <- MASS::ginv(vMN)
+  # H and Q matrix
+  hMN <- MASS::ginv(t(xMN) %*% invvMN %*% xMN)
+  qMN <- invvMN - invvMN %*% xMN %*% (hMN) %*% t(xMN) %*% invvMN
+  # eblue and eblup
+  eblueVn <- mfl@beta
+  eblupVn <- gammaMN %*% t(zMN) %*% invvMN %*% (yVn - xMN %*% eblueVn) ## equivalent de ranef(mfl)
+  rownames(eblupVn) <- colnames(zMN)
+  ##  Calculs of matrices and vectors used in graph diagnosics ---------------------------------------------
+  ## Marginal and individual predictions, residuals and variances
+  marpredVn <- xMN %*% eblueVn
+  marresVn <- yVn - marpredVn
+  marvarMN <- vMN - xMN %*% hMN %*% t(xMN)
+  condpredVn <- marpredVn + zMN %*% eblupVn
+  condresVn <- yVn - condpredVn
+  condvarMN <- rMN %*% qMN %*% rMN
+  ## Analysis of marginal and conditional residuals
+  stmarresVn <- stcondresVn <- rep(0, nobsN)
+  lesverVn <- rep(0, nunitN)
+  names(lesverVn) <- idunitVc
+  for (i in 1:nunitN) {
+      idxiVn <- which(df[, unitC] == idunitVc[i]) ## position des observations du sujet i
+    miN <- length(idxiVn)
+      ## standardization of marginal residual
+    stmarresVn[idxiVn] <- as.vector(solve(sqrtmF(marvarMN[idxiVn, idxiVn])) %*% marresVn[idxiVn])
+      ##Standardized Lessafre and Verbeke's measure
+    auxMN <- diag(1, ncol = miN, nrow = miN) - stmarresVn[idxiVn] %*% t(stmarresVn[idxiVn])
+    lesverVn[i] <- sum(diag(auxMN %*% t(auxMN)))
+      ## standardization of conditional residual
+    stcondresVn[idxiVn] <- as.vector(solve(sqrtmF(condvarMN[idxiVn, idxiVn])) %*% condresVn[idxiVn])
+  }
+  lesverVn <- lesverVn / sum(lesverVn)
+  ## Least confounded conditional residuals
+  ## EBLUP analysis (Mahalanobis' distance)
+  varbMN <- gammaMN %*% t(zMN) %*% qMN %*% zMN %*% gammaMN
+  mdistVn <- rep(0, nunitN)
+  qm <- q - 1
+  for (j in 1:nunitN) {
+    gbi <- varbMN[(q * j - qm):(q * j), (q * j - qm):(q * j)]
+    eblupi <- eblupVn[(q * j - qm):(q * j)]
+    mdistVn[j] <- t(eblupi) %*% ginv(gbi) %*% eblupi
+  }
+  names(mdistVn) <- levels(mfl@flist[[1]])
+  ## output ----------------------------------------------
+  return(list(
+    data = df,
+    q = q,
+    eblue = eblueVn,
+    eblup = eblupVn,
+    marginal.prediction = marpredVn,
+    conditional.prediction = condpredVn,
+    std.marginal.residuals = stmarresVn,
+    std.conditional.residuals = stcondresVn,
+    mahalanobis.distance = mdistVn,
+    std.mahalanobis.distance = mdistVn / sum(mdistVn),
+    std.lesaffreverbeke.measure = lesverVn
+  ))
+}
+
+
+#' Wrapper function for diagnostic plots of 'lmer' linear mixed models
+#'
+#' (W4M mixmod)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param title aa
+#' @param outlier.limit aa
+#' @param pvalCutof aa
+#' @param resC aa
+#' @param uniC aa
+#' @param fixC aa
+#' @param lest.confounded Not used yet.
+#' @return NULL
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export diagmflF
+
+
+diagmflF <- function(mfl,
+                     title = "",
+                     outlier.limit = 3,
+                     pvalCutof = 0.05,
+                     resC = "vd",
+                     uniC = "subject",
+                     timC = "time",
+                     fixC = "fixfact",
+                     least.confounded = FALSE) {
+  ## diagnostics
+  diagLs <- lmer.computeDiag(mfl)
+  ## plots
+  blank <- rectGrob(gp = gpar(col = "white"))
+  rectspacer <- rectGrob(height = unit(0.1, "npc"), gp = gpar(col = "grey"))
+  grid.arrange(blank,
+               plot_linearity(diagLs, hlimitN = outlier.limit, plotL = FALSE,
+                              label_factor = c(uniC, fixC, timC)),
+               blank,
+               plot_conditionalResiduals(diagLs, hlimitN = outlier.limit, plotL = FALSE,
+                                         label_factor = c(uniC, fixC, timC)),
+               blank,
+               plot_condresQQplot(diagLs,  plotL = FALSE),
+               blank,
+               plot_lesaffreVeerbeke(diagLs,  plotL = FALSE),
+               blank,
+               plot_randomEffect(mfl, plotL = FALSE)[[1]],
+               blank,
+               plot_mahalanobisKhi2(diagLs,  plotL = FALSE),
+               blank,
+               plot_mahalanobis(diagLs,  plotL = FALSE),
+               blank,
+               blank,
+               blank,
+               top = textGrob(title, gp = gpar(fontsize = 40, font = 4)),
+               layout_matrix = matrix(c(rep(1, 7),
+                                        2, 3, rep(4, 3), 20, 21,
+                                        rep(5, 7),
+                                        6:12,
+                                        rep(13, 7),
+                                        14:18, rep(19, 2)),
+                                      ncol = 7, nrow = 6, byrow = TRUE),
+               heights = c(0.1 / 3, 0.3, 0.1 / 3, 0.3, 0.1 / 3, 0.3),
+               widths = c(0.22, 0.04, 0.22, 0.04, 0.22, 0.04, 0.22))
+}
+
+#######################################################################################################
+## Raw data time courses
+#######################################################################################################
+
+#' Visualization of raw time course
+#'
+#' Une
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param responseC Name of the 'response' variable
+#' @param timeC Name of the 'time' variable
+#' @param subjectC  Name of the 'subject' variable
+#' @param fixfactC  Name of the 'fixed factor' variable (e.g.treatment)
+#' @param offset_subject Boolean indicating if an offset value (subject's mean) should substracted to each data point. Default is FALSE
+#' @param plotL Boolean
+#' @param colorType One of NA, FIXFACT or SUBJECT
+#' @param shapeType One of NA, FIXFACT or SUBJECT
+#' @param lineType One of NA, FIXFACT or SUBJECT
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_timeCourse
+
+
+
+plot_timeCourse <- function(mfl,
+                             responseC,
+                             timeC,
+                             subjectC,
+                             fixfactC = NULL,
+                             offset_subject = FALSE,
+                             plotL = TRUE,
+                             colorType = NA, ## subject, fixfact, none or NA
+                             shapeType = NA, ## subject, fixfact, none or NA
+                             lineType = NA ## subject, fixfact, none or NA
+) {
+  ## Data -----
+  if (class(mfl) %in% c("merModLmerTest", "lmerMod", "lmerModLmerTest")) {
+    DF <- mfl@frame
+  } else if (class(mfl) == "data.frame") {
+    DF <- mfl
+  } else {
+    stop("'mfl' argument must be a linear mixed effect model or a data frame.")
+  }
+  ## Format data -----
+  if (is.null(fixfactC)) {
+    DF <- DF[, c(responseC,  timeC, subjectC)]
+    colnames(DF) <- c("DV", "TIME", "SUBJECT")
+    meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "DV")
+    meanDF$GROUP <- meanDF$SUBJECT
+    } else{
+      DF <- DF[, c(responseC, fixfactC, timeC, subjectC)]
+    colnames(DF) <- c("DV", "FIXFACT", "TIME", "SUBJECT")
+      meanDF <- aggregate(DF$DV,
+                        by = list(SUBJECT = DF$SUBJECT,
+                                  TIME = DF$TIME,
+                                  FIXFACT = DF$FIXFACT),
+                        FUN = mean,
+                        na.rm = TRUE)
+    colnames(meanDF) <- c("SUBJECT", "TIME", "FIXFACT", "DV")
+    meanDF$GROUP <- paste(meanDF$SUBJECT, meanDF$FIXFACT, sep = "_")
+    }
+  ## Offset -----
+  if (offset_subject) {
+      offsetMN <- aggregate(DF$DV, by = list(DF$SUBJECT), mean, na.rm = TRUE)
+    offsetVn <- offsetMN[, 2]
+    names(offsetVn) <- offsetMN[, 1]
+    rm(offsetMN)
+    DF$DV <- DF$DV - offsetVn[DF$SUBJECT]
+    meanDF$DV <- meanDF$DV - offsetVn[as.character(meanDF$SUBJECT)]
+  }
+  ## Graphical parameters -----
+  xlabC <-  timeC
+  ylabC <- responseC
+  titC <- "Individual time-courses"
+  if (offset_subject) {
+    ylabC <- paste(ylabC, "minus 'within-subject' empirical mean")
+    titC <- paste(titC, "('within-subject' empirical mean offset)")
+  }
+  ## color
+  if (is.na(colorType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      colorType <- "SUBJECT"
+    } else {
+      colorType <- "FIXFACT"
+    }
+    colTxt <- paste(", colour=", colorType)
+    } else if (colorType == "none") {
+    colTxt  <- ""
+  } else {
+    colTxt <- paste(", colour=", colorType)
+  }
+  ## lineType
+  if (is.na(lineType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      linTxt  <- ""
+    } else {
+      linTxt <- paste(", linetype=",
+                      ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
+    }
+    } else if (lineType == "none") {
+    linTxt  <- ""
+  } else {
+    linTxt  <-  paste(", linetype=", lineType)
+  }
+  ## shapeType
+  if (is.na(shapeType)) { ## automaticatical attribution
+      if (is.null(fixfactC)) {
+      shaTxt  <- ""
+    } else {
+      shaTxt <- paste(", shape=",
+                      ifelse(colorType == "SUBJECT", "FIXFACT", "SUBJECT"))
+    }
+    } else if (shapeType == "none") {
+    shaTxt  <- ""
+  } else {
+    shaTxt  <-  paste(", shape=", shapeType)
+  }
+  ## aes mapping
+  txtMap <- paste("aes(x = TIME, y = DV",
+                  colTxt, shaTxt, ")", sep = "")
+  txtLineMap <- paste("aes(x = TIME, y = DV, group = GROUP ",
+                      colTxt, linTxt,  ")", sep = "")
+  ## plot and output
+  p <- ggplot(data = DF, mapping = eval(parse(text = txtMap))) +
+    ggtitle(titC) +
+    xlab(xlabC) + ylab(ylabC) +
+    theme(legend.position = "left",
+          plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_point() +
+    geom_line(eval(parse(text = txtLineMap)), data = meanDF) +
+    theme_bw() +
+    NULL
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+#######################################################################################################
+## Post-hoc estimate
+#######################################################################################################
+#' Visualization of fixed effects (post-hoc estimates)
+#'
+#' Description
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param pvalCutof User pvalue cut of
+#' @param plotL Boolean
+#' @param titC Title of the plot
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_posthoc
+plot_posthoc <- function(mfl, pvalCutof = 0.05, plotL = TRUE, titC = "Post-hoc estimates") {
+  ddlsm1 <- as.data.frame(difflsmeans(mfl, test.effs = NULL))
+  colnames(ddlsm1)[ncol(ddlsm1)] <- "pvalue"
+  ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
+  ## modif JF pour tenir compte du seuil de pvalues defini par le user
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof)] <- paste("p-value < ", pvalCutof, sep = "")
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 5)] <- paste("p-value < ", pvalCutof / 5, sep = "")
+  ddlsm1$Significance[which(ddlsm1$pvalue < pvalCutof / 10)] <- paste("p-value < ", pvalCutof / 10, sep = "")
+  ddlsm1$levels <- rownames(ddlsm1)
+  ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
+    strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
+  })
+  colValue <- c("grey", "yellow", "orange", "red")
+  names(colValue) <- c("NS",
+                       paste("p-value < ", pvalCutof, sep = ""),
+                       paste("p-value < ", pvalCutof / 5, sep = ""),
+                       paste("p-value < ", pvalCutof / 10, sep = ""))
+  p <- ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(values = colValue) +
+    geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.25) +
+    ggtitle(titC) + xlab("") +
+    NULL
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+#######################################################################################################
+## Visualisation des effets aléatoires
+#######################################################################################################
+#' Visualization of random effects
+#'
+#' Equivalent of dotplot(ranef)
+#'
+#' @param mfl A linear mixed model fitted via lmer or a data frame containing data
+#' @param  plotL Logical
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_randomEffect
+
+
+plot_randomEffect <- function(mfl, plotL = TRUE) {
+  ## Estimation et format des effets aléatoires
+  randomEffect <- ranef(mfl, condVar = TRUE)
+  DF <- data.frame(randomEffect = rep(names(randomEffect),
+                                      times = sapply(seq_len(length(randomEffect)),
+                                                      function(lsi) {
+                                                        return(length(unlist(randomEffect[[lsi]])))})))
+  DF$condVar <- DF$estimate <- DF$x2 <- DF$x1 <- rep(NA, nrow(DF))
+  for (rafC in names(randomEffect)) {
+      eff <- randomEffect[[rafC]]
+      DF$x1[which(DF$randomEffect == rafC)] <- rep(colnames(eff), each = nrow(eff))
+    DF$x2[which(DF$randomEffect == rafC)] <- rep(rownames(eff), ncol(eff))
+    DF$estimate[which(DF$randomEffect == rafC)] <- unlist(eff)
+      condvar <- attr(randomEffect[[rafC]], "postVar")
+      se <- NULL
+    for (coli in seq_len(ncol(eff))) {
+      se <- c(se,
+              sapply(seq_len(nrow(eff)),
+                     function(i) {
+                       return(condvar[coli, coli, i])}))
+    }
+    DF$condVar[which(DF$randomEffect == rafC)] <- se
+    }
+  DF$se <- sqrt(DF$condVar)
+  DF$lower <- DF$estimate - 1.96 * DF$se
+  DF$upper <- DF$estimate + 1.96 * DF$se
+  ## Plot
+  plotLs <- vector("list", length(randomEffect))
+  names(plotLs) <- names(randomEffect)
+  for (pi in seq_len(length(plotLs))) {
+      subDF <- DF[DF$randomEffect == names(plotLs)[pi], ]
+      subDF <- subDF[order(subDF$x1, subDF$estimate, decreasing = FALSE), ]
+        p <- ggplot(data = subDF,
+                mapping = aes(x = estimate, y = reorder(x2, estimate))
+    ) +
+      geom_point(size = 3) +
+      geom_segment(aes(xend = lower, yend = x2)) +
+      geom_segment(aes(xend = upper, yend = x2)) +
+      facet_wrap(~x1, ncol = length(unique(subDF$x1))) +
+      ylab("") + xlab("") +
+      ggtitle(paste("Random effect - ", names(plotLs)[pi], sep = "")) +
+      theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+      geom_vline(xintercept = 0, linetype = "dashed") +
+      theme_bw()
+        plotLs[[pi]] <- p
+      if (plotL) plot(p)
+  }
+  invisible(plotLs)
+}
+
+#######################################################################################################
+## Linearité des effets et outlying observations
+#######################################################################################################
+#' Linarity of the fixed effect with regard to the continuous time
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean
+#' @param label_factor Column of observation names used to label outlying values
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_linearity
+#'
+
+plot_linearity <- function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         marginal.prediction = diagLs$marginal.prediction,
+                         standardized.marginal.residuals = diagLs$std.marginal.residuals)
+  # outlier annotation
+  df$outliers <- rep("", nrow(df))
+  outidx <- which(abs(df$standardized.marginal.residuals) > hlimitN)
+  df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
+  if (length(label_factor) >= 1) {
+    df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                    df[outidx, label_factor[1]],
+                                    sep = "_")
+    if (length(label_factor) > 1) {
+      for (li in 2:length(label_factor)) {
+        df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                        df[outidx, label_factor[li]],
+                                        sep = ".")
+      }
+    }
+    }
+  p <- ggplot(data = df,
+              aes(x = marginal.prediction,
+                  y = standardized.marginal.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = marginal.prediction,
+                    y = standardized.marginal.residuals), data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Linearity of effects/outlying obervations") +
+    xlab("Marginal predictions") +
+    ylab("Standardized marginal residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+#######################################################################################################
+## EBLUP
+#######################################################################################################
+
+
+#' Mahalanobis distance
+#'
+#' @param diagLs diagnostic list
+#' @param plotL Boolean
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobis
+#'
+
+
+plot_mahalanobis <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
+                       mal = diagLs$std.mahalanobis.distance)
+  ## Outlying subjects
+  p <-
+    ggplot(aes(y = mal, x = unit), data = unitDf) +
+    geom_point(size = 3) +
+    ylab("Standardized Mahalanobis distance") +
+    geom_vline(xintercept = 0, linetype = "dashed") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = 2 * mean(unitDf$mal), linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = unitDf[unitDf$mal > 2 * mean(unitDf$mal), ],
+              hjust = 1, vjust = 0) +
+    ggtitle("Outlying unit") +
+    xlab("unit")
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+
+#' Mahalanobis distance (Chi2)
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_mahalanobisKhi2
+#'
+
+
+plot_mahalanobisKhi2 <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.mahalanobis.distance),
+                       mal = diagLs$mahalanobis.distance)
+  p <- qqplotF(x = unitDf$mal,
+              distribution = "chisq",
+              df = diagLs$q,
+              line.estimate = NULL,
+              conf = 0.95) +
+    xlab("Chi-squared quantiles") +
+    ylab("Mahalanobis distance") +
+    ggtitle("Normality of random effect") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+
+
+#######################################################################################################
+## Residus conditionels
+#######################################################################################################
+
+## Presence of outlying observations and homoscedacity of residuals
+
+#' Homoskedacity of conditionalresiduals
+#'
+#' @param diagLs diagnostic list
+#' @param hlimitN Limit value for outliers (e.g.2 or 3)
+#' @param plotL Boolean
+#' @param label_factor Column of observation names used to label outlying values
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_conditionalResiduals
+#'
+
+
+plot_conditionalResiduals <-  function(diagLs, hlimitN, plotL = TRUE, label_factor = NULL) {
+  df <- cbind.data.frame(diagLs$data,
+                         conditional.prediction = diagLs$conditional.prediction,
+                         standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  # outlier annotation
+  df$outliers <- rep("", nrow(df))
+  outidx <- which(abs(df$standardized.conditional.residuals) > hlimitN)
+  df[outidx, "outliers"] <- (seq_len(nrow(df)))[outidx]
+  if (length(label_factor) >= 1) {
+    df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                    df[outidx, label_factor[1]],
+                                    sep = "_")
+    if (length(label_factor) > 1) {
+      for (li in 2:length(label_factor)) {
+        df[outidx, "outliers"] <- paste(df[outidx, "outliers"],
+                                        df[outidx, label_factor[li]],
+                                        sep = ".")
+      }
+    }
+    }
+  p <- ggplot(data = df,
+              aes(x = conditional.prediction,
+                  y = standardized.conditional.residuals)) +
+    geom_point(size = 2) +
+    geom_hline(yintercept = 0, col = "grey") +
+    geom_smooth(aes(x = conditional.prediction,
+                    y = standardized.conditional.residuals),
+                data = df,  se = FALSE, col = "blue", method = "loess") +
+    ggtitle("Homoscedasticity of conditional residuals/outlying observations") +
+    xlab("Individual predictions") +
+    ylab("Standardized conditional residuals") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold")) +
+    geom_hline(yintercept = c(-1, 1) * hlimitN, linetype = "dashed") +
+    geom_text(aes(label = outliers), hjust = 0, vjust = 0)
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+#' Normality of conditionalresiduals
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_condresQQplot
+#'
+
+
+plot_condresQQplot <-  function(diagLs, plotL = TRUE) {
+  df <- cbind.data.frame(diagLs$data,
+                         conditional.prediction = diagLs$conditional.prediction,
+                         standardized.conditional.residuals = diagLs$std.conditional.residuals)
+  p <- qqplotF(x = df$standardized.conditional.residuals,
+              distribution = "norm",
+              line.estimate = NULL,
+              conf = 0.95) +
+    xlab("Standard normal quantiles") +
+    ylab("Standardized conditional residual quantiles") +
+    ggtitle("Normality of conditional error") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+
+
+
+
+#######################################################################################################
+## Within-units covariance structure
+#######################################################################################################
+
+
+#' Lesaffre-Veerbeke measure
+#'
+#' @param diagLs diagnostic list
+#' @param plotL aa
+#' @return A plot
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export plot_lesaffreVeerbeke
+#'
+
+
+plot_lesaffreVeerbeke <- function(diagLs,  plotL = TRUE) {
+  unitDf <- data.frame(unit = names(diagLs$std.lesaffreverbeke.measure),
+                       lvm = diagLs$std.lesaffreverbeke.measure)
+  p <- ggplot(data = unitDf,
+              aes(x = unit,
+                  y = lvm)) +
+    geom_point(size = 2) +
+    theme(legend.position = "none") +
+    xlab("units") +
+    ylab("Standardized Lesaffre-Verbeke measure") +
+    geom_hline(yintercept = 2 * mean(unitDf$lvm), linetype = "dashed") +
+    geom_text(aes(label = unit),
+              data = unitDf[unitDf$lvm > 2 * mean(unitDf$lvm), ],
+              hjust = 0, vjust = 0) +
+    ggtitle("Within-units covariance matrice") +
+    theme(legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  if (plotL) plot(p)
+  invisible(p)
+}
+
+##-------------------------------------------------------------------------------------------------##
+## Helpers
+##-------------------------------------------------------------------------------------------------##
+
+
+## square root of a matrix
+## From Rocha, Singer and Nobre
+
+#' square root of a matrix (Rocha)
+#'
+#' Description
+#'
+#' @param mat Matrix
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrt.matrix
+
+sqrt.matrix <- function(mat) {
+  mat <- as.matrix(mat)  # new line of code
+  singular_dec <- svd(mat, LINPACK = F)
+  U <- singular_dec$u
+  V <- singular_dec$v
+  D <- diag(singular_dec$d)
+  sqrtmatrix <- U %*% sqrt(D) %*% t(V)
+}
+
+
+## square root of a matrix
+## http://www.cs.toronto.edu/~jepson/csc420/notes/introSVD.pdf (page 6)
+## (for matMN a n x n matrix that symetric and non-negative definite)
+
+#' square root of a matrix (Rocha)
+#'
+#' @param mat Matrix
+#' @return A list
+#' @author Natacha Lenuzza
+#' @examples
+#' print("hello !")
+#'
+#' @export sqrtmF
+
+sqrtmF <- function(matMN) {
+  matMN <- as.matrix(matMN)
+  ## check that matMN is symetric: if (!all(t(matMN == matMN))) stop("matMN must be symetric.")
+  svd_dec <- svd(matMN)
+  invisible(svd_dec$u %*% sqrt(diag(svd_dec$d)) %*% t(svd_dec$v))
+}
+
+
+## qqplotF
+## adapted from https://gist.github.com/rentrop/d39a8406ad8af2a1066c
+
+qqplotF <- function(x,
+                    distribution = "norm", ...,
+                    line.estimate = NULL,
+                    conf = 0.95,
+                    labels = names(x)) {
+  q.function <- eval(parse(text = paste0("q", distribution)))
+  d.function <- eval(parse(text = paste0("d", distribution)))
+  x <- na.omit(x)
+  ord <- order(x)
+  n <- length(x)
+  P <- ppoints(length(x))
+  daf <- data.frame(ord.x = x[ord], z = q.function(P, ...))
+  if (is.null(line.estimate)) {
+    Q.x <- quantile(daf$ord.x, c(0.25, 0.75))
+    Q.z <- q.function(c(0.25, 0.75), ...)
+    b <- diff(Q.x) / diff(Q.z)
+    coef <- c(Q.x[1] - b * Q.z[1], b)
+  } else {
+    coef <- coef(line.estimate(ord.x ~ z))
+  }
+  zz <- qnorm(1 - (1 - conf) / 2)
+  SE <- (coef[2] / d.function(daf$z, ...)) * sqrt(P * (1 - P) / n)
+  fit.value <- coef[1] + coef[2] * daf$z
+  daf$upper <- fit.value + zz * SE
+  daf$lower <- fit.value - zz * SE
+  if (!is.null(labels)) {
+    daf$label <- ifelse(daf$ord.x > daf$upper | daf$ord.x < daf$lower, labels[ord], "")
+  }
+  p <- ggplot(daf, aes(x = z, y = ord.x)) +
+    geom_point() +
+    geom_abline(intercept = coef[1], slope = coef[2], col = "red") +
+    geom_line(aes(x = z, y = lower), daf,  col = "red", linetype = "dashed") +
+    geom_line(aes(x = z, y = upper), daf,  col = "red", linetype = "dashed") +
+    xlab("") + ylab("")
+  if (!is.null(labels)) p <- p + geom_text(aes(label = label))
+  return(p)
+}
+
+
+## histogramm
+histF <- function(x, sd_x = NULL, breaks = "scott") {
+  if (is.null(sd_x))
+    sd_x <- sd(x)
+  ## Bandwith estimation (default is Scott)
+  if (!breaks %in% c("sqrt", "sturges", "rice", "scott", "fd"))
+    breaks <- "scott"
+  if (breaks %in% c("sqrt", "sturges", "rice")) {
+    k <- switch(breaks,
+                sqrt = sqrt(length(x)),
+                sturges = floor(log2(x)) + 1,
+                rice = floor(2 * length(x) ^ (1 / 3))
+    )
+    bw <- diff(range(x)) / k
+  }else{
+    bw <- switch(breaks,
+                 scott = 3.5 * sd_x / length(x) ^ (1 / 3),
+                 fd = diff(range(x)) / (2 * IQR(x) / length(x) ^ (1 / 3))
+    )
+  }
+
+
+  daf <- data.frame(x = x)
+  ## graph
+  return(ggplot(data = daf, aes(x)) +
+           geom_histogram(aes(y = ..density..),
+                          col = "black", fill = "grey", binwidth = bw) +
+           geom_density(size = 1.2,
+                        col = "blue",
+                        linetype = "blank",
+                        fill = rgb(0, 0, 1, 0.1)) +
+           stat_function(fun = dnorm,
+                         args = list(mean = 0, sd = sd_x),
+                         col = "blue", size = 1.2) +
+           theme(legend.position = "none") +
+           xlab(""))
+}
+
+
+plot.res.Lmixed <- function(mfl, df, title = "", pvalCutof = 0.05) {
+
+  ## define subscript of the different columns depending if we have only time (ncol(df)=3) or not
+  if (ncol(df) > 3) {
+    varidx <- 4
+    ffidx <- 1
+    timidx <- 2
+    individx <- 3
+    } else {
+    varidx <- 3
+    ffidx <- 1
+    timidx <- 1
+    individx <- 2
+  }
+  nameVar <- colnames(df)[varidx]
+  fflab <- colnames(df)[ffidx]
+  ## Individual time-course
+  rawPlot <-
+    ggplot(data = df, aes(x = df[[timidx]], y = df[[varidx]], colour = df[[ffidx]], group = df[[individx]])) +
+    geom_point() +
+    geom_line() +  ggtitle("Individual time-courses (raw data)") +
+    ylab(nameVar) +
+    xlab(label = colnames(df)[2]) +
+    theme(legend.title = element_blank(), legend.position = "none", plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Boxplot of fixed factor
+  bPlot <-
+    ggplot(data = df, aes(y = df[[varidx]], x = df[[ffidx]], color = df[[ffidx]])) +
+    geom_boxplot(outlier.colour = "red", outlier.shape = 8, outlier.size = 4) +
+    ggtitle(paste("Boxplot by ", fflab, sep = "")) + xlab("") + ylab("") +
+    theme(legend.title = element_blank(), plot.title = element_text(size = rel(1.2), face = "bold"))
+  ## Post-hoc estimates
+  ddlsm1  <- mfl
+  ddlsm1$name <- rownames(ddlsm1)
+  ddlsm1$Significance <- rep("NS", nrow(ddlsm1))
+  ## modif JF pour tenir compte du seuil de pvalues defini par le user
+  options("scipen" = 100, "digits" = 5)
+  pvalCutof <- as.numeric(pvalCutof)
+  bs <- 0.05; bm <- 0.01; bi <- 0.005
+  if (pvalCutof > bm) {
+    bs <- pvalCutof
+  } else
+    if (pvalCutof < bm & pvalCutof > bi) {
+      bm <- pvalCutof; bs <- pvalCutof
+    } else
+      if (pvalCutof < bi) {
+        bi <- pvalCutof; bm <- pvalCutof; bs <- pvalCutof
+      }
+  lbs <- paste("p-value < ", bs, sep = "")
+  lbm <- paste("p-value < ", bm, sep = "")
+  lbi <- paste("p-value < ", bi, sep = "")
+  cols <- paste("p-value < ", bs, sep = "")
+  colm <- paste("p-value < ", bm, sep = "")
+  coli <- paste("p-value < ", bi, sep = "")
+  valcol <- c("grey", "yellow", "orange", "red")
+  names(valcol) <- c("NS", lbs, lbm, lbi)
+  ddlsm1$Significance[which(ddlsm1$p.value <= bs)] <- lbs
+  ddlsm1$Significance[which(ddlsm1$p.value < bs & ddlsm1$p.value >= bm)] <- lbm
+  ddlsm1$Significance[which(ddlsm1$p.value < bi)] <- lbi
+  ddlsm1$levels <- rownames(ddlsm1)
+  ddlsm1$term <- sapply(rownames(ddlsm1), function(namC) {
+    strsplit(namC, split = " ", fixed = TRUE)[[1]][1]
+  })
+
+  phPlot <-
+    ggplot(ddlsm1, aes(x = levels, y = Estimate)) +
+    facet_grid(facets = ~term, ddlsm1, scales = "free", space = "free") +
+    geom_bar(aes(fill = Significance), stat = "identity") +
+    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
+    scale_fill_manual(
+      values = valcol) +
+    geom_errorbar(aes(ymin = Lower.CI, ymax = Upper.CI), width = 0.25) +
+    ggtitle("Post-hoc estimates ") + xlab("") +
+    theme(plot.title = element_text(size = rel(1.2), face = "bold"))
+
+  ## Final plotting
+  grid.arrange(arrangeGrob(rawPlot, bPlot, ncol = 2),
+               phPlot, nrow = 2,
+               top = textGrob(title, gp = gpar(fontsize = 32, font = 4))
+  )
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mixmodel.xml	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,264 @@
+<tool id="mixmodel4repeated_measures" name="mixmodel" version="3.1.0" profile="19.01">
+    <description>ANOVA for repeated measures statistics</description>
+
+    <requirements>
+        <requirement type="package" version="1.1_27">r-lme4</requirement>
+        <requirement type="package" version="3.1_3">r-lmertest</requirement>
+        <requirement type="package" version="3.2_0">r-ggplot2</requirement>
+        <requirement type="package" version="2.3">r-gridExtra</requirement>
+        <requirement type="package" version="2.40.0">bioconductor-multtest</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/mixmodel_wrapper.R'
+
+        dataMatrix_in '$dataMatrix_in'
+        sampleMetadata_in '$sampleMetadata_in'
+        variableMetadata_in '$variableMetadata_in'
+
+        fixfact '$fixfact'
+        time '$time'
+        subject '$subject'
+        adjC '$adjC'
+        trf  '$trf'
+        thrN '$thrN'
+        diaR '$diaR'
+        dff '$dff'
+        rounding '${roundchoice.rounding}'
+        #if $roundchoice.rounding == 'yes' :
+            decplaces '${roundchoice.decplaces}'
+        #end if
+
+        variableMetadata_out '$variableMetadata_out'
+        out_graph_pdf '$out_graph_pdf'
+        out_estim_pdf '$out_estim_pdf'
+        information '$information'
+
+    ]]></command>
+
+    <inputs>
+        <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, sep: tabular" />
+        <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular" />
+        <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata, decimal: '.', missing: NA, mode: character and numerical, sep: tabular"  />
+        <param name="fixfact"  label="Fixed Factor of interest" type="text" help="Name of sample metadata column corresponding to the fixed factor (use none if only time factor"/>
+        <param name="time"    label="Repeated factor (time)" type="text" help="Name of the column of the sample metadata table corresponding to the repeated factor"/>
+        <param name="subject" label="Subject factor" type="text" help="Name of the column of the sample metadata table corresponding to the subject factor"/>
+        <param name="dff" label="Degrees of freedom method for the F-tests" type="select" help="">
+            <option value="Satt">Satterthwaite</option>
+            <option value="KenR">Kenward-Roger</option>
+        </param>
+        <param name="adjC" label="Method for multiple testing correction" type="select" help="">
+            <option value="fdr">fdr</option>
+            <option value="BH">BH</option>
+            <option value="bonferroni">bonferroni</option>
+            <option value="BY">BY</option>
+            <option value="hochberg">hochberg</option>
+            <option value="holm">holm</option>
+            <option value="hommel">hommel</option>
+            <option value="none">none</option>
+        </param>
+        <param name="trf" label="Log transform of raw data" type="select" help="Transformation of raw data">
+            <option value="none">none</option>
+            <option value="log10">log10</option>
+            <option value="log2">log2</option>
+        </param>
+        <param name="thrN" type="float" value="0.05" label="(Corrected) p-value significance threshold" help="Must be between 0 and 1"/>
+        <param name="diaR" label="Perform diagnostic of the residuals" type="select" help=" Used to assess the quality of models considering distribution of residuals ">
+            <option value="yes">yes</option>
+            <option value="no">no</option>
+        </param>
+        <conditional name="roundchoice">
+            <param name="rounding" type="select" label="Rounding the result's numerical columns" help="Should the numerical values generated by this tool be rounded to a chosen number of decimal places?">
+                <option value="no">No</option>
+                <option value="yes">Yes</option>
+            </param>
+            <when value="yes">
+                <param argument="decplaces" type="integer" value="6" label="Number of decimal places for rounding" help="Positive interger" />
+            </when>
+            <when value="no">
+            </when>
+        </conditional>
+    </inputs>
+
+    <outputs>
+        <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular"/>
+        <data name="information" label="${tool.name}_information.txt" format="txt"/>
+        <data name="out_estim_pdf" label="${tool.name}_Estimates" format="pdf"/>
+        <data name="out_graph_pdf" label="${tool.name}_diagResiduals" format="pdf">
+            <filter>diaR == 'yes'</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="4">
+            <param name="dataMatrix_in" value="TwoFactor_dataMatrix.txt" />
+            <param name="sampleMetadata_in" value="TwoFactor_sampleMetadata.txt" />
+            <param name="variableMetadata_in" value="TwoFactor_variableMetadata.txt" />
+            <param name="fixfact" value="phenotype" />
+            <param name="time" value="time" />
+            <param name="subject" value="sujet" />
+            <param name="adjC" value = "none"/>
+            <param name="trf" value = "log10"/>
+            <param name="thrN" value = "0.01"/>
+            <param name="dff" value = "Satt"/>
+            <param name="rounding" value = "yes"/>
+            <param name="decplaces" value = "6"/>
+            <output name="variableMetadata_out" value="mixmodel_TwoFactor_variableMetadata.txt" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+.. class:: infomark
+
+**Tool update: See the 'NEWS' section at the bottom of the page**
+
+.. class:: infomark
+
+**Authors** Natacha Lenuzza and Jean-Francois Martin wrote this wrapper of R repeated measure anova statistical tests. 
+**Maintainers** Melanie Petera and Marie Tremblay-Franco (INRAE-MetaboHUB)
+
+MetaboHUB: The French National Infrastructure for Metabolomics and Fluxomics (https://www.metabohub.fr/home.html)
+
+.. class:: infomark
+
+**Please cite**
+
+R Core Team (2013). R: A language and Environment for Statistical Computing. http://www.r-project.org
+
+.. class:: infomark
+
+**References**
+Kuznetsova A. Brockhoff PB. and Christensen RHB (2017). lmerTest Package: Tests in Linear Mixed Effects Models. Journal of Statistical Software, 82(13), pp. 1–26. doi: 10.18637/jss.v082.i13.
+Benjamini Y. and Hochberg Y. (1995). Controlling the false discovery rate: a practical and powerful approach for multiple testing. Journal of the Royal Statistical Society. Series B (Methodological), 57:289-300.
+
+
+=============
+Mixed models
+=============
+
+-----------
+Description
+-----------
+
+The module performs analysis of variance for repeated measures using mixed model
+
+
+-----------
+Input files
+-----------
+
++---------------------------+------------+
+| File                      |   Format   |
++===========================+============+
+| 1 : Data matrix           |   tabular  |
++---------------------------+------------+
+| 2 : Sample metadatx       |   tabular  |
++---------------------------+------------+
+| 3 : Variable metadata     |   tabular  |
++---------------------------+------------+
+
+
+----------
+Parameters
+----------
+
+Data matrix file
+| variable x sample **dataMatrix** tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
+|
+
+Sample metadata file
+| sample x metadata **sampleMetadata** tabular separated file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
+|
+
+Variable metadata file
+| variable x metadata **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
+|
+
+
+Treatment
+| Name of the fixed factor in the sample metadata file. Use "none" if you have only a time factor
+|
+
+Time
+| Name of the repeated (time) factor in the sample metadata file
+|
+
+Subject
+| Name of the subject (on which the repeated measurement id done) in the sample metadata file
+|
+
+Degrees of freedom
+| Method to use for degrees of freedom computation 
+|
+
+Method for multiple testing correction
+| The 7 methods implemented in the 'p.adjust' R function are available and documented as follows:
+| "The adjustment methods include the Bonferroni correction ("bonferroni") in which the p-values are multiplied by the number of comparisons. Less conservative corrections are also included by Holm (1979) ("holm"), Hochberg (1988) ("hochberg"), Hommel (1988) ("hommel"), Benjamini and Hochberg (1995) ("BH" or its alias "fdr"), and Benjamini and Yekutieli (2001) ("BY"), respectively. A pass-through option ("none") is also included. The set of methods are contained in the p.adjust.methods vector for the benefit of methods that need to have the method as an option and pass it on to p.adjust. The first four methods are designed to give strong control of the family-wise error rate. There seems no reason to use the unmodified Bonferroni correction because it is dominated by Holm's method, which is also valid under arbitrary assumptions. Hochberg's and Hommel's methods are valid when the hypothesis tests are independent or when they are non-negatively associated (Sarkar, 1998; Sarkar and Chang, 1997). Hommel's method is more powerful than Hochberg's, but the difference is usually small and the Hochberg p-values are faster to compute. The "BH" (aka "fdr") and "BY" method of Benjamini, Hochberg, and Yekutieli control the false discovery rate, the expected proportion of false discoveries amongst the rejected hypotheses. The false discovery rate is a less stringent condition than the family-wise error rate, so these methods are more powerful than the others."
+
+
+(Corrected) p-value significance threshold
+|
+|
+
+Rounding the result's numerical columns
+| This parameter enables the choice of a given number of decimal places to be used to display results' values in the output columns of the variableMetadata that are generated by the tool.
+| If set to "Yes", you can indicate the number of decimal places wanted, and all the results' values will be rounded according to this number.
+|
+
+------------
+Output files
+------------
+
+variableMetadata_out.tabular
+| **variableMetadata** file identical to the file given as argument plus
+| pvalue of Shapiro normality test of the residuals
+| pvalues of the main effects and interaction
+| PostHoc test with difference between levels and pvalues of these difference
+|
+
+mixedmodel_Estimates
+| A pdf file is created with a graphical representation of differences among levels of factors with a color code for significance and an error bar.
+| Plots are only provided for features with at least one significant factor. 
+
+
+mixedmodel_diagResiduals
+| if Perform diagnostic of the residuals" is set to yes(default) a pdf file is created with a a serie of
+| graphics outputed in order to assess the distribution of residuals to check the adjustment.
+| Plots are only provided for features with at least one factor being significant based on the non-corrected p-values. 
+
+information.txt
+| File with all messages and warnings generated during the computation
+| The list of variables with name and a tag if it is significant for at least fixed or repeated factor.
+
+
+
+---------------------------------------------------
+
+Changelog/News
+--------------
+
+**Version 3.1.0 - April 2022**
+
+- NEW FEATURE: the tool now can be used without fixed factor by specify "none" in the concerned parameter field.
+
+- NEW FEATURE: addition of the possibility to choose the degrees of freedom computation method (previously using only the Satterthwaite method).
+
+- NEW FEATURE: addition of the possibility to choose the number of decimal places in the variable-metadata numerical columns' output.
+
+
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.18637/jss.v082.i13.</citation>
+        <citation type="bibtex">@ARTICLE{fisher,
+           author = {Benjamini Y. and Hochberg Y.,
+           title = {Controlling the false discovery rate: a practical and powerful approach for multiple testing. Journal of the Royal Statistical Society},
+           journal = {Series B (Methodological)},
+           year = {1995},
+           volume = {57},
+           pages = {289-300}
+        }</citation>
+        <citation type="doi">10.1093/bioinformatics/btu813</citation>
+    </citations>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mixmodel_script.R	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,481 @@
+#######  R functions to perform linear mixed model for repeated measures
+#######  on a multi var dataset using 3 files as used in W4M
+##############################################################################################################
+lmRepeated2FF <- function(ids, ifixfact, itime, isubject, ivd, ndim, nameVar=colnames(ids)[[ivd]],
+                          pvalCutof = 0.05, dffOption, visu = visu, tit = "", least.confounded = FALSE, outlier.limit = 3) {
+   ### function to perform linear mixed model with 1 Fixed factor + Time + random factor subject
+   ### based on lmerTest package providing functions giving the same results as SAS proc mixed
+   options(scipen = 50, digits = 5)
+
+   if (!is.numeric(ids[[ivd]]))     stop("Dependant variable is not numeric")
+   if (!is.factor(ids[[ifixfact]])) stop("fixed factor is not a factor")
+   if (!is.factor(ids[[itime]]))    stop("Repeated factor is not a factor")
+   if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor")
+
+   ## factors
+   time    <- ids[[itime]]
+   fixfact <- ids[[ifixfact]]
+   subject <- ids[[isubject]]
+   # dependant variables
+   vd      <- ids[[ivd]]
+
+   # argument of the function instead of re re-running ndim <- defColRes(ids, ifixfact, itime)
+   # nfp : number of main factors + model infos (REML, varSubject) + normality test
+   nfp <- ndim[1];
+   # ncff number of comparison of the fixed factor
+   nlff <- ndim[2];  ncff <- ndim[3]
+   # nct number of comparison of the time factor
+   nlt <- ndim[4]; nct <- ndim[5]
+   # nci number of comparison of the interaction
+   nli <- ndim[6];  nci <- ndim[7]
+   # number of all lmer results
+   nresT <- ncff + nct + nci
+   ## initialization of the result vector (1 line)
+   ## 4 * because nresf for : pvalues + Etimates + lower CI + Upper CI
+   res <- data.frame(array(rep(NA, (nfp + 4 * nresT))))
+   colnames(res)[1] <- "resultLM"
+
+   ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip
+   ### after excluding NA, table function is used to seek subjects with only 1 data
+   ids <- ids[!is.na(ids[[ivd]]), ]
+   skip <- length(which(table(ids[[isubject]]) == 1))
+
+   if (skip == 0) {
+
+      mfl <- lmer(vd ~ time + fixfact + time:fixfact + (1 | subject), ids) # lmer remix
+
+      rsum <- summary(mfl, ddf = dffOption)
+      ## test Shapiro Wilks on the residus of the model
+      rShapiro <- shapiro.test(rsum$residuals)
+      raov <- anova(mfl, ddf = dffOption)
+      dlsm1  <- data.frame(difflsmeans(mfl, test.effs = NULL))
+      ddlsm1 <- dlsm1
+      ## save rownames and factor names
+      rn <- rownames(ddlsm1)
+      fn <- ddlsm1[, c(1, 2)]
+      ## writing the results on a single line
+      namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "")
+      namesFactPval <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "")
+      namesInter <- rownames(ddlsm1)[-c(1:(nct + ncff))]
+      namesEstimate <- paste("estimate ", namesInter)
+      namespvalues <- paste("pvalue ", namesInter)
+      namesFactprinc <- c("pval_time", "pval_trt", "pval_inter")
+      namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "")
+
+      namesFactLowerCI <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "")
+      namesLowerCI <- paste("lowerCI ", namesInter, sep = "")
+
+      namesFactUpperCI <- paste("UpperCI ", rownames(ddlsm1)[c(1:(nct + ncff))], sep = "")
+      namesUpperCI <- paste("UpperCI ", namesInter, sep = "")
+
+
+      ### lmer results on 1 vector row
+      # pvalue of shapiro Wilks test of the residuals
+      res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals"
+      res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance"
+      res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML"
+      ### 3 principal factors pvalues results + shapiro test =>  nfp <- 4
+      res[c((nfp - 2):nfp), ] <- raov[, 6]
+      rownames(res)[c((nfp - 2):nfp)] <- namesFactprinc
+
+      ####################  Residuals diagnostics for significants variables #########################
+      ### Il at least 1 factor is significant and visu=TRUE NL graphics add to pdf
+      ## ajout JF du passage de la valeur de p-value cutoff
+       if (length(which(raov[, 6] <= pvalCutof)) > 0 & visu == "yes") {
+          diagmflF(mfl, title = tit, pvalCutof = pvalCutof, least.confounded = least.confounded,
+                   outlier.limit = outlier.limit)
+       }
+
+      # pvalue of fixed factor comparisons
+      nresf <- nresT
+      res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9]
+      res[(nfp + nct + 1):(nfp + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 9]
+      rownames(res)[(nfp + 1):(nfp + nct + ncff)] <- namesFactPval
+      res[(nfp + nct + ncff + 1):(nfp + nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 9]
+      rownames(res)[(nfp + nct + ncff + 1):(nfp + nresT)] <- namespvalues
+      # Estimate of the difference between levels of factors
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3]
+      res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 3]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactEstim
+      res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresT), 3]
+      rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf)] <- namesEstimate
+      # lower CI of the difference between levels of factors
+      nresf <- nresf + nresT
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7]
+      res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 7]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactLowerCI
+      res[(nfp + nresf + nct + ncff + 1):(nfp + 2 * nresf), ] <- ddlsm1[(nct + ncff + 1):(nresf), 7]
+      rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresf / 2))] <- namesLowerCI
+      # Upper CI of the difference between levels of factors
+      nresf <- nresf + nresT
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8]
+      res[(nfp + nresf + nct + 1):(nfp + nresf + nct + ncff), ] <- ddlsm1[(nct + 1):(nct + ncff), 8]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct + ncff)] <- namesFactUpperCI
+      res[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT)), ] <- ddlsm1[(nct + ncff + 1):(nresT), 8]
+      rownames(res)[(nfp + nresf + nct + ncff + 1):(nfp + nresf + (nresT))] <- namesUpperCI
+
+   } else {
+      ## one of the subject has only one time, subject can't be a random variable
+      ## A repeated measure could be run instead function lme of package nlme, in next version?
+      res[1, ] <- NA
+      cat("\n Computing impossible for feature ", tit, ": at least one subject has only one time.\n")
+   }
+   tres <- data.frame(t(res))
+   rownames(tres)[1] <- nameVar
+   cres <- list(tres, rn, fn)
+   return(cres)
+}
+
+##############################################################################################################
+lmRepeated1FF <- function(ids, ifixfact = 0, itime, isubject, ivd, ndim, nameVar = colnames(ids)[[ivd]],
+                          dffOption, pvalCutof = 0.05) {
+   ### function to perform linear mixed model with factor Time + random factor subject
+   ### based on lmerTest package providing functions giving the same results as SAS proc mixed
+
+
+   if (!is.numeric(ids[[ivd]]))     stop("Dependant variable is not numeric")
+   if (!is.factor(ids[[itime]]))    stop("Repeated factor is not a factor")
+   if (!is.factor(ids[[isubject]])) stop("Random factor is not a factor")
+   # Could be interesting here to add an experience plan check to give back a specific error message
+   # in case time points are missing for some individuals
+
+   time <- ids[[itime]]
+   subject <- ids[[isubject]]
+   vd <- ids[[ivd]] ## dependant variables (quatitative)
+
+   # nfp : nombre de facteurs principaux + model infos + normality test
+   nfp <- ndim[1]
+   # nlt number of time levels; nct number of comparisons of the time factor
+   nlt <- ndim[4]
+   nct <- ndim[5]
+   # number of all lmer results
+   nresT <- nct
+   ## initialization of the result vector (1 line)
+   res <- data.frame(array(rep(NA, (nfp + 4 * nresT))))
+   colnames(res)[1] <- "resultLM"
+
+   ### if at least one subject have data for only 1 time, mixed model is not possible and variable must be skip
+   ### after excluding NA, table function is used to seek subjects with only 1 data
+   ids <- ids[!is.na(ids[[ivd]]), ]
+   skip <- length(which(table(ids[[isubject]]) == 1))
+
+   if (skip == 0) {
+
+      mfl <- lmer(vd ~ time + (1 | subject), ids) # lmer remix
+      rsum <- summary(mfl, ddf = dffOption)
+      ## test Shapiro Wilks on the residus of the model
+      rShapiro <- shapiro.test(rsum$residuals)
+      raov <- anova(mfl, ddf = dffOption)
+      ## Sum of square : aov$'Sum Sq', Mean square : aov$`Mean Sq`, proba : aov$`Pr(>F)`
+
+      ## Test of all differences estimates between levels as SAS proc mixed.
+      ## results are in diffs.lsmeans.table dataframe
+      ## test.effs=NULL perform all pairs comparisons including interaction effect
+      dlsm1  <- data.frame(difflsmeans(mfl, test.effs = NULL))
+      ddlsm1 <- dlsm1
+      ## save rownames and factor names
+      rn <- rownames(ddlsm1)
+      fn <- ddlsm1[, c(1, 2)]
+      ## writing the results on a single line
+      namesFactEstim <- paste("estimate ", rownames(ddlsm1)[c(1:(nct))], sep = "")
+      namesFactPval  <- paste("pvalue ", rownames(ddlsm1)[c(1:(nct))], sep = "")
+      namesFactprinc <- "pval_time"
+      namesLowerCI   <- paste("lowerCI ", rownames(ddlsm1)[c(1:(nct))], sep = "")
+      namesUpperCI   <- paste("upperCI ", rownames(ddlsm1)[c(1:(nct))], sep = "")
+
+      ### lmer results on 1 vector
+      # pvalue of shapiro Wilks test of the residuals
+      res[1, ] <- rShapiro$p.value; rownames(res)[1] <- "Shapiro.pvalue.residuals"
+      res[2, ] <- rsum$varcor$subject[1] ;rownames(res)[2] <- "Subject.Variance"
+      res[3, ] <- rsum$devcomp$cmp[7] ; rownames(res)[3] <- "REML"
+
+      ###  factor time pvalue results + shapiro test
+      res[nfp, ] <- raov[, 6]
+      rownames(res)[nfp] <- namesFactprinc
+
+      # pvalues of time factor levels comparisons
+      res[(nfp + 1):(nfp + nct), ] <- ddlsm1[c(1:nct), 9]
+      rownames(res)[(nfp + 1):(nfp + nct)] <- namesFactPval
+
+      # Estimates of time factor levels
+      nresf <- nresT
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 3]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesFactEstim
+
+      # Lower CI of the difference between levels of factors
+      # nresf is incremeted
+      nresf <- nresf + nresT
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 7]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesLowerCI
+      # Lower CI of the difference between levels of factors
+      # nresf is incremeted
+      nresf <- nresf + nresT
+      res[(nfp + nresf + 1):(nfp + nresf + nct), ] <- ddlsm1[c(1:nct), 8]
+      rownames(res)[(nfp + nresf + 1):(nfp + nresf + nct)] <- namesUpperCI
+
+
+   } else {
+      ## one of the subject has only one time, subject can't be a random variable
+      ## A repeated measure could be run instead function lme of package nlme, next version
+       res[1, ] <- NA
+       cat("\n Computing impossible for feature ", colnames(ids)[4], ": at least one subject has only one time.\n")
+   }
+   tres <- data.frame(t(res))
+   rownames(tres)[1] <- nameVar
+   cres <- list(tres, rn, fn)
+   return(cres)
+
+}
+
+##############################################################################################################
+defColRes <- function(ids, ifixfact, itime) {
+   ## define the size of the result file depending on the numbers of levels of the fixed and time factor.
+   ## Numbers of levels define the numbers of comparisons with pvalue and estimate of the difference.
+   ## The result file also contains the pvalue of the fixed factor, time factor and interaction
+   ## plus Shapiro normality test. This is define by nfp
+   ## subscript of fixed factor=0 means no other fixed factor than "time"
+   if (ifixfact > 0) {
+      nfp <- 6 # shapiro + subject variance + REML + time + fixfact + interaction
+      time <- ids[[itime]]
+      fixfact <- ids[[ifixfact]]
+
+      cat("\n Levels fixfact: ", levels(fixfact))
+      cat("\n Levels time: ", levels(time))
+
+      # ncff number of comparisons of the fixed factor (nlff number of levels of fixed factor)
+      nlff <- length(levels(fixfact))
+      ncff <- (nlff * (nlff - 1)) / 2
+      # nct number of comparison of the time factor (nlt number of levels of time factor)
+      nlt <- length(levels(time))
+      nct <- (nlt * (nlt - 1)) / 2
+      # nci number of comparison of the interaction
+      nli <- nlff * nlt
+      nci <- (nli * (nli - 1)) / 2
+      ndim <- c(NA, NA, NA, NA, NA, NA, NA)
+
+      ndim[1] <- nfp   # pvalues of fixed factor, time factor and interaction (3columns) and shapiro test pvalue
+      ndim[2] <- nlff  # number of levels of fixed factor
+      ndim[3] <- ncff  # number of comparisons (2by2) of the fixed factor
+      ndim[4] <- nlt   # number of levels of time factor
+      ndim[5] <- nct   # number of comparisons (2by2) of the time factor
+      ndim[6] <- nli   # number of levels of interaction
+      ndim[7] <- nci   # number of comparisons (2by2) of the interaction
+
+   }
+   else {
+      nfp <- 4 # Mandatory columns: shapiro + subject variance + REML + time
+      time <- ids[[itime]]
+      # nct number of comparison of the time factor
+      nlt <- length(levels(time))
+      nct <- (nlt * (nlt - 1)) / 2
+      ndim <- c(NA, NA, NA, NA, NA, NA, NA)
+
+      ndim[1] <- nfp   # pvalues of shapiro test + subject variance + REML + time factor
+      ndim[4] <- nlt   # number of levels of time factor
+      ndim[5] <- nct   # number of comparisons (2by2) of the time factor
+   }
+   return(ndim)
+}
+
+##############################################################################################################
+lmixedm <- function(datMN,
+                    samDF,
+                    varDF,
+                    fixfact, time, subject,
+                    logtr = "none",
+                    pvalCutof = 0.05,
+                    pvalcorMeth = c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")[7],
+                    dffOption,
+                    visu = "no",
+                    least.confounded = FALSE,
+                    outlier.limit = 3,
+                         pdfC,
+                         pdfE
+                   ) {
+   sampids <- samDF
+   dataMatrix <- datMN
+   varids <- varDF
+
+   options("scipen" = 50, "digits" = 5)
+   pvalCutof <- as.numeric(pvalCutof)
+
+   cat("\n dff computation method=", dffOption)
+   ### Function running lmer function on a set of variables described in
+   ### 3 different dataframes as used by W4M
+   ### results are merge with the metadata variables varids
+   ### ifixfact, itime, isubject are subscripts of the dependant variables. if only time factor the ifixfat is set to 0 and no diag is performed (visu="no")
+   if (fixfact == "none") {
+     ifixfact <- 0 ; visu <- "no"
+   } else ifixfact <- which(colnames(sampids) == fixfact)
+   itime    <- which(colnames(sampids) == time)
+   isubject <- which(colnames(sampids) == subject)
+
+   lmmds <- dataMatrix
+   if (logtr != "log10" & logtr != "log2") logtr <- "none"
+   if (logtr == "log10") lmmds <- log10(lmmds + 1)
+   if (logtr == "log2") lmmds <- log2(lmmds + 1)
+
+   dslm <- cbind(sampids, lmmds)
+
+   nvar <- ncol(lmmds)
+   firstvar <- ncol(sampids) + 1
+   lastvar <- firstvar + ncol(lmmds) - 1
+
+   if (ifixfact > 0)  dslm[[ifixfact]] <- factor(dslm[[ifixfact]])
+   dslm[[itime]]    <- factor(dslm[[itime]])
+   dslm[[isubject]] <- factor(dslm[[isubject]])
+   ## call defColres to define the numbers of test and so the number of columns of results
+   ## depends on whether or not there is a fixed factor with time. If only time factor ifixfact=0
+   if (ifixfact > 0) {
+      ndim <- defColRes(dslm[, c(ifixfact, itime)], ifixfact = 1, itime = 2)
+      nColRes <- ndim[1] + (4 * (ndim[3] + ndim[5] + ndim[7]))
+      firstpval <- ndim[1] - 2
+      lastpval  <- ndim[1] + ndim[3] + ndim[5] + ndim[7]
+  } else {
+      ndim <- defColRes(dslm[, itime], ifixfact = 0, itime = 1)
+      nColRes <- ndim[1] + (4 * (ndim[5]))
+      firstpval <- ndim[1]
+      lastpval <- ndim[1] + ndim[5]
+   }
+   ## initialisation of the  result file
+   resLM <- data.frame(array(rep(NA, nvar * nColRes), dim = c(nvar, nColRes)))
+   rownames(resLM) <- rownames(varids)
+
+   ## PDF initialisation
+   if (visu == "yes") {
+      pdf(pdfC, onefile = TRUE, height = 15, width = 30)
+      par(mfrow = c(1, 3))
+   }
+
+
+   for (i in firstvar:lastvar) {
+
+      subds <- dslm[, c(ifixfact, itime, isubject, i)]
+
+      tryCatch({
+         if (ifixfact > 0)
+            reslmer <- lmRepeated2FF(subds, ifixfact = 1, itime = 2, isubject = 3, ivd = 4, ndim = ndim, visu = visu,
+                                     tit = colnames(dslm)[i], pvalCutof = pvalCutof,
+                                     dffOption = dffOption, least.confounded = least.confounded,
+                                     outlier.limit = outlier.limit)
+         else
+            reslmer <- lmRepeated1FF(subds, ifixfact = 0, itime = 1, isubject = 2, ivd = 3, ndim = ndim,
+                                     pvalCutof = pvalCutof, dffOption = dffOption)
+
+         resLM[i - firstvar + 1, ] <- reslmer[[1]]
+      }, error = function(e) {
+        cat("\nERROR with ", rownames(resLM)[i - firstvar + 1], ": ", conditionMessage(e), "\n")
+       }
+      )
+
+   }
+
+   if (exists("reslmer")) {
+      colnames(resLM) <- colnames(reslmer[[1]])
+      labelRow <- reslmer[[2]]
+      factorRow <- reslmer[[3]]
+   } else {
+      stop("\n- - - - -\nModel computation impossible for every single variables in the dataset: no result returned.\n- - - - -\n")
+   }
+
+   if (visu == "yes") dev.off()
+
+
+   ## pvalue correction with p.adjust library multtest
+   ## Possible methods of pvalue correction
+   AdjustMeth <- c("holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none")
+   if (length(which(pvalcorMeth == AdjustMeth)) == 0) pvalcorMeth <- "none"
+
+   if (pvalcorMeth != "none") {
+      for (k in firstpval:lastpval) {
+         resLM[[k]] <- p.adjust(resLM[[k]], method = pvalcorMeth, n = dim(resLM[k])[[1]])
+
+      }
+   }
+
+   ## for each variables, set pvalues to NA and estimates = 0 when pvalue of factor > pvalCutof value define by user
+   if (ifixfact > 0) {
+      ## time effect
+      resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval + 1):(lastpval + ndim[5]))] <- 0
+      resLM[which(resLM[, firstpval] > pvalCutof), c((ndim[1] + 1):(ndim[1] + ndim[5]))] <- NA
+      ## treatment effect
+      resLM[which(resLM[, firstpval + 1] > pvalCutof), c((lastpval + ndim[5] + 1):(lastpval + ndim[5] + ndim[3]))] <- 0
+      resLM[which(resLM[, firstpval + 1] > pvalCutof), c((ndim[1]  + ndim[5] + 1):(ndim[1] + ndim[5] + ndim[3]))] <- NA
+      ## interaction effect
+      resLM[which(resLM[, firstpval + 2] > pvalCutof), c((lastpval + ndim[5] + ndim[3] + 1):(lastpval + ndim[5] + ndim[3] + ndim[7]))] <- 0
+      resLM[which(resLM[, firstpval + 2] > pvalCutof), c((ndim[1]  + ndim[5] + ndim[3] + 1):(ndim[1]  + ndim[5] + ndim[3] + ndim[7]))] <- NA
+   } else {
+      ## time effect only
+      resLM[which(resLM[, firstpval] > pvalCutof), c((lastpval  + 1):(lastpval  + ndim[5]))] <- 0
+      resLM[which(resLM[, firstpval] > pvalCutof), c((firstpval + 1):(firstpval + ndim[5]))] <- NA
+   }
+
+   ## for each variable, estimates plots are performed if at least one factor is significant after p-value correction
+   pdf(pdfE, onefile = TRUE, height = 15, width = 30)
+
+   ## for each variable (in row)
+   for (i in seq_len(nrow(resLM))) {
+
+      ## if any fixed factor + time factor
+      if (ifixfact > 0)
+
+         ## if any main factor after p-value correction is significant -> plot estimates and time course
+         if (length(which(resLM[i, c(4:6)] < pvalCutof)) > 0) {
+
+            ## Plot of time course by fixfact : data prep with factors and quantitative var to be plot
+            subv <- dslm[, colnames(dslm) == rownames(resLM)[i]]
+            subds <- data.frame(dslm[[ifixfact]], dslm[[itime]], dslm[[isubject]], subv)
+            libvar <- c(fixfact, time, subject)
+            colnames(subds) <- c(libvar, rownames(resLM)[i])
+
+            ## Plot of estimates with error bars for all fixed factors and interaction
+            rddlsm1 <- t(resLM[i, ])
+            pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"]
+            esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"]
+            loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"]
+            upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "UpperC"]
+            rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow)
+            colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow))
+            rownames(rddlsm1) <- labelRow
+
+            ## function for plotting these 2 graphs
+            plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof)
+
+         }
+
+      ## if only a time factor
+      if (ifixfact == 0)
+
+         ## if time factor after p-value correction is significant -> plot time course
+         if (length(which(resLM[i, 4] < pvalCutof)) > 0) {
+
+            ## Plot of time course  : data prep with factors and quantitative var to be plot
+            subv <- dslm[, colnames(dslm) == rownames(resLM)[i]]
+            subds <- data.frame(dslm[[itime]], dslm[[isubject]], subv)
+            libvar <- c(time, subject)
+            colnames(subds) <- c(libvar, rownames(resLM)[i])
+
+            ## Plot of estimates with error bars for all fixed factors and interaction
+            rddlsm1 <- t(resLM[i, ])
+            pval <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "pvalue"]
+            esti <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "estima"]
+            loci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "lowerC"]
+            upci <- rddlsm1[substr(rownames(rddlsm1), 1, 6) == "upperC"]
+            rddlsm1 <- data.frame(pval, esti, loci, upci, factorRow)
+            colnames(rddlsm1) <- c("p.value", "Estimate", "Lower.CI", "Upper.CI", colnames(factorRow))
+            rownames(rddlsm1) <- labelRow
+
+            ## function for plotting these 2 graphs
+            plot.res.Lmixed(rddlsm1, subds, title = rownames(resLM)[i], pvalCutof = pvalCutof)
+
+         }
+
+   }
+   dev.off()
+
+   ## return result file with pvalues and estimates (exclude confidence interval used for plotting)
+   iCI <- which(substr(colnames(resLM), 4, 7) == "erCI")
+   resLM <- resLM[, - iCI]
+   resLM <- cbind(varids, resLM)
+   return(resLM)
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mixmodel_wrapper.R	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,200 @@
+#!/usr/bin/env Rscript
+
+library(lme4)     ## mixed model computing
+library(Matrix)
+library(MASS)
+library(lmerTest) ## computing pvalue and lsmeans from results of lme4 package
+library(multtest) ## multiple testing
+library(ggplot2)
+library(gridExtra)
+library(grid)
+
+source_local <- function(fname) {
+    argv <- commandArgs(trailingOnly = FALSE)
+    base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+    source(paste(base_dir, fname, sep = "/"))
+}
+
+source_local("mixmodel_script.R")
+source_local("diagmfl.R")
+
+parse_args <- function() {
+  args <- commandArgs()
+  start <- which(args == "--args")[1] + 1
+  if (is.na(start)) {
+    return(list())
+  }
+  seq_by2 <- seq(start, length(args), by = 2)
+  result <- as.list(args[seq_by2 + 1])
+  names(result) <- args[seq_by2]
+  return(result)
+}
+
+argVc <- unlist(parse_args())
+
+##------------------------------
+## Initializing
+##------------------------------
+
+## options
+##--------
+
+strAsFacL <- options()$stringsAsFactors
+options(stringsAsFactors = FALSE)
+
+## constants
+##----------
+
+modNamC <- "mixmodel" ## module name
+
+topEnvC <- environment()
+flagC <- "\n"
+
+## functions
+##----------
+
+flgF <- function(tesC,
+                 envC = topEnvC,
+                 txtC = NA) { ## management of warning and error messages
+
+    tesL <- eval(parse(text = tesC), envir = envC)
+
+    if (!tesL) {
+
+        sink(NULL)
+        stpTxtC <- ifelse(is.na(txtC),
+                          paste0(tesC, " is FALSE"),
+                          txtC)
+
+        stop(stpTxtC,
+             call. = FALSE)
+
+    }
+
+} ## flgF
+
+## log file
+##---------
+
+sink(argVc["information"])
+
+cat("\nStart of the '", modNamC, "' Galaxy module call: ", format(Sys.time(), "%a %d %b %Y %X"), "\n", sep = "")
+cat("\nParameters used:\n\n")
+print(argVc)
+cat("\n\n")
+
+## loading
+##--------
+
+datMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
+                                check.names = FALSE,
+                                header = TRUE,
+                                row.names = 1,
+                                sep = "\t")))
+
+samDF <- read.table(argVc["sampleMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+
+varDF <- read.table(argVc["variableMetadata_in"],
+                    check.names = FALSE,
+                    header = TRUE,
+                    row.names = 1,
+                    sep = "\t")
+
+
+## checking
+##---------
+
+flgF("identical(rownames(datMN), rownames(samDF))", txtC = "Column names of the dataMatrix are not identical to the row names of the sampleMetadata; check your data with the 'Check Format' module in the 'Quality Control' section")
+flgF("identical(colnames(datMN), rownames(varDF))", txtC = "Row names of the dataMatrix are not identical to the row names of the variableMetadata; check your data with the 'Check Format' module in the 'Quality Control' section")
+
+flgF("argVc['time']    %in% colnames(samDF)", txtC = paste0("Required time factor '", argVc["time"], "' could not be found in the column names of the sampleMetadata"))
+flgF("argVc['subject'] %in% colnames(samDF)", txtC = paste0("Required subject factor '", argVc["subject"], "' could not be found in the column names of the sampleMetadata"))
+
+flgF("mode(samDF[, argVc['time']])    %in% c('character', 'numeric')", txtC = paste0("The '", argVc["time"], "' column of the sampleMetadata should contain either number only, or character only"))
+flgF("mode(samDF[, argVc['subject']]) %in% c('character', 'numeric')", txtC = paste0("The '", argVc["subject"], "' column of the sampleMetadata should contain either number only, or character only"))
+
+flgF("argVc['adjC'] %in% c('holm', 'hochberg', 'hommel', 'bonferroni', 'BH', 'BY', 'fdr', 'none')")
+flgF("argVc['trf'] %in% c('none', 'log10', 'log2')")
+
+flgF("0 <= as.numeric(argVc['thrN']) && as.numeric(argVc['thrN']) <= 1", txtC = "(corrected) p-value threshold must be between 0 and 1")
+flgF("argVc['diaR'] %in% c('no', 'yes')")
+
+
+##------------------------------
+## Formating
+##------------------------------
+
+if (argVc["dff"] == "Satt") {
+  dffmeth <- "Satterthwaite"
+} else {
+  dffmeth <- "Kenward-Roger"
+}
+
+
+##------------------------------
+## Computation
+##------------------------------
+
+
+varDFout <- lmixedm(datMN = datMN,
+                     samDF = samDF,
+                     varDF = varDF,
+                     fixfact     = argVc["fixfact"],
+                     time        = argVc["time"],
+                     subject     = argVc["subject"],
+                     logtr       = argVc["trf"],
+                     pvalCutof   = argVc["thrN"],
+                     pvalcorMeth = argVc["adjC"],
+                     dffOption   = dffmeth,
+                     visu        = argVc["diaR"],
+                     least.confounded = FALSE,
+                     outlier.limit = 3,
+                     pdfC        = argVc["out_graph_pdf"],
+                     pdfE        = argVc["out_estim_pdf"]
+                     )
+
+
+
+
+##------------------------------
+## Rounding
+##------------------------------
+
+if (argVc["rounding"] == "yes") {
+  varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))] <- apply(varDFout[, which(!(colnames(varDFout) %in% colnames(varDF)))], 2, round, digits = as.numeric(argVc["decplaces"]))
+}
+
+##------------------------------
+## Ending
+##------------------------------
+
+
+## saving
+##--------
+
+varDFout <- cbind.data.frame(variableMetadata = rownames(varDFout),
+                          varDFout)
+
+write.table(varDFout,
+            file = argVc["variableMetadata_out"],
+            quote = FALSE,
+            row.names = FALSE,
+            sep = "\t")
+
+## closing
+##--------
+
+cat("\n\nEnd of '", modNamC, "' Galaxy module call: ",
+    as.character(Sys.time()), "\n", sep = "")
+cat("\nInformation about R (version, Operating System, attached or loaded packages):\n\n")
+sessionInfo()
+
+sink()
+
+options(stringsAsFactors = strAsFacL)
+
+rm(list = ls())
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/OneFactor_dataMatrix.tsv	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,23 @@
+variableMetadata	s1D2	s2D2	s3D2	s1D3	s2D3	s3D3	s1D4	s2D4	s3D4	s1D5	s2D5	s3D5	s1D6	s2D6	s3D6
+V1	1.06E+06	1.07E+06	8.93E+05	9.11E+05	8.07E+05	6.90E+05	7.07E+05	7.66E+05	6.82E+05	6.25E+05	7.25E+05	6.78E+05	7.59E+05	6.70E+05	7.41E+05
+V2	4.67E+06	4.78E+06	4.17E+06	4.84E+06	4.50E+06	4.41E+06	6.96E+06	7.05E+06	6.65E+06	4.12E+06	4.42E+06	4.39E+06	1.26E+07	1.28E+07	1.32E+07
+V3	9.03E+05	8.44E+05	7.31E+05	9.97E+05	7.97E+05	7.47E+05	7.18E+05	8.28E+05	7.81E+05	9.08E+05	8.35E+05	8.37E+05	1.12E+06	1.11E+06	1.16E+06
+V4	1.62E+05	1.27E+05	1.21E+05	1.41E+05	8.48E+04	1.06E+05	9.72E+04	1.52E+05	1.27E+05	1.29E+05	1.42E+05	1.17E+05	1.72E+05	1.67E+05	1.87E+05
+V5	5.22E+04	6.20E+04	5.55E+04	4.78E+04	3.41E+04	5.84E+04	6.30E+04	1.11E+05	1.16E+05	4.59E+04	3.34E+04	4.08E+04	9.14E+04	6.78E+04	1.08E+05
+V6	1.68E+05	1.83E+05	2.76E+05	2.30E+05	1.62E+05	3.13E+05	4.47E+05	5.34E+05	6.59E+05	2.38E+05	2.30E+05	1.81E+05	4.93E+05	3.83E+05	6.30E+05
+V7	8.85E+06	8.39E+06	8.56E+06	5.51E+06	5.07E+06	4.90E+06	2.90E+06	3.10E+06	3.01E+06	2.36E+06	2.40E+06	2.30E+06	5.06E+06	4.94E+06	4.93E+06
+V8	2.53E+05	2.26E+05	1.90E+05	2.75E+05	2.67E+05	2.22E+05	2.12E+05	2.43E+05	1.72E+05	1.51E+05	1.56E+05	1.50E+05	2.49E+05	2.22E+05	2.48E+05
+V9	7.91E+05	7.16E+05	6.57E+05	6.08E+05	4.62E+05	4.88E+05	5.49E+05	6.13E+05	5.54E+05	7.56E+05	7.81E+05	7.59E+05	1.47E+06	1.46E+06	1.60E+06
+V10	1.76E+04	1.79E+04	1.86E+04	4.00E+04	4.27E+04	3.30E+04	5.34E+04	6.26E+04	6.09E+04	1.04E+05	8.24E+04	7.16E+04	2.09E+05	2.07E+05	2.19E+05
+V11	1.03E+05	9.28E+04	9.34E+04	8.70E+04	5.73E+04	8.05E+04	1.82E+06	1.98E+06	1.76E+06	8.41E+04	7.80E+04	7.31E+04	5.62E+04	5.01E+04	7.48E+04
+V12	4.48E+04	6.71E+04	4.27E+04	3.54E+04	3.04E+04	2.73E+04	2.47E+04	2.85E+04	3.26E+04	2.11E+04	1.55E+04	2.71E+04	3.44E+04	4.14E+04	5.87E+04
+V13	4.48E+04	6.71E+04	4.27E+04	3.54E+04	3.04E+04	2.73E+04	2.47E+04	2.85E+04	3.26E+04	2.11E+04	1.55E+04	2.71E+04	3.44E+04	4.14E+04	5.87E+04
+V14	1.16E+04	6.64E+03	1.29E+04	2.59E+04	1.55E+04	2.21E+04	3.15E+04	4.05E+04	3.10E+04	4.37E+04	2.90E+04	2.67E+04	6.62E+04	1.00E+05	8.75E+04
+V15	3.04E+05	3.05E+05	2.69E+05	2.57E+05	1.99E+05	2.16E+05	1.97E+05	1.97E+05	2.03E+05	2.65E+05	2.71E+05	2.60E+05	1.67E+05	1.89E+05	1.99E+05
+V16	2.69E+05	2.45E+05	2.18E+05	1.23E+05	1.27E+05	1.04E+05	2.91E+05	2.82E+05	2.79E+05	2.37E+05	2.11E+05	2.18E+05	5.60E+05	5.89E+05	6.08E+05
+V17	9.30E+04	8.55E+04	7.01E+04	4.84E+04	4.06E+04	3.14E+04	8.77E+04	1.03E+05	9.48E+04	9.88E+04	6.91E+04	7.81E+04	3.19E+05	3.25E+05	3.76E+05
+V18	2.30E+04	3.87E+04	2.01E+04	9.56E+03	1.27E+04	1.69E+04	2.38E+04	2.66E+04	2.99E+04	5.98E+04	4.93E+04	5.13E+04	1.08E+05	1.30E+05	1.36E+05
+V19	6.25E+04	7.43E+04	4.71E+04	1.62E+04	1.05E+04	1.57E+04	3.73E+04	3.08E+04	3.67E+04	3.50E+04	3.90E+04	2.78E+04	4.64E+05	1.60E+05	1.75E+05
+V20	1.72E+04	1.58E+04	1.35E+04	1.20E+04	1.02E+04	8.85E+03	1.35E+04	1.71E+04	1.62E+04	1.64E+04	8.41E+03	6.02E+03	5.07E+04	7.33E+04	6.57E+04
+V21	1.86E+06	1.90E+06	1.90E+06	2.05E+06	2.06E+06	2.07E+06	1.97E+06	1.94E+06	1.90E+06	1.64E+06	1.67E+06	1.78E+06	1.65E+06	1.73E+06	1.77E+06
+V22	1.52E+04	8.17E+03	1.37E+04	1.39E+04	1.34E+04	1.36E+04	1.40E+04	1.67E+04	1.60E+04	1.33E+04	1.69E+04	1.20E+04	1.05E+04	1.15E+04	1.11E+04
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/OneFactor_sampleMetadata.tsv	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,16 @@
+sampleMetadata	time	subject
+s1D2	D2	s1
+s2D2	D2	s2
+s3D2	D2	s3
+s1D3	D3	s1
+s2D3	D3	s2
+s3D3	D3	s3
+s1D4	D4	s1
+s2D4	D4	s2
+s3D4	D4	s3
+s1D5	D5	s1
+s2D5	D5	s2
+s3D5	D5	s3
+s1D6	D6	s1
+s2D6	D6	s2
+s3D6	D6	s3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/OneFactor_variableMetadata.tsv	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,23 @@
+variableMetadata	name
+V1	name1
+V2	name2
+V3	name3
+V4	name4
+V5	name5
+V6	name6
+V7	name7
+V8	name8
+V9	name9
+V10	name10
+V11	name11
+V12	name12
+V13	name13
+V14	name14
+V15	name15
+V16	name16
+V17	name17
+V18	name18
+V19	name19
+V20	name20
+V21	name21
+V22	name22
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TwoFactor_dataMatrix.txt	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,6 @@
+variable	S01T0	S01T1	S01T2	S01T3	S02T0	S02T1	S02T2	S02T3	S03T0	S03T1	S03T2	S03T3	S04T0	S04T1	S04T2	S04T3	S05T0	S05T1	S05T2	S05T3	S06T0	S06T1	S06T2	S06T3	S07T0	S07T1	S07T2	S07T3	S08T0	S08T1	S08T2	S08T3	S09T0	S09T1	S09T2	S09T3	S10T0	S10T1	S10T2	S10T3	S11T0	S11T1	S11T2	S11T3	S12T0	S12T1	S12T2	S12T3	S13T0	S13T1	S13T2	S13T3	S14T0	S14T1	S14T2	S14T3	S15T0	S15T1	S15T2	S15T3	S16T0	S16T1	S16T2	S16T3	S17T0	S17T1	S17T2	S17T3	S18T0	S18T1	S18T2	S18T3
+v0.4425_385	126.39	234.11	162.68	115.29	172.44	171.53	180.31	114.94	224.14	195.09	189.45	122.79	212.02	243.48	188.42	138.91	241.59	192.95	214.74	98.69	270.03	176.79	208.73	78.04	247.82	176.51	175.78	98.94	260.63	181.93	178.06	111.58	218.95	176.24	200.29	112.77	140.23	157.38	124.62	114.34	147.23	165.29	125.85	128.41	126.35	139.75	127.95	74.86	124.92	155.12	109.4	86.88	145.57	142	135.29	83.08	116.95	142.28	113.43	118.26	80.81	146.03	130.52	103.4	79.58	168	109.35	88.19	138.12	166.61	115.05	83.05
+v0.4498_625	108.9	133.53	128.66	205.38	121.48	112.23	142.97	199.35	127.44	102.45	173.31	252.14	123.96	96.93	118.7	234.54	146.33	109.1	178.69	233.61	154.27	138.81	157.97	213.18	176.06	108.79	142.11	116.14	138.78	131.42	172.65	214.14	159.19	102.17	137.67	211.04	120.1	69.85	65.75	92.68	121.22	70.04	76.06	91.89	81.18	73.14	80.93	92.69	110.68	66.84	79.72	92.89	93.5	62.98	87.39	98.32	84.59	65.18	99.23	36.94	85.1	80.49	95.17	115.22	113.47	65.32	53.29	113.25	106.68	74.04	66.5	88.96
+v0.4711_463	124.1	166.84	156.18	82.57	158.4	190.09	152.89	100.76	155.75	174.85	153.14	112.76	171.54	150.37	163.49	116.06	198.78	161.53	247.61	122.94	167.38	186.12	194.59	93.05	208.24	150.15	140.97	55.62	200.32	138.59	145.99	89.44	192.49	131.44	145.89	93.38	102.94	67.11	83.99	86.86	110.09	98.61	77.91	100.75	136.32	92.56	86.79	75.62	144.39	65.27	110.24	94.26	136.48	77.62	95.43	60.3	135.98	87.54	65.37	92.04	112.09	58.9	92.35	72.92	132.84	97.12	80.71	79.01	144.75	85.78	73.61	83.68
+v0.4715_659	688.13	739.41	529.62	354.46	828.48	660.76	545.32	318.51	798.21	714.6	617.95	400.94	815.2	696.85	611.31	354.28	909.08	792.05	650.85	386.48	897.32	729.1	638.99	420.19	967.92	765.14	579.41	369.82	929.8	759.62	617.5	339.81	989.48	781.37	606.77	386.24	289.67	359.43	240.13	294.74	314.71	338.43	203.29	264.08	349.01	365.76	245.06	283.41	327.8	354.82	228.03	263.83	300.57	362.51	242.41	282.21	280.17	344.99	253.88	273.4	304.37	325.89	292.93	292.98	317.8	318.16	205.93	245.92	310.23	330.51	230.27	270.3
+v0.4766_757	117.8	110.31	114.56	99.99	144.39	155.33	114.47	122	147.16	132.82	129.78	94.86	128.26	161.38	119.12	96.73	177.26	134.94	149.39	134.12	163.22	146.12	148.26	108.68	159.39	140.75	119.11	65.83	170.45	131.79	163.58	104.63	167.33	112.88	142.58	85.38	80.53	84.3	42.33	62.04	73.74	97.78	58	79.33	91.7	82.39	69.71	83.58	77.86	65.56	74.11	82.9	90.53	75.27	80.46	81.88	92.77	52.94	67.99	84.21	91.27	69.54	77.13	85.27	93.18	83.68	57.01	65.09	92.12	82.63	70.9	75.53
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TwoFactor_sampleMetadata.txt	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,73 @@
+sample	sujet	phenotype	time
+S01T0	S01	WT	T0
+S01T1	S01	WT	T1
+S01T2	S01	WT	T2
+S01T3	S01	WT	T3
+S02T0	S02	WT	T0
+S02T1	S02	WT	T1
+S02T2	S02	WT	T2
+S02T3	S02	WT	T3
+S03T0	S03	WT	T0
+S03T1	S03	WT	T1
+S03T2	S03	WT	T2
+S03T3	S03	WT	T3
+S04T0	S04	WT	T0
+S04T1	S04	WT	T1
+S04T2	S04	WT	T2
+S04T3	S04	WT	T3
+S05T0	S05	WT	T0
+S05T1	S05	WT	T1
+S05T2	S05	WT	T2
+S05T3	S05	WT	T3
+S06T0	S06	WT	T0
+S06T1	S06	WT	T1
+S06T2	S06	WT	T2
+S06T3	S06	WT	T3
+S07T0	S07	WT	T0
+S07T1	S07	WT	T1
+S07T2	S07	WT	T2
+S07T3	S07	WT	T3
+S08T0	S08	WT	T0
+S08T1	S08	WT	T1
+S08T2	S08	WT	T2
+S08T3	S08	WT	T3
+S09T0	S09	WT	T0
+S09T1	S09	WT	T1
+S09T2	S09	WT	T2
+S09T3	S09	WT	T3
+S10T0	S10	MT	T0
+S10T1	S10	MT	T1
+S10T2	S10	MT	T2
+S10T3	S10	MT	T3
+S11T0	S11	MT	T0
+S11T1	S11	MT	T1
+S11T2	S11	MT	T2
+S11T3	S11	MT	T3
+S12T0	S12	MT	T0
+S12T1	S12	MT	T1
+S12T2	S12	MT	T2
+S12T3	S12	MT	T3
+S13T0	S13	MT	T0
+S13T1	S13	MT	T1
+S13T2	S13	MT	T2
+S13T3	S13	MT	T3
+S14T0	S14	MT	T0
+S14T1	S14	MT	T1
+S14T2	S14	MT	T2
+S14T3	S14	MT	T3
+S15T0	S15	MT	T0
+S15T1	S15	MT	T1
+S15T2	S15	MT	T2
+S15T3	S15	MT	T3
+S16T0	S16	MT	T0
+S16T1	S16	MT	T1
+S16T2	S16	MT	T2
+S16T3	S16	MT	T3
+S17T0	S17	MT	T0
+S17T1	S17	MT	T1
+S17T2	S17	MT	T2
+S17T3	S17	MT	T3
+S18T0	S18	MT	T0
+S18T1	S18	MT	T1
+S18T2	S18	MT	T2
+S18T3	S18	MT	T3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/TwoFactor_variableMetadata.txt	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,6 @@
+variable	num
+v0.4425_385	1
+v0.4498_625	2
+v0.4711_463	3
+v0.4715_659	4
+v0.4766_757	5
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mixmodel_TwoFactor_variableMetadata.txt	Mon May 16 09:25:01 2022 +0000
@@ -0,0 +1,6 @@
+variableMetadata	num	Shapiro.pvalue.residuals	Subject.Variance	REML	pval_time	pval_trt	pval_inter	pvalue.timeT0...timeT1	pvalue.timeT0...timeT2	pvalue.timeT0...timeT3	pvalue.timeT1...timeT2	pvalue.timeT1...timeT3	pvalue.timeT2...timeT3	pvalue.fixfactMT...fixfactWT	pvalue..timeT0.fixfactMT...timeT1.fixfactMT	pvalue..timeT0.fixfactMT...timeT2.fixfactMT	pvalue..timeT0.fixfactMT...timeT3.fixfactMT	pvalue..timeT0.fixfactMT...timeT0.fixfactWT	pvalue..timeT0.fixfactMT...timeT1.fixfactWT	pvalue..timeT0.fixfactMT...timeT2.fixfactWT	pvalue..timeT0.fixfactMT...timeT3.fixfactWT	pvalue..timeT1.fixfactMT...timeT2.fixfactMT	pvalue..timeT1.fixfactMT...timeT3.fixfactMT	pvalue..timeT1.fixfactMT...timeT0.fixfactWT	pvalue..timeT1.fixfactMT...timeT1.fixfactWT	pvalue..timeT1.fixfactMT...timeT2.fixfactWT	pvalue..timeT1.fixfactMT...timeT3.fixfactWT	pvalue..timeT2.fixfactMT...timeT3.fixfactMT	pvalue..timeT2.fixfactMT...timeT0.fixfactWT	pvalue..timeT2.fixfactMT...timeT1.fixfactWT	pvalue..timeT2.fixfactMT...timeT2.fixfactWT	pvalue..timeT2.fixfactMT...timeT3.fixfactWT	pvalue..timeT3.fixfactMT...timeT0.fixfactWT	pvalue..timeT3.fixfactMT...timeT1.fixfactWT	pvalue..timeT3.fixfactMT...timeT2.fixfactWT	pvalue..timeT3.fixfactMT...timeT3.fixfactWT	pvalue..timeT0.fixfactWT...timeT1.fixfactWT	pvalue..timeT0.fixfactWT...timeT2.fixfactWT	pvalue..timeT0.fixfactWT...timeT3.fixfactWT	pvalue..timeT1.fixfactWT...timeT2.fixfactWT	pvalue..timeT1.fixfactWT...timeT3.fixfactWT	pvalue..timeT2.fixfactWT...timeT3.fixfactWT	estimate.timeT0...timeT1	estimate.timeT0...timeT2	estimate.timeT0...timeT3	estimate.timeT1...timeT2	estimate.timeT1...timeT3	estimate.timeT2...timeT3	estimate.fixfactMT...fixfactWT	estimate..timeT0.fixfactMT...timeT1.fixfactMT	estimate..timeT0.fixfactMT...timeT2.fixfactMT	estimate..timeT0.fixfactMT...timeT3.fixfactMT	estimate..timeT0.fixfactMT...timeT0.fixfactWT	estimate..timeT0.fixfactMT...timeT1.fixfactWT	estimate..timeT0.fixfactMT...timeT2.fixfactWT	estimate..timeT0.fixfactMT...timeT3.fixfactWT	estimate..timeT1.fixfactMT...timeT2.fixfactMT	estimate..timeT1.fixfactMT...timeT3.fixfactMT	estimate..timeT1.fixfactMT...timeT0.fixfactWT	estimate..timeT1.fixfactMT...timeT1.fixfactWT	estimate..timeT1.fixfactMT...timeT2.fixfactWT	estimate..timeT1.fixfactMT...timeT3.fixfactWT	estimate..timeT2.fixfactMT...timeT3.fixfactMT	estimate..timeT2.fixfactMT...timeT0.fixfactWT	estimate..timeT2.fixfactMT...timeT1.fixfactWT	estimate..timeT2.fixfactMT...timeT2.fixfactWT	estimate..timeT2.fixfactMT...timeT3.fixfactWT	estimate..timeT3.fixfactMT...timeT0.fixfactWT	estimate..timeT3.fixfactMT...timeT1.fixfactWT	estimate..timeT3.fixfactMT...timeT2.fixfactWT	estimate..timeT3.fixfactMT...timeT3.fixfactWT	estimate..timeT0.fixfactWT...timeT1.fixfactWT	estimate..timeT0.fixfactWT...timeT2.fixfactWT	estimate..timeT0.fixfactWT...timeT3.fixfactWT	estimate..timeT1.fixfactWT...timeT2.fixfactWT	estimate..timeT1.fixfactWT...timeT3.fixfactWT	estimate..timeT2.fixfactWT...timeT3.fixfactWT
+v0.4425_385	1	0.004001	0	-140.931331	0	0	0.000264	0.191909	0.277993	0	0.018695	0	0	0	0.001835	0.869646	0.006728	0	0	0	0.238471	0.002992	0	0.000044	0.003836	0.009486	0.000036	0.004238	0	0	0	0.180279	0	0	0	0.112058	0.170546	0.091717	0	0.745427	0	0	-0.030835	0.02558	0.192247	0.056415	0.223081	0.166666	-0.148875	-0.107496	-0.005448	0.092614	-0.252536	-0.206709	-0.195927	0.039343	0.102048	0.200111	-0.14504	-0.099213	-0.088431	0.146839	0.098062	-0.247088	-0.201261	-0.19048	0.044791	-0.34515	-0.299323	-0.288542	-0.053272	0.045827	0.056608	0.291879	0.010781	0.246052	0.23527
+v0.4498_625	2	0.000002	0	-120.084702	0	0	0.000801	0.000045	0.135374	0.047823	0.005607	0	0.000776	0	0.000128	0.004174	0.135488	0.000855	0.170794	0.000053	0	0.272071	0.012599	0	0.000001	0	0	0.149351	0	0.000049	0	0	0.000005	0.00515	0	0	0.038427	0.408218	0.000047	0.00448	0	0.000769	0.120489	0.041611	-0.055515	-0.078879	-0.176005	-0.097126	-0.249448	0.158724	0.115616	0.058829	-0.136156	-0.053901	-0.16855	-0.306015	-0.043108	-0.099895	-0.29488	-0.212625	-0.327274	-0.46474	-0.056787	-0.251772	-0.169517	-0.284166	-0.421632	-0.194985	-0.11273	-0.227379	-0.364845	0.082255	-0.032394	-0.16986	-0.114649	-0.252114	-0.137466
+v0.4711_463	3	0.474695	0.000093	-135.57425	0	0	0.000006	0.000013	0.000128	0	0.488485	0.000043	0.000004	0	0	0.000004	0.000001	0.000295	0.006152	0.002433	0.000335	0.51173	0.755875	0	0	0	0.046325	0.729076	0	0	0	0.173445	0	0	0	0.089918	0.319884	0.50045	0	0.745698	0	0	0.11757	0.100696	0.22644	-0.016874	0.108869	0.125743	-0.194248	0.200783	0.178185	0.190094	-0.13207	-0.097712	-0.108863	0.130715	-0.022598	-0.010689	-0.332853	-0.298495	-0.309645	-0.070068	0.011909	-0.310255	-0.275897	-0.287048	-0.04747	-0.322164	-0.287806	-0.298957	-0.059379	0.034358	0.023208	0.262785	-0.01115	0.228427	0.239578
+v0.4715_659	4	0.623926	0.000349	-238.084202	0	0	0	0.219999	0	0	0	0	0	0	0.001773	0	0.000326	0	0	0	0.000025	0	0	0	0	0	0.070602	0.000027	0	0	0	0	0	0	0	0	0.000006	0	0	0	0	0	0.012039	0.138015	0.211119	0.125976	0.19908	0.073104	-0.326171	-0.045354	0.116577	0.053052	-0.44462	-0.375188	-0.285168	-0.075434	0.161931	0.098406	-0.399266	-0.329834	-0.239814	-0.03008	-0.063525	-0.561197	-0.491765	-0.401744	-0.192011	-0.497672	-0.42824	-0.33822	-0.128486	0.069432	0.159453	0.369186	0.09002	0.299754	0.209734
+v0.4766_757	5	0.005552	0.000365	-149.179654	0.00001	0	0.00028	0.015826	0.000092	0.000002	0.083272	0.004665	0.23638	0	0.064552	0.000169	0.102895	0	0	0	0.062636	0.033569	0.819707	0	0	0	0.000447	0.019489	0	0	0	0	0	0	0	0.000898	0.106307	0.055879	0	0.755242	0.000055	0.000152	0.052746	0.090042	0.115324	0.037296	0.062579	0.025283	-0.224878	0.056412	0.121655	0.049578	-0.240112	-0.191032	-0.181683	-0.059041	0.065243	-0.006834	-0.296524	-0.247444	-0.238095	-0.115453	-0.072077	-0.361767	-0.312687	-0.303338	-0.180696	-0.28969	-0.24061	-0.231262	-0.10862	0.04908	0.058428	0.181071	0.009349	0.131991	0.122642