Mercurial > repos > iuc > limma_voom

--- a/limma_voom.R	Tue Mar 01 08:03:53 2022 +0000
+++ b/limma_voom.R	Fri Feb 09 17:06:25 2024 +0000
@@ -46,7 +46,21 @@
 # Modified by: Maria Doyle - Jun 2017, Jan 2018, May 2018

 # Record starting time
-time_start <- as.character(Sys.time())
+time_start <- Sys.time()
+
+# Setup R error handling to go to stderr
+options(
+    show.error.messages = FALSE,
+    error = function() {
+        cat(geterrmessage(), file = stderr())
+        q("no", 1, FALSE)
+    }
+)
+
+# Unify locale settings
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+warnings()

 # Load all required libraries
 library(methods, quietly = TRUE, warn.conflicts = FALSE)
@@ -106,19 +120,19 @@
 # Create cata function: default path set, default seperator empty and appending
 # true by default (Ripped straight from the cat function with altered argument
 # defaults)
-cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE, labels = NULL,
-               append = TRUE) {
-    if (is.character(file))
-        if (file == "")
+cata <- function(..., file = opt$htmlPath, sep = "", fill = FALSE,
+                 labels = NULL, append = TRUE) {
+    if (is.character(file)) {
+        if (file == "") {
             file <- stdout()
-        else if (substring(file, 1L, 1L) == "|") {
+        } else if (substring(file, 1L, 1L) == "|") {
             file <- pipe(substring(file, 2L), "w")
             on.exit(close(file))
+        } else {
+            file <- file(file, ifelse(append, "a", "w"))
+            on.exit(close(file))
         }
-        else {
-        file <- file(file, ifelse(append, "a", "w"))
-      on.exit(close(file))
-        }
+    }
     .Internal(cat(list(...), file, sep, fill, labels, append))
 }

@@ -162,39 +176,42 @@

 # Get options, using the spec as defined by the enclosed list.
 # Read the options from the default: commandArgs(TRUE).
-spec <- matrix(c(
-    "htmlPath", "R", 1, "character",
-    "outPath", "o", 1, "character",
-    "filesPath", "j", 2, "character",
-    "matrixPath", "m", 2, "character",
-    "factFile", "f", 2, "character",
-    "factInput", "i", 2, "character",
-    "annoPath", "a", 2, "character",
-    "contrastFile", "C", 1, "character",
-    "contrastInput", "D", 1, "character",
-    "cpmReq", "c", 1, "double",
-    "totReq", "y", 0, "logical",
-    "cntReq", "z", 1, "integer",
-    "sampleReq", "s", 1, "integer",
-    "filtCounts", "F", 0, "logical",
-    "normCounts", "x", 0, "logical",
-    "rdaOpt", "r", 0, "logical",
-    "lfcReq", "l", 1, "double",
-    "pValReq", "p", 1, "double",
-    "pAdjOpt", "d", 1, "character",
-    "normOpt", "n", 1, "character",
-    "robOpt", "b", 0, "logical",
-    "trend", "t", 1, "double",
-    "weightOpt", "w", 0, "logical",
-    "topgenes", "G", 1, "integer",
-    "treatOpt", "T", 0, "logical",
-    "plots", "P", 1, "character",
-    "libinfoOpt", "L", 0, "logical"),
-    byrow = TRUE, ncol = 4)
+spec <- matrix(
+    c(
+        "htmlPath", "R", 1, "character",
+        "outPath", "o", 1, "character",
+        "filesPath", "j", 2, "character",
+        "matrixPath", "m", 2, "character",
+        "factFile", "f", 2, "character",
+        "factInput", "i", 2, "character",
+        "annoPath", "a", 2, "character",
+        "contrastFile", "C", 1, "character",
+        "contrastInput", "D", 1, "character",
+        "cpmReq", "c", 1, "double",
+        "totReq", "y", 0, "logical",
+        "cntReq", "z", 1, "integer",
+        "sampleReq", "s", 1, "integer",
+        "filtCounts", "F", 0, "logical",
+        "normCounts", "x", 0, "logical",
+        "rdaOpt", "r", 0, "logical",
+        "lfcReq", "l", 1, "double",
+        "pValReq", "p", 1, "double",
+        "pAdjOpt", "d", 1, "character",
+        "normOpt", "n", 1, "character",
+        "robOpt", "b", 0, "logical",
+        "trend", "t", 1, "double",
+        "weightOpt", "w", 0, "logical",
+        "topgenes", "G", 1, "integer",
+        "treatOpt", "T", 0, "logical",
+        "plots", "P", 1, "character",
+        "libinfoOpt", "L", 0, "logical"
+    ),
+    byrow = TRUE, ncol = 4
+)
 opt <- getopt(spec)


-if (is.null(opt$matrixPath) & is.null(opt$filesPath)) {
+if (is.null(opt$matrixPath) && is.null(opt$filesPath)) {
     cat("A counts matrix (or a set of counts files) is required.\n")
     q(status = 1)
 }
@@ -283,10 +300,12 @@
     factor_list <- parser$getObject()
     factors <- sapply(factor_list, function(x) x[[1]])
     filenames_in <- unname(unlist(factor_list[[1]][[2]]))
-    sampletable <- data.frame(sample = basename(filenames_in),
-                            filename = filenames_in,
-                            row.names = filenames_in,
-                            stringsAsFactors = FALSE)
+    sampletable <- data.frame(
+        sample = basename(filenames_in),
+        filename = filenames_in,
+        row.names = filenames_in,
+        stringsAsFactors = FALSE
+    )
     for (factor in factor_list) {
         factorname <- factor[[1]]
         sampletable[[factorname]] <- character(nrow(sampletable))
@@ -301,12 +320,11 @@
     rem <- c("sample", "filename")
     factors <- sampletable[, !(names(sampletable) %in% rem), drop = FALSE]

-    #read in count files and create single table
+    # read in count files and create single table
     countfiles <- lapply(sampletable$filename, function(x) {
         read.delim(x, row.names = 1)
-        })
+    })
     counts <- do.call("cbind", countfiles)
-
 } else {
     # Process the single count matrix
     counts <- read.table(opt$matrixPath, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = FALSE, check.names = FALSE)
@@ -317,12 +335,13 @@
     # Process factors
     if (is.null(opt$factInput)) {
         factordata <- read.table(opt$factFile, header = TRUE, sep = "\t", strip.white = TRUE, stringsAsFactors = TRUE)
-        if (!setequal(factordata[, 1], colnames(counts)))
+        if (!setequal(factordata[, 1], colnames(counts))) {
             stop("Sample IDs in counts and factors files don't match")
+        }
         # order samples as in counts matrix
         factordata <- factordata[match(colnames(counts), factordata[, 1]), ]
         factors <- factordata[, -1, drop = FALSE]
-    }  else {
+    } else {
         factors <- unlist(strsplit(opt$factInput, "|", fixed = TRUE))
         factordata <- list()
         for (fact in factors) {
@@ -347,19 +366,19 @@
 factors <- sapply(factors, make.names)
 factors <- data.frame(factors, stringsAsFactors = TRUE)

- # if annotation file provided
+# if annotation file provided
 if (have_anno) {
     geneanno <- read.table(opt$annoPath, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
 }

-#Create output directory
+# Create output directory
 dir.create(opt$outPath, showWarnings = FALSE)

 # Process contrasts
 if (is.null(opt$contrastInput)) {
     contrast_data <- read.table(opt$contrastFile, header = TRUE, sep = "\t", quote = "", strip.white = TRUE, stringsAsFactors = FALSE)
     contrast_data <- contrast_data[, 1, drop = TRUE]
-}  else {
+} else {
     # Split up contrasts seperated by comma into a vector then sanitise
     contrast_data <- unlist(strsplit(opt$contrastInput, split = ","))
 }
@@ -370,15 +389,14 @@
 cons <- NULL
 cons_d <- NULL
 for (i in contrast_data) {
-
     # if the contrast is a difference of differences e.g. (A-B)-(X-Y)
     if (grepl("\\)-\\(", i)) {
         i <- unlist(strsplit(i, split = "\\)-\\("))
         i <- gsub("\\(|\\)", "", i)
         for (j in i) {
-           j <- sanitise_contrast(j)
-           j <- paste0("(", j, ")")
-           cons_d  <- append(cons_d, unlist(j))
+            j <- sanitise_contrast(j)
+            j <- paste0("(", j, ")")
+            cons_d <- append(cons_d, unlist(j))
         }
         cons_d <- paste(cons_d, collapse = "-")
         cons <- append(cons, unlist(cons_d))
@@ -428,10 +446,14 @@

 # Initialise data for html links and images, data frame with columns Label and
 # Link
-link_data <- data.frame(Label = character(), Link = character(),
-                       stringsAsFactors = FALSE)
-image_data <- data.frame(Label = character(), Link = character(),
-                        stringsAsFactors = FALSE)
+link_data <- data.frame(
+    Label = character(), Link = character(),
+    stringsAsFactors = FALSE
+)
+image_data <- data.frame(
+    Label = character(), Link = character(),
+    stringsAsFactors = FALSE
+)

 # Initialise vectors for storage of up/down/neutral regulated counts
 up_count <- numeric()
@@ -447,11 +469,11 @@
 data <- list()
 data$counts <- counts
 if (have_anno) {
-  # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
-  annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
-  data$genes <- annoord
+    # order annotation by genes in counts (assumes gene ids are in 1st column of geneanno)
+    annoord <- geneanno[match(row.names(counts), geneanno[, 1]), ]
+    data$genes <- annoord
 } else {
-  data$genes <- data.frame(GeneID = row.names(counts))
+    data$genes <- data.frame(GeneID = row.names(counts))
 }

 # Creating naming data
@@ -461,7 +483,7 @@

 # Creating colours for the groups
 cols <- as.numeric(factors[, 1])
-col.group <- palette()[cols]
+col_group <- palette()[cols]

 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
 # samples. Default is no filtering
@@ -469,7 +491,6 @@
 nsamples <- ncol(data$counts)

 if (filt_cpm || filt_smpcount || filt_totcount) {
-
     if (filt_totcount) {
         keep <- rowSums(data$counts) >= opt$cntReq
     } else if (filt_smpcount) {
@@ -513,22 +534,22 @@

         # before filtering
         lcpm1 <- cpm(counts, log = TRUE)
-        plot(density(lcpm1[, 1]), col = col.group[1], lwd = 2, las = 2, main = "", xlab = "")
+        plot(density(lcpm1[, 1]), col = col_group[1], lwd = 2, las = 2, main = "", xlab = "")
         title(main = "Density Plot: Raw counts", xlab = "Log-cpm")
         for (i in 2:nsamples) {
             den <- density(lcpm1[, i])
-            lines(den$x, den$y, col = col.group[i], lwd = 2)
+            lines(den$x, den$y, col = col_group[i], lwd = 2)
         }

         # after filtering
         lcpm2 <- cpm(data$counts, log = TRUE)
-        plot(density(lcpm2[, 1]), col = col.group[1], lwd = 2, las = 2, main = "", xlab = "")
+        plot(density(lcpm2[, 1]), col = col_group[1], lwd = 2, las = 2, main = "", xlab = "")
         title(main = "Density Plot: Filtered counts", xlab = "Log-cpm")
         for (i in 2:nsamples) {
             den <- density(lcpm2[, i])
-            lines(den$x, den$y, col = col.group[i], lwd = 2)
+            lines(den$x, den$y, col = col_group[i], lwd = 2)
         }
-        legend("topright", samplenames, text.col = col.group, bty = "n")
+        legend("topright", samplenames, text.col = col_group, bty = "n")
         img_name <- "Densityplots.png"
         img_addr <- "densityplots.png"
         image_data <- rbind(image_data, data.frame(Label = img_name, Link = img_addr, stringsAsFactors = FALSE))
@@ -537,19 +558,19 @@
         # PDF
         pdf(den_pdf, width = 14)
         par(mfrow = c(1, 2), cex.axis = 0.8)
-        plot(density(lcpm1[, 1]), col = col.group[1], lwd = 2, las = 2, main = "", xlab = "")
+        plot(density(lcpm1[, 1]), col = col_group[1], lwd = 2, las = 2, main = "", xlab = "")
         title(main = "Density Plot: Raw counts", xlab = "Log-cpm")
         for (i in 2:nsamples) {
             den <- density(lcpm1[, i])
-            lines(den$x, den$y, col = col.group[i], lwd = 2)
+            lines(den$x, den$y, col = col_group[i], lwd = 2)
         }
-        plot(density(lcpm2[, 1]), col = col.group[1], lwd = 2, las = 2, main = "", xlab = "")
+        plot(density(lcpm2[, 1]), col = col_group[1], lwd = 2, las = 2, main = "", xlab = "")
         title(main = "Density Plot: Filtered counts", xlab = "Log-cpm")
         for (i in 2:nsamples) {
             den <- density(lcpm2[, i])
-            lines(den$x, den$y, col = col.group[i], lwd = 2)
+            lines(den$x, den$y, col = col_group[i], lwd = 2)
         }
-        legend("topright", samplenames, text.col = col.group, bty = "n")
+        legend("topright", samplenames, text.col = col_group, bty = "n")
         link_name <- "DensityPlots.pdf"
         link_addr <- "densityplots.pdf"
         link_data <- rbind(link_data, data.frame(Label = link_name, Link = link_addr, stringsAsFactors = FALSE))
@@ -582,7 +603,7 @@

 # Calculating normalising factors
 print("Calculating Normalisation Factors")
-logcounts <- y #store for plots
+logcounts <- y # store for plots
 y <- calcNormFactors(y, method = opt$normOpt)

 # Generate contrasts information
@@ -594,20 +615,20 @@
 ################################################################################

 # Plot Box plots (before and after normalisation)
-if (opt$normOpt != "none" & "b" %in% plots) {
+if (opt$normOpt != "none" && "b" %in% plots) {
     png(box_png, width = 1000, height = 500)
     par(mfrow = c(1, 2), mar = c(6, 4, 2, 2) + 0.1)
     labels <- colnames(counts)

     lcpm1 <- cpm(y$counts, log = TRUE)
-    boxplot(lcpm1, las = 2, col = col.group, xaxt = "n", xlab = "")
+    boxplot(lcpm1, las = 2, col = col_group, xaxt = "n", xlab = "")
     axis(1, at = seq_along(labels), labels = FALSE)
     abline(h = median(lcpm1), col = 4)
     text(x = seq_along(labels), y = par("usr")[3] - 1, srt = 45, adj = 1, labels = labels, xpd = TRUE)
     title(main = "Box Plot: Unnormalised counts", ylab = "Log-cpm")

     lcpm2 <- cpm(y, log = TRUE)
-    boxplot(lcpm2, las = 2, col = col.group, xaxt = "n",  xlab = "")
+    boxplot(lcpm2, las = 2, col = col_group, xaxt = "n", xlab = "")
     axis(1, at = seq_along(labels), labels = FALSE)
     text(x = seq_along(labels), y = par("usr")[3] - 1, srt = 45, adj = 1, labels = labels, xpd = TRUE)
     abline(h = median(lcpm2), col = 4)
@@ -620,12 +641,12 @@

     pdf(box_pdf, width = 14)
     par(mfrow = c(1, 2), mar = c(6, 4, 2, 2) + 0.1)
-    boxplot(lcpm1, las = 2, col = col.group, xaxt = "n", xlab = "")
+    boxplot(lcpm1, las = 2, col = col_group, xaxt = "n", xlab = "")
     axis(1, at = seq_along(labels), labels = FALSE)
     abline(h = median(lcpm1), col = 4)
     text(x = seq_along(labels), y = par("usr")[3] - 1, srt = 45, adj = 1, labels = labels, xpd = TRUE)
     title(main = "Box Plot: Unnormalised counts", ylab = "Log-cpm")
-    boxplot(lcpm2, las = 2, col = col.group, xaxt = "n",  xlab = "")
+    boxplot(lcpm2, las = 2, col = col_group, xaxt = "n", xlab = "")
     axis(1, at = seq_along(labels), labels = FALSE)
     text(x = seq_along(labels), y = par("usr")[3] - 1, srt = 45, adj = 1, labels = labels, xpd = TRUE)
     abline(h = median(lcpm2), col = 4)
@@ -644,7 +665,7 @@

 # get column of matrix
 get_cols <- function(x, inds) {
-  x[, inds, drop = FALSE]
+    x[, inds, drop = FALSE]
 }

 x <- cpm(y, log = TRUE)
@@ -656,19 +677,19 @@
 bad <- rowSums(is.finite(x)) < nsamples

 if (any(bad)) {
-  warning("Rows containing infinite values have been removed")
-  x <- x[!bad, , drop = FALSE]
+    warning("Rows containing infinite values have been removed")
+    x <- x[!bad, , drop = FALSE]
 }

 dd <- matrix(0, nrow = nsamples, ncol = nsamples, dimnames = list(cn, cn))
 topindex <- nprobes - top + 1L
 for (i in 2L:(nsamples)) {
-  for (j in 1L:(i - 1L)) {
-    dists <- (get_cols(x, i) - get_cols(x, j))^2
-    dists <- sort.int(dists, partial = topindex)
-    topdist <- dists[topindex:nprobes]
-    dd[i, j] <- sqrt(mean(topdist))
-  }
+    for (j in 1L:(i - 1L)) {
+        dists <- (get_cols(x, i) - get_cols(x, j))^2
+        dists <- sort.int(dists, partial = topindex)
+        topdist <- dists[topindex:nprobes]
+        dd[i, j] <- sqrt(mean(topdist))
+    }
 }

 a1 <- suppressWarnings(cmdscale(as.dist(dd), k = min(ndim, 8), eig = TRUE))
@@ -676,7 +697,7 @@
 png(mdsscree_png, width = 1000, height = 500)
 par(mfrow = c(1, 2))
 plotMDS(y, labels = samplenames, col = as.numeric(factors[, 1]), main = "MDS Plot: Dims 1 and 2")
-barplot(eigen$eigen, names.arg = eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las = 1)
+barplot(eigen$eigen, names.arg = eigen$name, main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las = 1)
 img_name <- paste0("MDSPlot_", names(factors)[1], ".png")
 img_addr <- "mdsscree.png"
 image_data <- rbind(image_data, data.frame(Label = img_name, Link = img_addr, stringsAsFactors = FALSE))
@@ -685,7 +706,7 @@
 pdf(mdsscree_pdf, width = 14)
 par(mfrow = c(1, 2))
 plotMDS(y, labels = samplenames, col = as.numeric(factors[, 1]), main = "MDS Plot: Dims 1 and 2")
-barplot(eigen$eigen, names.arg = eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las = 1)
+barplot(eigen$eigen, names.arg = eigen$name, main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las = 1)
 link_name <- paste0("MDSPlot_", names(factors)[1], ".pdf")
 link_addr <- "mdsscree.pdf"
 link_data <- rbind(link_data, data.frame(Label = link_name, Link = link_addr, stringsAsFactors = FALSE))
@@ -693,8 +714,10 @@

 # generate Glimma interactive MDS Plot
 if ("i" %in% plots) {
-    Glimma::glMDSPlot(y, labels = samplenames, groups = factors[, 1],
-        folder = "glimma_MDS", launch = FALSE)
+    Glimma::glMDSPlot(y,
+        labels = samplenames, groups = factors[, 1],
+        folder = "glimma_MDS", launch = FALSE
+    )
     link_name <- "Glimma_MDSPlot.html"
     link_addr <- "glimma_MDS/MDS-Plot.html"
     link_data <- rbind(link_data, c(link_name, link_addr))
@@ -789,7 +812,6 @@
         # Generating fit data and top table with weights
         wts <- vdata$weights
         voomfit <- lmFit(vdata, design, weights = wts)
-
     } else {
         voom_pdf <- make_out("voomplot.pdf")
         voom_png <- make_out("voomplot.png")
@@ -812,7 +834,7 @@
         voomfit <- lmFit(vdata, design)
     }

-     # Save normalised counts (log2cpm)
+    # Save normalised counts (log2cpm)
     if (want_norm) {
         norm_counts <- data.frame(vdata$genes, vdata$E, check.names = FALSE)
         write.table(norm_counts, file = norm_out, row.names = FALSE, sep = "\t", quote = FALSE)
@@ -847,7 +869,7 @@
 link_data <- rbind(link_data, c(link_name, link_addr))
 invisible(dev.off())

- # Save library size info
+# Save library size info
 if (want_libinfo) {
     efflibsize <- round(y$samples$lib.size * y$samples$norm.factors)
     libsizeinfo <- cbind(y$samples, EffectiveLibrarySize = efflibsize)
@@ -869,8 +891,10 @@
     }
 }

-status <- decideTests(fit, adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
-                       lfc = opt$lfcReq)
+status <- decideTests(fit,
+    adjust.method = opt$pAdjOpt, p.value = opt$pValReq,
+    lfc = opt$lfcReq
+)
 sum_status <- summary(status)

 for (i in seq_along(cons)) {
@@ -885,7 +909,7 @@
     # Write top expressions table
     if (want_treat) {
         top <- topTreat(fit, coef = i, adjust.method = opt$pAdjOpt, number = Inf, sort.by = "P")
-    } else{
+    } else {
         top <- topTable(fit, coef = i, adjust.method = opt$pAdjOpt, number = Inf, sort.by = "P")
     }
     write.table(top, file = top_out[i], row.names = FALSE, sep = "\t", quote = FALSE)
@@ -895,10 +919,12 @@

     # Plot MD (log ratios vs mean average) using limma package on weighted
     pdf(md_pdf[i])
-    limma::plotMD(fit, status = status[, i], coef = i,
+    limma::plotMD(fit,
+        status = status[, i], coef = i,
         main = paste("MD Plot:", unmake_names(con)),
         hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-        xlab = "Average Expression", ylab = "logFC")
+        xlab = "Average Expression", ylab = "logFC"
+    )
     abline(h = 0, col = "grey", lty = 2)
     link_name <- paste0("MDPlot_", con, ".pdf")
     link_addr <- paste0("mdplot_", con, ".pdf")
@@ -906,7 +932,7 @@
     invisible(dev.off())

     # Generate Glimma interactive Volcano, MD plot and tables, requires annotation file (assumes gene labels/symbols in 2nd column)
-    if ("i" %in% plots & have_anno) {
+    if ("i" %in% plots && have_anno) {
         # make gene labels unique to handle NAs
         geneanno <- y$genes
         geneanno[, 2] <- make.unique(geneanno[, 2])
@@ -914,25 +940,29 @@
         # use the logcpms for the counts
         if (want_trend) {
             cnts <- logcpm
-        } else{
+        } else {
             cnts <- vdata$E
         }

         # MD plot
-        Glimma::glMDPlot(fit, coef = i, counts = cnts, anno = geneanno, groups = factors[, 1],
-             status = status[, i], sample.cols = col.group,
-             main = paste("MD Plot:", unmake_names(con)), side.main = colnames(y$genes)[2],
-             folder = paste0("glimma_", unmake_names(con)), launch = FALSE)
+        Glimma::glMDPlot(fit,
+            coef = i, counts = cnts, anno = geneanno, groups = factors[, 1],
+            status = status[, i], sample.cols = col_group,
+            main = paste("MD Plot:", unmake_names(con)), side.main = colnames(y$genes)[2],
+            folder = paste0("glimma_", unmake_names(con)), launch = FALSE
+        )
         link_name <- paste0("Glimma_MDPlot_", con, ".html")
         link_addr <- paste0("glimma_", con, "/MD-Plot.html")
         link_data <- rbind(link_data, c(link_name, link_addr))

         # Volcano plot
-        Glimma::glXYPlot(x = fit$coefficients[, i], y = -log10(fit$p.value[, i]), counts = cnts, anno = geneanno, groups = factors[, 1],
-            status = status[, i], sample.cols = col.group,
+        Glimma::glXYPlot(
+            x = fit$coefficients[, i], y = -log10(fit$p.value[, i]), counts = cnts, anno = geneanno, groups = factors[, 1],
+            status = status[, i], sample.cols = col_group,
             main = paste("Volcano Plot:", unmake_names(con)), side.main = colnames(y$genes)[2],
             xlab = "logFC", ylab = "-log10(P-value)",
-            folder = paste0("glimma_volcano_", unmake_names(con)), launch = FALSE)
+            folder = paste0("glimma_volcano_", unmake_names(con)), launch = FALSE
+        )
         link_name <- paste0("Glimma_VolcanoPlot_", con, ".html")
         link_addr <- paste0("glimma_volcano_", con, "/XY-Plot.html")
         link_data <- rbind(link_data, c(link_name, link_addr))
@@ -946,10 +976,12 @@
     } else {
         labels <- fit$genes$GeneID
     }
-    limma::volcanoplot(fit, coef = i,
+    limma::volcanoplot(fit,
+        coef = i,
         main = paste("Volcano Plot:", unmake_names(con)),
         highlight = opt$topgenes,
-        names = labels)
+        names = labels
+    )
     link_name <- paste0("VolcanoPlot_", con, ".pdf")
     link_addr <- paste0("volplot_", con, ".pdf")
     link_data <- rbind(link_data, c(link_name, link_addr))
@@ -960,21 +992,27 @@
     par(mfrow = c(1, 2), mar = c(5, 4, 2, 2) + 0.1, oma = c(0, 0, 3, 0))

     # MD plot
-    limma::plotMD(fit, status = status[, i], coef = i, main = "MD Plot",
+    limma::plotMD(fit,
+        status = status[, i], coef = i, main = "MD Plot",
         hl.col = alpha(c("firebrick", "blue"), 0.4), values = c(1, -1),
-        xlab = "Average Expression", ylab = "logFC")
+        xlab = "Average Expression", ylab = "logFC"
+    )
     abline(h = 0, col = "grey", lty = 2)

     # Volcano
     if (have_anno) {
         # labels must be in second column currently
-        limma::volcanoplot(fit, coef = i, main = "Volcano Plot",
+        limma::volcanoplot(fit,
+            coef = i, main = "Volcano Plot",
             highlight = opt$topgenes,
-            names = fit$genes[, 2])
+            names = fit$genes[, 2]
+        )
     } else {
-        limma::volcanoplot(fit, coef = i, main = "Volcano Plot",
+        limma::volcanoplot(fit,
+            coef = i, main = "Volcano Plot",
             highlight = opt$topgenes,
-            names = fit$genes$GeneID)
+            names = fit$genes$GeneID
+        )
     }

     img_name <- paste0("MDVolPlot_", con)
@@ -999,10 +1037,12 @@
         } else {
             labels <- rownames(topexp)
         }
-        heatmap.2(topexp, scale = "row", Colv = FALSE, Rowv = FALSE, dendrogram = "none",
+        heatmap.2(topexp,
+            scale = "row", Colv = FALSE, Rowv = FALSE, dendrogram = "none",
             main = paste("Contrast:", unmake_names(con), "\nTop", opt$topgenes, "genes by adj.P.Val"),
             trace = "none", density.info = "none", lhei = c(2, 10), margin = c(8, 6), labRow = labels, cexRow = 0.7, srtCol = 45,
-            col = mycol, ColSideColors = col.group)
+            col = mycol, ColSideColors = col_group
+        )
         link_name <- paste0("Heatmap_", con, ".pdf")
         link_addr <- paste0("heatmap_", con, ".pdf")
         link_data <- rbind(link_data, c(link_name, link_addr))
@@ -1013,17 +1053,21 @@
         # Plot Stripcharts of top genes
         pdf(strip_pdf[i], title = paste("Contrast:", unmake_names(con)))
         par(mfrow = c(3, 2), cex.main = 0.8, cex.axis = 0.8)
-        cols <- unique(col.group)
+        cols <- unique(col_group)

         for (j in seq_along(topgenes)) {
             lfc <- round(top[topgenes[j], "logFC"], 2)
             pval <- round(top[topgenes[j], "adj.P.Val"], 5)
             if (want_trend) {
-                stripchart(plot_data[topgenes[j], ] ~ factors[, 1], vertical = TRUE, las = 2, pch = 16, cex = 0.8, cex.lab = 0.8, col = cols,
-                    method = "jitter", ylab = "Normalised log2 expression", main = paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+                stripchart(plot_data[topgenes[j], ] ~ factors[, 1],
+                    vertical = TRUE, las = 2, pch = 16, cex = 0.8, cex.lab = 0.8, col = cols,
+                    method = "jitter", ylab = "Normalised log2 expression", main = paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)
+                )
             } else {
-                stripchart(plot_data$E[topgenes[j], ] ~ factors[, 1], vertical = TRUE, las = 2, pch = 16, cex = 0.8, cex.lab = 0.8, col = cols,
-                    method = "jitter", ylab = "Normalised log2 expression", main = paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+                stripchart(plot_data$E[topgenes[j], ] ~ factors[, 1],
+                    vertical = TRUE, las = 2, pch = 16, cex = 0.8, cex.lab = 0.8, col = cols,
+                    method = "jitter", ylab = "Normalised log2 expression", main = paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval)
+                )
             }
         }
         link_name <- paste0("Stripcharts_", con, ".pdf")
@@ -1039,11 +1083,13 @@
 if (want_rda) {
     print("Saving RData")
     if (want_weight) {
-      save(counts, data, y, status, plot_data, labels, factors, wts, fit, top, contrast_data, contrasts, design,
-           file = rda_out, ascii = TRUE)
+        save(counts, data, y, status, plot_data, labels, factors, wts, fit, top, contrast_data, contrasts, design,
+            file = rda_out, ascii = TRUE
+        )
     } else {
-      save(counts, data, y, status, plot_data, labels, factors, fit, top, contrast_data, contrasts, design,
-           file = rda_out, ascii = TRUE)
+        save(counts, data, y, status, plot_data, labels, factors, fit, top, contrast_data, contrasts, design,
+            file = rda_out, ascii = TRUE
+        )
     }
     link_data <- rbind(link_data, c((paste0(de_method, "_analysis.RData")), (paste0(de_method, "_analysis.RData"))))
 }
@@ -1053,9 +1099,11 @@
 link_data <- rbind(link_data, c("Session Info", "session_info.txt"))

 # Record ending time and calculate total run time
-time_end <- as.character(Sys.time())
-time_taken <- capture.output(round(difftime(time_end, time_start), digits = 3))
+time_end <- Sys.time()
+time_taken <- capture.output(round(difftime(time_end, time_start), digits = 2))
 time_taken <- gsub("Time difference of ", "", time_taken, fixed = TRUE)
+time_start <- format(time_start, "%A, %B %d, %Y %H:%M:%S")
+time_end <- format(time_end, "%A, %B %d, %Y %H:%M:%S")
 ################################################################################
 ### HTML Generation
 ################################################################################
@@ -1097,35 +1145,35 @@
 cata("</table>")

 cata("<h4>Plots:</h4>\n")
-#PDFs
+# PDFs
 for (i in seq_len(nrow(link_data))) {
-    if (grepl(".pdf", link_data$Link[i]) & grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", link_data$Link[i])) {
+    if (grepl(".pdf", link_data$Link[i]) && grepl("density|cpm|boxplot|mds|mdplots|voom|saplot", link_data$Link[i])) {
         html_link(link_data$Link[i], link_data$Label[i])
-  }
+    }
 }

 for (i in seq_len(nrow(link_data))) {
     if (grepl("mdplot_", link_data$Link[i])) {
         html_link(link_data$Link[i], link_data$Label[i])
-  }
+    }
 }

 for (i in seq_len(nrow(link_data))) {
     if (grepl("volplot", link_data$Link[i])) {
         html_link(link_data$Link[i], link_data$Label[i])
-  }
+    }
 }

 for (i in seq_len(nrow(link_data))) {
     if (grepl("heatmap", link_data$Link[i])) {
         html_link(link_data$Link[i], link_data$Label[i])
-  }
+    }
 }

 for (i in seq_len(nrow(link_data))) {
     if (grepl("stripcharts", link_data$Link[i])) {
         html_link(link_data$Link[i], link_data$Label[i])
-  }
+    }
 }

 cata("<h4>Tables:</h4>\n")
@@ -1148,11 +1196,11 @@

 if ("i" %in% plots) {
     cata("<h4>Glimma Interactive Results:</h4>\n")
-        for (i in seq_len(nrow(link_data))) {
-            if (grepl("glimma", link_data$Link[i])) {
-                html_link(link_data$Link[i], link_data$Label[i])
-            }
+    for (i in seq_len(nrow(link_data))) {
+        if (grepl("glimma", link_data$Link[i])) {
+            html_link(link_data$Link[i], link_data$Label[i])
         }
+    }
 }

 cata("<p>Alt-click links to download file.</p>\n")
@@ -1165,23 +1213,31 @@

 if (filt_cpm || filt_smpcount || filt_totcount) {
     if (filt_cpm) {
-    temp_str <- paste("Genes without more than", opt$cpmReq,
-                                     "CPM in at least", opt$sampleReq, "samples are insignificant",
-                                     "and filtered out.")
+        temp_str <- paste(
+            "Genes without more than", opt$cpmReq,
+            "CPM in at least", opt$sampleReq, "samples are insignificant",
+            "and filtered out."
+        )
     } else if (filt_smpcount) {
-        temp_str <- paste("Genes without more than", opt$cntReq,
-                                     "counts in at least", opt$sampleReq, "samples are insignificant",
-                                     "and filtered out.")
+        temp_str <- paste(
+            "Genes without more than", opt$cntReq,
+            "counts in at least", opt$sampleReq, "samples are insignificant",
+            "and filtered out."
+        )
     } else if (filt_totcount) {
-            temp_str <- paste("Genes without more than", opt$cntReq,
-                                     "counts, after summing counts for all samples, are insignificant",
-                                     "and filtered out.")
+        temp_str <- paste(
+            "Genes without more than", opt$cntReq,
+            "counts, after summing counts for all samples, are insignificant",
+            "and filtered out."
+        )
     }

     list_item(temp_str)
     filter_prop <- round(filtered_count / prefilter_count * 100, digits = 2)
-    temp_str <- paste0(filtered_count, " of ", prefilter_count, " (", filter_prop,
-                                     "%) genes were filtered out for low expression.")
+    temp_str <- paste0(
+        filtered_count, " of ", prefilter_count, " (", filter_prop,
+        "%) genes were filtered out for low expression."
+    )
     list_item(temp_str)
 }
 list_item(opt$normOpt, " was the method used to normalise library sizes.")
@@ -1207,22 +1263,28 @@
 }
 if (opt$pAdjOpt != "none") {
     if (opt$pAdjOpt == "BH" || opt$pAdjOpt == "BY") {
-        temp_str <- paste0("MD Plot highlighted genes are significant at FDR ",
-                        "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-                        "least ", opt$lfcReq, ".")
+        temp_str <- paste0(
+            "MD Plot highlighted genes are significant at FDR ",
+            "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+            "least ", opt$lfcReq, "."
+        )
         list_item(temp_str)
     } else if (opt$pAdjOpt == "holm") {
-        temp_str <- paste0("MD Plot highlighted genes are significant at adjusted ",
-                        "p-value of ", opt$pValReq, "  by the Holm(1979) ",
-                        "method, and exhibit log2-fold-change of at least ",
-                        opt$lfcReq, ".")
+        temp_str <- paste0(
+            "MD Plot highlighted genes are significant at adjusted ",
+            "p-value of ", opt$pValReq, "  by the Holm(1979) ",
+            "method, and exhibit log2-fold-change of at least ",
+            opt$lfcReq, "."
+        )
         list_item(temp_str)
     }
-  } else {
-        temp_str <- paste0("MD Plot highlighted genes are significant at p-value ",
-                      "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
-                      "least ", opt$lfcReq, ".")
-        list_item(temp_str)
+} else {
+    temp_str <- paste0(
+        "MD Plot highlighted genes are significant at p-value ",
+        "of ", opt$pValReq, " and exhibit log2-fold-change of at ",
+        "least ", opt$lfcReq, "."
+    )
+    list_item(temp_str)
 }
 cata("</ul>\n")

@@ -1247,65 +1309,88 @@
     table_head_item(row.names(factors)[i])
     for (j in seq_len(ncol(factors))) {
         table_item(as.character(unmake_names(factors[i, j])))
-  }
-  cata("</tr>\n")
+    }
+    cata("</tr>\n")
 }
 cata("</table>")

 cit <- character()
 link <- character()
-link[1] <- paste0("<a href=\"",
-                  "http://www.bioconductor.org/packages/release/bioc/",
-                  "vignettes/limma/inst/doc/usersguide.pdf",
-                  "\">", "limma User's Guide", "</a>.")
+link[1] <- paste0(
+    "<a href=\"",
+    "http://www.bioconductor.org/packages/release/bioc/",
+    "vignettes/limma/inst/doc/usersguide.pdf",
+    "\">", "limma User's Guide", "</a>."
+)

-link[2] <- paste0("<a href=\"",
-                  "http://www.bioconductor.org/packages/release/bioc/",
-                  "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",
-                  "\">", "edgeR User's Guide", "</a>")
+link[2] <- paste0(
+    "<a href=\"",
+    "http://www.bioconductor.org/packages/release/bioc/",
+    "vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf",
+    "\">", "edgeR User's Guide", "</a>"
+)

 cit[1] <- paste("Please cite the following paper for this tool:")

-cit[2] <- paste("Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME,",
-                "Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? ",
-                "Modelling sample and observational level variability improves power ",
-                "in RNA-seq analyses. Nucleic Acids Research, 43(15), e97.")
-
-cit[3] <- paste("Please cite the paper below for the limma software itself.",
-                "Please also try to cite the appropriate methodology articles",
-                "that describe the statistical methods implemented in limma,",
-                "depending on which limma functions you are using. The",
-                "methodology articles are listed in Section 2.1 of the",
-                link[1],
-                "Cite no. 3 only if sample weights were used.")
-cit[4] <- paste("Smyth GK (2005). Limma: linear models for microarray data.",
-                "In: 'Bioinformatics and Computational Biology Solutions using",
-                "R and Bioconductor'. R. Gentleman, V. Carey, S. doit,.",
-                "Irizarry, W. Huber (eds), Springer, New York, pages 397-420.")
-cit[5] <- paste("Please cite the first paper for the software itself and the",
-                "other papers for the various original statistical methods",
-                "implemented in edgeR.  See Section 1.2 in the", link[2],
-                "for more detail.")
-cit[6] <- paste("Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a",
-                "Bioconductor package for differential expression analysis",
-                "of digital gene expression data. Bioinformatics 26, 139-140")
-cit[7] <- paste("Robinson MD and Smyth GK (2007). Moderated statistical tests",
-                "for assessing differences in tag abundance. Bioinformatics",
-                "23, 2881-2887")
-cit[8] <- paste("Robinson MD and Smyth GK (2008). Small-sample estimation of",
-                "negative binomial dispersion, with applications to SAGE data.",
-                "Biostatistics, 9, 321-332")
-cit[9] <- paste("McCarthy DJ, Chen Y and Smyth GK (2012). Differential",
-                "expression analysis of multifactor RNA-Seq experiments with",
-                "respect to biological variation. Nucleic Acids Research 40,",
-                "4288-4297")
-cit[10] <- paste("Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
-                "precision weights unlock linear model analysis tools for",
-                "RNA-seq read counts. Genome Biology 15, R29.")
-cit[11] <- paste("Ritchie ME, Diyagama D, Neilson J, van Laar R,",
-                "Dobrovic A, Holloway A and Smyth GK (2006).",
-                "Empirical array quality weights for microarray data.",
-                "BMC Bioinformatics 7, Article 261.")
+cit[2] <- paste(
+    "Liu R, Holik AZ, Su S, Jansz N, Chen K, Leong HS, Blewitt ME,",
+    "Asselin-Labat ML, Smyth GK, Ritchie ME (2015). Why weight? ",
+    "Modelling sample and observational level variability improves power ",
+    "in RNA-seq analyses. Nucleic Acids Research, 43(15), e97."
+)
+cit[3] <- paste(
+    "Please cite the paper below for the limma software itself.",
+    "Please also try to cite the appropriate methodology articles",
+    "that describe the statistical methods implemented in limma,",
+    "depending on which limma functions you are using. The",
+    "methodology articles are listed in Section 2.1 of the",
+    link[1],
+    "Cite no. 3 only if sample weights were used."
+)
+cit[4] <- paste(
+    "Smyth GK (2005). Limma: linear models for microarray data.",
+    "In: 'Bioinformatics and Computational Biology Solutions using",
+    "R and Bioconductor'. R. Gentleman, V. Carey, S. doit,.",
+    "Irizarry, W. Huber (eds), Springer, New York, pages 397-420."
+)
+cit[5] <- paste(
+    "Please cite the first paper for the software itself and the",
+    "other papers for the various original statistical methods",
+    "implemented in edgeR.  See Section 1.2 in the", link[2],
+    "for more detail."
+)
+cit[6] <- paste(
+    "Robinson MD, McCarthy DJ and Smyth GK (2010). edgeR: a",
+    "Bioconductor package for differential expression analysis",
+    "of digital gene expression data. Bioinformatics 26, 139-140"
+)
+cit[7] <- paste(
+    "Robinson MD and Smyth GK (2007). Moderated statistical tests",
+    "for assessing differences in tag abundance. Bioinformatics",
+    "23, 2881-2887"
+)
+cit[8] <- paste(
+    "Robinson MD and Smyth GK (2008). Small-sample estimation of",
+    "negative binomial dispersion, with applications to SAGE data.",
+    "Biostatistics, 9, 321-332"
+)
+cit[9] <- paste(
+    "McCarthy DJ, Chen Y and Smyth GK (2012). Differential",
+    "expression analysis of multifactor RNA-Seq experiments with",
+    "respect to biological variation. Nucleic Acids Research 40,",
+    "4288-4297"
+)
+cit[10] <- paste(
+    "Law CW, Chen Y, Shi W, and Smyth GK (2014). Voom:",
+    "precision weights unlock linear model analysis tools for",
+    "RNA-seq read counts. Genome Biology 15, R29."
+)
+cit[11] <- paste(
+    "Ritchie ME, Diyagama D, Neilson J, van Laar R,",
+    "Dobrovic A, Holloway A and Smyth GK (2006).",
+    "Empirical array quality weights for microarray data.",
+    "BMC Bioinformatics 7, Article 261."
+)
 cata("<h3>Citations</h3>\n")
 cata(cit[1], "\n")
 cata("<br>\n")
@@ -1338,13 +1423,16 @@

 cata("<table border=\"0\">\n")
 cata("<tr>\n")
-table_item("Task started at:"); table_item(time_start)
+table_item("Task started at:")
+table_item(time_start)
 cata("</tr>\n")
 cata("<tr>\n")
-table_item("Task ended at:"); table_item(time_end)
+table_item("Task ended at:")
+table_item(time_end)
 cata("</tr>\n")
 cata("<tr>\n")
-table_item("Task run time:"); table_item(time_taken)
+table_item("Task run time:")
+table_item(time_taken)
 cata("<tr>\n")
 cata("</table>\n")
--- a/limma_voom.xml	Tue Mar 01 08:03:53 2022 +0000
+++ b/limma_voom.xml	Fri Feb 09 17:06:25 2024 +0000
@@ -1,13 +1,12 @@
-<tool id="limma_voom" name="limma" version="@TOOL_VERSION@+galaxy0">
+<tool id="limma_voom" name="limma" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
     <description>
         Perform differential expression with limma-voom or limma-trend
     </description>
     <macros>
-        <token name="@TOOL_VERSION@">3.50.1</token>
+        <token name="@TOOL_VERSION@">3.58.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+        <token name="@PROFILE@">23.0</token>
     </macros>
-    <xrefs>
-        <xref type="bio.tools">limma</xref>
-    </xrefs>
     <edam_topics>
         <edam_topic>topic_3308</edam_topic>
     </edam_topics>
@@ -15,16 +14,20 @@
         <edam_operation>operation_3563</edam_operation>
         <edam_operation>operation_3223</edam_operation>
     </edam_operations>
+    <xrefs>
+        <xref type="bio.tools">limma</xref>
+        <xref type="bioconductor">limma</xref>
+    </xrefs>

     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">bioconductor-limma</requirement>
-        <requirement type="package" version="3.36.0">bioconductor-edger</requirement>
-        <requirement type="package" version="1.4.36">r-statmod</requirement>
-        <requirement type="package" version="1.1.1">r-scales</requirement>
+        <requirement type="package" version="4.0.2">bioconductor-edger</requirement>
+        <requirement type="package" version="1.5.0">r-statmod</requirement>
+        <requirement type="package" version="1.3.0">r-scales</requirement>
         <requirement type="package" version="0.2.21">r-rjson</requirement>
-        <requirement type="package" version="1.20.3">r-getopt</requirement>
-        <requirement type="package" version="3.1.1">r-gplots</requirement>
-        <requirement type="package" version="2.4.0">bioconductor-glimma</requirement>
+        <requirement type="package" version="1.20.4">r-getopt</requirement>
+        <requirement type="package" version="3.1.3.1">r-gplots</requirement>
+        <requirement type="package" version="2.12.0">bioconductor-glimma</requirement>
     </requirements>

     <version_command><![CDATA[
@@ -733,6 +736,12 @@
             <param name="normalisationOption" value="TMM" />
             <param name="topgenes" value="6" />
             <output_collection name="outTables" count="3">
+                <element name="limma-voom_Mut-WT-WT-Mut" ftype="tabular" >
+                     <assert_contents>
+                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
+                        <has_text_matching expression="11304.*0.9146" />
+                    </assert_contents>
+                </element>
                 <element name="limma-voom_Mut-WT" ftype="tabular" >
                     <assert_contents>
                         <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
@@ -745,12 +754,6 @@
                         <has_text_matching expression="11304.*-0.4573" />
                     </assert_contents>
                 </element>
-                <element name="limma-voom_Mut-WT-WT-Mut" ftype="tabular" >
-                     <assert_contents>
-                        <has_text_matching expression="GeneID.*logFC.*AveExpr.*t.*P.Value.*adj.P.Val.*B" />
-                        <has_text_matching expression="11304.*0.9146" />
-                    </assert_contents>
-                </element>
             </output_collection>
         </test>
     </tests>