Mercurial > repos > iuc > limma_voom

--- a/limma_voom.R	Wed May 09 13:27:14 2018 -0400
+++ b/limma_voom.R	Tue May 15 02:36:36 2018 -0400
@@ -23,7 +23,8 @@
 #       trend", "t", 1, "double"            -Float for prior.count if limma-trend is used instead of voom
 #       weightOpt", "w", 0, "logical"       -String specifying if voomWithQualityWeights should be used
 #       topgenes", "G", 1, "integer"        -Integer specifying no. of genes to highlight in volcano and heatmap
-#       treatOpt", "T", 0, "logical"        -String specifying if TREAT function shuld be used
+#       treatOpt", "T", 0, "logical"        -String specifying if TREAT function should be used
+#       plots, "P", 1, "character"          -String specifying additional plots to be created
 #
 # OUT:
 #       Density Plots (if filtering)
@@ -125,7 +126,7 @@
 }

 # Function to write code for html images
-HtmlImage <- function(source, label=source, height=600, width=600) {
+HtmlImage <- function(source, label=source, height=500, width=500) {
     cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
     cata("\" width=\"", width, "\"/>\n")
 }
@@ -175,7 +176,8 @@
     "trend", "t", 1, "double",
     "weightOpt", "w", 0, "logical",
     "topgenes", "G", 1, "integer",
-    "treatOpt", "T", 0, "logical"),
+    "treatOpt", "T", 0, "logical",
+    "plots", "P", 1, "character"),
     byrow=TRUE, ncol=4)
 opt <- getopt(spec)

@@ -322,28 +324,36 @@
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)

-denOutPng <- makeOut("densityplots.png")
-denOutPdf <- makeOut("DensityPlots.pdf")
-boxOutPng <- makeOut("boxplots.png")
-boxOutPdf <- makeOut("BoxPlots.pdf")
-mdsOutPdf <- character() # Initialise character vector
-mdsOutPng <- character()
-for (i in 1:ncol(factors)) {
-    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
-    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+plots <- character()
+if (!is.null(opt$plots)) {
+    plots <- unlist(strsplit(opt$plots, split=","))
 }

-mdOutPdf <- character()
+denOutPng <- makeOut("densityplots.png")
+denOutPdf <- makeOut("densityplots.pdf")
+cpmOutPdf <- makeOut("cpmplots.pdf")
+boxOutPng <- makeOut("boxplots.png")
+boxOutPdf <- makeOut("boxplots.pdf")
+mdsscreeOutPng <- makeOut("mdsscree.png")
+mdsscreeOutPdf <- makeOut("mdsscree.pdf")
+mdsxOutPdf <- makeOut("mdsplot_extra.pdf")
+mdsxOutPng <- makeOut("mdsplot_extra.png")
+mdsamOutPdf <- makeOut("mdplots_samples.pdf")
+mdOutPdf <- character() # Initialise character vector
 volOutPdf <- character()
 heatOutPdf <- character()
+stripOutPdf <- character()
 mdvolOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
-    mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
-    volOutPdf[i] <- makeOut(paste0("volplot_", contrastData[i], ".pdf"))
-    heatOutPdf[i] <- makeOut(paste0("heatmap_", contrastData[i], ".pdf"))
-    mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", contrastData[i], ".png"))
-    topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
+    con <- contrastData[i]
+    con <- gsub("\\(|\\)", "", con)
+    mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
+    volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
+    heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf"))
+    stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf"))
+    mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", con, ".png"))
+    topOut[i] <- makeOut(paste0(deMethod, "_", con, ".tsv"))
 }

 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
@@ -378,9 +388,18 @@
   data$genes <- data.frame(GeneID=row.names(counts))
 }

+# Creating naming data
+samplenames <- colnames(data$counts)
+sampleanno <- data.frame("sampleID"=samplenames, factors)
+
+# Creating colours for the groups
+cols <- as.numeric(factors[, 1])
+col.group <- palette()[cols]
+
 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
 # samples. Default is no filtering
 preFilterCount <- nrow(data$counts)
+nsamples <- ncol(data$counts)

 if (filtCPM || filtSmpCount || filtTotCount) {

@@ -389,21 +408,84 @@
     } else if (filtSmpCount) {
         keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
     } else if (filtCPM) {
-        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+        myCPM <- cpm(data$counts)
+        thresh <- myCPM >= opt$cpmReq
+        keep <- rowSums(thresh) >= opt$sampleReq
+
+        if ("c" %in% plots) {
+            # Plot CPM vs raw counts (to check threshold)
+            pdf(cpmOutPdf, width=6.5, height=10)
+            par(mfrow=c(3, 2))
+            for (i in 1:nsamples) {
+                plot(data$counts[, i], myCPM[, i], xlim=c(0,50), ylim=c(0,3), main=samplenames[i], xlab="Raw counts", ylab="CPM")
+                abline(v=10, col="red", lty=2, lwd=2)
+                abline(h=opt$cpmReq, col=4)
+            }
+            linkName <- "CpmPlots.pdf"
+            linkAddr <- "cpmplots.pdf"
+            linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+            invisible(dev.off())
+        }
     }

     data$counts <- data$counts[keep, ]
     data$genes <- data$genes[keep, , drop=FALSE]
+
+    # Plot Density
+    if ("d" %in% plots) {
+        # PNG
+        png(denOutPng, width=1000, height=500)
+        par(mfrow=c(1,2), cex.axis=0.8)
+
+        # before filtering
+        lcpm1 <- cpm(counts, log=TRUE)
+        plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Raw counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm1[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+
+        # after filtering
+        lcpm2 <- cpm(data$counts, log=TRUE)
+        plot(density(lcpm2[,1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm2[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        legend("topright", samplenames, text.col=col.group, bty="n")
+        imgName <- "Densityplots.png"
+        imgAddr <- "densityplots.png"
+        imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+        invisible(dev.off())
+
+        # PDF
+        pdf(denOutPdf, width=14)
+        par(mfrow=c(1,2), cex.axis=0.8)
+        plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Raw counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm1[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        plot(density(lcpm2[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm2[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        legend("topright", samplenames, text.col=col.group, bty="n")
+        linkName <- "DensityPlots.pdf"
+        linkAddr <- "densityplots.pdf"
+        linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+        invisible(dev.off())
+    }
 }

 postFilterCount <- nrow(data$counts)
 filteredCount <- preFilterCount-postFilterCount

-# Creating naming data
-samplenames <- colnames(data$counts)
-sampleanno <- data.frame("sampleID"=samplenames, factors)
-
-
 # Generating the DGEList object "y"
 print("Generating DGEList object")
 data$samples <- sampleanno
@@ -438,87 +520,42 @@
 ### Data Output
 ################################################################################

-# Plot Density (if filtering low counts)
-if (filtCPM || filtSmpCount || filtTotCount) {
-    nsamples <- ncol(counts)
-
-    # PNG
-    png(denOutPng, width=1200, height=600)
-    par(mfrow=c(1,2), cex.axis=0.8)
-
-    # before filtering
-    logcpm <- cpm(counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Raw counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-
-    # after filtering
-    logcpm <- cpm(data$counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
-    imgName <- "Densityplots.png"
-    imgAddr <- "densityplots.png"
-    imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
-    invisible(dev.off())
+# Plot Box plots (before and after normalisation)
+if (opt$normOpt != "none" & "b" %in% plots) {
+    png(boxOutPng, width=1000, height=500)
+    par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
+    labels <- colnames(counts)

-    # PDF
-    pdf(denOutPdf, width=14)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Raw counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    logcpm <- cpm(data$counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
-    linkName <- "DensityPlots.pdf"
-    linkAddr <- "densityplots.pdf"
-    linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
-    invisible(dev.off())
-}
+    lcpm1 <- cpm(y$counts, log=TRUE)
+    boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    abline(h=median(lcpm1), col=4)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")

-# Plot Box plots (before and after normalisation)
-if (opt$normOpt != "none") {
-    png(boxOutPng, width=1200, height=600)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(y$counts, log=TRUE)
-    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(logcpm), col=4)
-    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
-    lcpm <- cpm(y, log=TRUE)
-    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(lcpm), col=4)
+    lcpm2 <- cpm(y, log=TRUE)
+    boxplot(lcpm2, las=2, col=col.group, xaxt="n",  xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    abline(h=median(lcpm2), col=4)
     title(main="Box Plot: Normalised counts", ylab="Log-cpm")
+
     imgName <- "Boxplots.png"
     imgAddr <- "boxplots.png"
     imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
     invisible(dev.off())

     pdf(boxOutPdf, width=14)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(y$counts, log=TRUE)
-    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(logcpm), col=4)
+    par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
+    boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    abline(h=median(lcpm1), col=4)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
     title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
-    lcpm <- cpm(y, log=TRUE)
-    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(lcpm), col=4)
+    boxplot(lcpm2, las=2, col=col.group, xaxt="n",  xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    abline(h=median(lcpm2), col=4)
     title(main="Box Plot: Normalised counts", ylab="Log-cpm")
     linkName <- "BoxPlots.pdf"
     linkAddr <- "boxplots.pdf"
@@ -530,22 +567,100 @@
 print("Generating MDS plot")
 labels <- names(counts)

-for (i in 1:ncol(factors)) {
-    png(mdsOutPng[i], width=600, height=600)
-    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
-    imgName <- paste0("MDSPlot_", names(factors)[i], ".png")
-    imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+# Scree plot (Variance Explained) code copied from Glimma
+
+# get column of matrix
+getCols <- function(x, inds) {
+  x[, inds, drop=FALSE]
+}
+
+x <- cpm(y, log=TRUE)
+ndim <- nsamples - 1
+nprobes <- nrow(x)
+top <- 500
+top <- min(top, nprobes)
+cn <- colnames(x)
+bad <- rowSums(is.finite(x)) < nsamples
+
+if (any(bad)) {
+  warning("Rows containing infinite values have been removed")
+  x <- x[!bad, , drop=FALSE]
+}
+
+dd <- matrix(0, nrow=nsamples, ncol=nsamples, dimnames=list(cn, cn))
+topindex <- nprobes - top + 1L
+for (i in 2L:(nsamples)) {
+  for (j in 1L:(i - 1L)) {
+    dists <- (getCols(x, i) - getCols(x, j))^2
+    dists <- sort.int(dists, partial = topindex )
+    topdist <- dists[topindex:nprobes]
+    dd[i, j] <- sqrt(mean(topdist))
+  }
+}
+
+a1 <- suppressWarnings(cmdscale(as.dist(dd), k=min(ndim, 8), eig=TRUE))
+eigen <- data.frame(name = 1:min(ndim, 8), eigen = round(a1$eig[1:min(ndim, 8)]/sum(a1$eig), 2))
+
+png(mdsscreeOutPng, width=1000, height=500)
+par(mfrow=c(1, 2))
+plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
+barplot(eigen$eigen, names.arg=eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
+imgName <- paste0("MDSPlot_", names(factors)[1], ".png")
+imgAddr <- "mdsscree.png"
+imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+invisible(dev.off())
+
+pdf(mdsscreeOutPdf, width=14)
+par(mfrow=c(1, 2))
+plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
+barplot(eigen$eigen, names.arg=eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
+linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf")
+linkAddr <- "mdsscree.pdf"
+linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+invisible(dev.off())
+
+if ("x" %in% plots) {
+    png(mdsxOutPng, width=1000, height=500)
+    par(mfrow=c(1, 2))
+    for (i in 2:3) {
+        dim1 <- i
+        dim2 <- i + 1
+        plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
+    }
+    imgName <- paste0("MDSPlot_extra.png")
+    imgAddr <- paste0("mdsplot_extra.png")
     imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
     invisible(dev.off())

-    pdf(mdsOutPdf[i])
-    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
-    linkName <- paste0("MDSPlot_", names(factors)[i], ".pdf")
-    linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+    pdf(mdsxOutPdf, width=14)
+    par(mfrow=c(1, 2))
+    for (i in 2:3) {
+        dim1 <- i
+        dim2 <- i + 1
+        plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
+    }
+    linkName <- "MDSPlot_extra.pdf"
+    linkAddr <- "mdsplot_extra.pdf"
     linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
     invisible(dev.off())
 }

+if ("m" %in% plots) {
+    # Plot MD plots for individual samples
+    print("Generating MD plots for samples")
+    pdf(mdsamOutPdf, width=6.5, height=10)
+    par(mfrow=c(3, 2))
+    for (i in 1:nsamples) {
+        plotMD(y, column = i)
+        abline(h=0, col="red", lty=2, lwd=2)
+    }
+    linkName <- "MDPlots_Samples.pdf"
+    linkAddr <- "mdplots_samples.pdf"
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+}
+
+
 if (wantTrend) {
     # limma-trend approach
     logCPM <- cpm(y, log=TRUE, prior.count=opt$trend)
@@ -561,7 +676,7 @@
     saOutPng <- makeOut("saplot.png")
     saOutPdf <- makeOut("saplot.pdf")

-    png(saOutPng, width=600, height=600)
+    png(saOutPng, width=500, height=500)
     plotSA(fit, main="SA Plot")
     imgName <- "SAPlot.png"
     imgAddr <- "saplot.png"
@@ -589,7 +704,7 @@

     if (wantWeight) {
         # Creating voom data object and plot
-        png(voomOutPng, width=1000, height=600)
+        png(voomOutPng, width=1000, height=500)
         vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
         imgName <- "VoomPlot.png"
         imgAddr <- "voomplot.png"
@@ -609,7 +724,7 @@

     } else {
         # Creating voom data object and plot
-        png(voomOutPng, width=600, height=600)
+        png(voomOutPng, width=500, height=500)
         vData <- voom(y, design=design, plot=TRUE)
         imgName <- "VoomPlot"
         imgAddr <- "voomplot.png"
@@ -661,6 +776,8 @@
 sumStatus <- summary(status)

 for (i in 1:length(contrastData)) {
+    con <- contrastData[i]
+    con <- gsub("\\(|\\)", "", con)
     # Collect counts for differential expression
     upCount[i] <- sumStatus["Up", i]
     downCount[i] <- sumStatus["Down", i]
@@ -673,22 +790,19 @@
         top <- topTable(fit, coef=i, number=Inf, sort.by="P")
     }
     write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
-
-    linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
-    linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
+    linkName <- paste0(deMethod, "_", con, ".tsv")
+    linkAddr <- paste0(deMethod, "_", con, ".tsv")
     linkData <- rbind(linkData, c(linkName, linkAddr))

     # Plot MD (log ratios vs mean average) using limma package on weighted
     pdf(mdOutPdf[i])
     limma::plotMD(fit, status=status[, i], coef=i,
-                  main=paste("MD Plot:", unmake.names(contrastData[i])),
-                  hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
-                  xlab="Average Expression", ylab="logFC")
-
+        main=paste("MD Plot:", unmake.names(con)),
+        hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+        xlab="Average Expression", ylab="logFC")
     abline(h=0, col="grey", lty=2)
-
-    linkName <- paste0("MDPlot_", contrastData[i], ".pdf")
-    linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
+    linkName <- paste0("MDPlot_", con, ".pdf")
+    linkAddr <- paste0("mdplot_", con, ".pdf")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())

@@ -696,60 +810,30 @@
     pdf(volOutPdf[i])
     if (haveAnno) {
         # labels must be in second column currently
-        limma::volcanoplot(fit, coef=i,
-            main=paste("Volcano Plot:", unmake.names(contrastData[i])),
-            highlight=opt$topgenes,
-            names=fit$genes[, 2])
-    } else {
-        limma::volcanoplot(fit, coef=i,
-            main=paste("Volcano Plot:", unmake.names(contrastData[i])),
-            highlight=opt$topgenes,
-            names=fit$genes$GeneID)
-    }
-    linkName <- paste0("VolcanoPlot_", contrastData[i], ".pdf")
-    linkAddr <- paste0("volplot_", contrastData[i], ".pdf")
-    linkData <- rbind(linkData, c(linkName, linkAddr))
-    invisible(dev.off())
-
-    # Plot Heatmap
-    topgenes <- rownames(top[1:opt$topgenes, ])
-    if (wantTrend) {
-        topexp <- plotData[topgenes, ]
+        labels <- fit$genes[, 2]
     } else {
-        topexp <- plotData$E[topgenes, ]
+        labels <- fit$genes$GeneID
     }
-
-    pdf(heatOutPdf[i])
-    par(cex.main=0.8)
-    if (haveAnno) {
-        # labels must be in second column currently
-        heatmap.2(topexp, scale="row",
-            main=paste("Heatmap:", unmake.names(contrastData[i])),
-            col="bluered", trace="none", density.info="none",
-            margin=c(8,6), lhei=c(2,10), dendrogram="column",
-            cexRow=0.7, cexCol=0.7, srtCol=45,
-            labRow=top[topgenes, 2])
-    } else {
-        heatmap.2(topexp, scale="row",
-            main=paste("Heatmap:", unmake.names(contrastData[i])),
-            col="bluered", trace="none", density.info="none",
-            margin=c(8,6), lhei=c(2,10), dendrogram="column",
-            cexRow=0.7, cexCol=0.7, srtCol=45)
-    }
-    linkName <- paste0("Heatmap_", contrastData[i], ".pdf")
-    linkAddr <- paste0("heatmap_", contrastData[i], ".pdf")
+    limma::volcanoplot(fit, coef=i,
+        main=paste("Volcano Plot:", unmake.names(con)),
+        highlight=opt$topgenes,
+        names=labels)
+    linkName <- paste0("VolcanoPlot_", con, ".pdf")
+    linkAddr <- paste0("volplot_", con, ".pdf")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())

     # PNG of MD and Volcano
-    png(mdvolOutPng[i], width=1200, height=600)
+    png(mdvolOutPng[i], width=1000, height=500)
     par(mfrow=c(1, 2), mar=c(5,4,2,2)+0.1, oma=c(0,0,3,0))
+
+    # MD plot
     limma::plotMD(fit, status=status[, i], coef=i, main="MD Plot",
-                   hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
-                   xlab="Average Expression", ylab="logFC")
+        hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+        xlab="Average Expression", ylab="logFC")
     abline(h=0, col="grey", lty=2)

-    # Volcano plots
+    # Volcano
     if (haveAnno) {
         # labels must be in second column currently
         limma::volcanoplot(fit, coef=i, main="Volcano Plot",
@@ -761,11 +845,60 @@
             names=fit$genes$GeneID)
     }

-    imgName <- paste0("MDVolPlot_", contrastData[i])
-    imgAddr <- paste0("mdvolplot_", contrastData[i], ".png")
+    imgName <- paste0("MDVolPlot_", con)
+    imgAddr <- paste0("mdvolplot_", con, ".png")
     imageData <- rbind(imageData, c(imgName, imgAddr))
-    title(paste0("Contrast: ", unmake.names(contrastData[i])), outer=TRUE, cex.main=1.5)
+    title(paste0("Contrast: ", unmake.names(con)), outer=TRUE, cex.main=1.5)
     invisible(dev.off())
+
+    if ("h" %in% plots) {
+        # Plot Heatmap
+        topgenes <- rownames(top[1:opt$topgenes, ])
+        if (wantTrend) {
+            topexp <- plotData[topgenes, ]
+        } else {
+            topexp <- plotData$E[topgenes, ]
+        }
+        pdf(heatOutPdf[i])
+        mycol <- colorpanel(1000,"blue","white","red")
+        if (haveAnno) {
+            # labels must be in second column currently
+            labels <- top[topgenes, 2]
+        } else {
+            labels <- rownames(topexp)
+        }
+        heatmap.2(topexp, scale="row", Colv=FALSE, Rowv=FALSE, dendrogram="none",
+            main=paste("Contrast:", unmake.names(con), "\nTop", opt$topgenes, "genes by adj.P.Val"),
+            trace="none", density.info="none", lhei=c(2,10), margin=c(8, 6), labRow=labels, cexRow=0.7, srtCol=45,
+            col=mycol, ColSideColors=col.group)
+        linkName <- paste0("Heatmap_", con, ".pdf")
+        linkAddr <- paste0("heatmap_", con, ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+
+    if ("s" %in% plots) {
+        # Plot Stripcharts of top genes
+        pdf(stripOutPdf[i], title=paste("Contrast:", unmake.names(con)))
+        par(mfrow = c(3,2), cex.main=0.8, cex.axis=0.8)
+        cols <- unique(col.group)
+
+        for (j in 1:length(topgenes)) {
+            lfc <- round(top[topgenes[j], "logFC"], 2)
+            pval <- round(top[topgenes[j], "adj.P.Val"], 5)
+            if (wantTrend) {
+                stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
+                    method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+            } else {
+                stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
+                    method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+            }
+        }
+        linkName <- paste0("Stripcharts_", con, ".pdf")
+        linkAddr <- paste0("stripcharts_", con, ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
 row.names(sigDiff) <- contrastData
@@ -774,10 +907,10 @@
 if (wantRda) {
     print("Saving RData")
     if (wantWeight) {
-      save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrasts, design,
+      save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrastData, contrasts, design,
            file=rdaOut, ascii=TRUE)
     } else {
-      save(counts, data, y, status, plotData, labels, factors, fit, top, contrasts, design,
+      save(counts, data, y, status, plotData, labels, factors, fit, top, contrastData, contrasts, design,
            file=rdaOut, ascii=TRUE)
     }
     linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
@@ -805,8 +938,8 @@
 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")

 for (i in 1:nrow(imageData)) {
-    if (grepl("density|box|mdvol", imageData$Link[i])) {
-        HtmlImage(imageData$Link[i], imageData$Label[i], width=1200)
+    if (grepl("density|box|mds|mdvol", imageData$Link[i])) {
+        HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
     } else if (wantWeight) {
         HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
     } else {
@@ -834,8 +967,33 @@
 cata("</table>")

 cata("<h4>Plots:</h4>\n")
+#PDFs
 for (i in 1:nrow(linkData)) {
-    if (grepl(".pdf", linkData$Link[i])) {
+    if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("mdplot_", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("volplot", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("heatmap", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("stripcharts", linkData$Link[i])) {
         HtmlLink(linkData$Link[i], linkData$Label[i])
   }
 }
--- a/limma_voom.xml	Wed May 09 13:27:14 2018 -0400
+++ b/limma_voom.xml	Tue May 15 02:36:36 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="limma_voom" name="limma" version="3.34.9.3">
+<tool id="limma_voom" name="limma" version="3.34.9.4">
     <description>
         Perform differential expression with limma-voom or limma-trend
     </description>
@@ -72,6 +72,10 @@
     #end if
 #end if

+#if $out.plots:
+    -P $out.plots
+#end if
+
 #if $out.normCounts:
     -x
 #end if
@@ -202,9 +206,9 @@

         <!-- Contrasts -->
         <repeat name="rep_contrast" title="Contrast" min="1" default="1">
-            <param name="contrast" type="text" label="Contrast of Interest" help="Names of two groups to compare separated by a hyphen e.g. Mut-WT. If the order is Mut-WT the fold changes in the results will be up/down in Mut relative to WT. If you have more than one contrast enter each separately using the Insert Contrast button below. For more info, see Chapter 8 in the limma User's guide: https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf">
+            <param name="contrast" type="text" label="Contrast of Interest" help="Names of two groups to compare separated by a hyphen e.g. Mut-WT. If the order is Mut-WT the fold changes in the results will be up/down in Mut relative to WT. If you have more than one contrast enter each separately using the Insert Contrast button below. For differences between contrasts use e.g. (Mut1-WT)-(Mut2-WT). For more info, see Chapter 8 in the limma User's guide: https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf">
                 <validator type="empty_field" />
-                <validator type="regex" message="Please only use letters, numbers or underscores">^[\w-]+$</validator>
+                <validator type="regex" message="Please only use letters, numbers or underscores">^[\(\w\)-]+$</validator>
             </param>
         </repeat>

@@ -250,6 +254,15 @@

         <!-- Output Options -->
         <section name="out" expanded="false" title="Output Options">
+            <param name="plots" type="select" display="checkboxes" multiple="True" optional="True" label="Additional Plots" help="Select additional plots to output in the report and as PDFs">
+                <option value="d">Density Plots (if filtering)</option>
+                <option value="c">CpmsVsCounts Plots (if filtering on cpms)</option>
+                <option value="b">Box Plots (if normalising)</option>
+                <option value="x">MDS Extra (Dims 2vs3 and 3vs4)</option>
+                <option value="m">MD Plots for individual samples</option>
+                <option value="h">Heatmaps (top DE genes) </option>
+                <option value="s">Stripcharts (top DE genes)</option>
+            </param>
             <param name="normCounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
                 label="Output Normalised Counts Table?"
                 help="Output a file containing the normalised counts, these are in log2 counts per million (logCPM). Default: No">
@@ -278,9 +291,9 @@
             <param name="treat" type="boolean" truevalue="1" falsevalue="0" checked="False"
                 label="Test significance relative to a fold-change threshold (TREAT)"
                 help="If you want to apply a cut-off on a fold change the TREAT function can be used, see Help section below. Default: No"/>
-            <param name="topgenes" type="integer" value="10" min="0"
-                label="Number of genes to highlight in Volcano plot and Heatmap"
-                help="The top DE genes will be highlighted in the Volcano plot for each contrast and output in a heatmap PDF. Default: 10."/>
+            <param name="topgenes" type="integer" value="10" min="0" max="100"
+                label="Number of genes to highlight in Volcano plot, Heatmap and Stripcharts"
+                help="The top DE genes will be highlighted in the Volcano plot for each contrast and can be output in heatmap and stripchart PDFs (max 100). Default: 10."/>
             <param name="normalisationOption" type="select" label="Normalisation Method" help="Default: TMM">
                 <option value="TMM" selected="true">TMM</option>
                 <option value="RLE">RLE</option>
--- a/test-data/out_rscript.txt	Wed May 09 13:27:14 2018 -0400
+++ b/test-data/out_rscript.txt	Tue May 15 02:36:36 2018 -0400
@@ -23,7 +23,8 @@
 #       trend", "t", 1, "double"            -Float for prior.count if limma-trend is used instead of voom
 #       weightOpt", "w", 0, "logical"       -String specifying if voomWithQualityWeights should be used
 #       topgenes", "G", 1, "integer"        -Integer specifying no. of genes to highlight in volcano and heatmap
-#       treatOpt", "T", 0, "logical"        -String specifying if TREAT function shuld be used
+#       treatOpt", "T", 0, "logical"        -String specifying if TREAT function should be used
+#       plots, "P", 1, "character"          -String specifying additional plots to be created
 #
 # OUT:
 #       Density Plots (if filtering)
@@ -125,7 +126,7 @@
 }

 # Function to write code for html images
-HtmlImage <- function(source, label=source, height=600, width=600) {
+HtmlImage <- function(source, label=source, height=500, width=500) {
     cata("<img src=\"", source, "\" alt=\"", label, "\" height=\"", height)
     cata("\" width=\"", width, "\"/>\n")
 }
@@ -175,7 +176,8 @@
     "trend", "t", 1, "double",
     "weightOpt", "w", 0, "logical",
     "topgenes", "G", 1, "integer",
-    "treatOpt", "T", 0, "logical"),
+    "treatOpt", "T", 0, "logical",
+    "plots", "P", 1, "character"),
     byrow=TRUE, ncol=4)
 opt <- getopt(spec)

@@ -322,28 +324,36 @@
 contrastData <- sanitiseEquation(contrastData)
 contrastData <- gsub(" ", ".", contrastData, fixed=TRUE)

-denOutPng <- makeOut("densityplots.png")
-denOutPdf <- makeOut("DensityPlots.pdf")
-boxOutPng <- makeOut("boxplots.png")
-boxOutPdf <- makeOut("BoxPlots.pdf")
-mdsOutPdf <- character() # Initialise character vector
-mdsOutPng <- character()
-for (i in 1:ncol(factors)) {
-    mdsOutPdf[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".pdf"))
-    mdsOutPng[i] <- makeOut(paste0("mdsplot_", names(factors)[i], ".png"))
+plots <- character()
+if (!is.null(opt$plots)) {
+    plots <- unlist(strsplit(opt$plots, split=","))
 }

-mdOutPdf <- character()
+denOutPng <- makeOut("densityplots.png")
+denOutPdf <- makeOut("densityplots.pdf")
+cpmOutPdf <- makeOut("cpmplots.pdf")
+boxOutPng <- makeOut("boxplots.png")
+boxOutPdf <- makeOut("boxplots.pdf")
+mdsscreeOutPng <- makeOut("mdsscree.png")
+mdsscreeOutPdf <- makeOut("mdsscree.pdf")
+mdsxOutPdf <- makeOut("mdsplot_extra.pdf")
+mdsxOutPng <- makeOut("mdsplot_extra.png")
+mdsamOutPdf <- makeOut("mdplots_samples.pdf")
+mdOutPdf <- character() # Initialise character vector
 volOutPdf <- character()
 heatOutPdf <- character()
+stripOutPdf <- character()
 mdvolOutPng <- character()
 topOut <- character()
 for (i in 1:length(contrastData)) {
-    mdOutPdf[i] <- makeOut(paste0("mdplot_", contrastData[i], ".pdf"))
-    volOutPdf[i] <- makeOut(paste0("volplot_", contrastData[i], ".pdf"))
-    heatOutPdf[i] <- makeOut(paste0("heatmap_", contrastData[i], ".pdf"))
-    mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", contrastData[i], ".png"))
-    topOut[i] <- makeOut(paste0(deMethod, "_", contrastData[i], ".tsv"))
+    con <- contrastData[i]
+    con <- gsub("\\(|\\)", "", con)
+    mdOutPdf[i] <- makeOut(paste0("mdplot_", con, ".pdf"))
+    volOutPdf[i] <- makeOut(paste0("volplot_", con, ".pdf"))
+    heatOutPdf[i] <- makeOut(paste0("heatmap_", con, ".pdf"))
+    stripOutPdf[i] <- makeOut(paste0("stripcharts_", con, ".pdf"))
+    mdvolOutPng[i] <- makeOut(paste0("mdvolplot_", con, ".png"))
+    topOut[i] <- makeOut(paste0(deMethod, "_", con, ".tsv"))
 }

 normOut <- makeOut(paste0(deMethod, "_normcounts.tsv"))
@@ -378,9 +388,18 @@
   data$genes <- data.frame(GeneID=row.names(counts))
 }

+# Creating naming data
+samplenames <- colnames(data$counts)
+sampleanno <- data.frame("sampleID"=samplenames, factors)
+
+# Creating colours for the groups
+cols <- as.numeric(factors[, 1])
+col.group <- palette()[cols]
+
 # If filter crieteria set, filter out genes that do not have a required cpm/counts in a required number of
 # samples. Default is no filtering
 preFilterCount <- nrow(data$counts)
+nsamples <- ncol(data$counts)

 if (filtCPM || filtSmpCount || filtTotCount) {

@@ -389,21 +408,84 @@
     } else if (filtSmpCount) {
         keep <- rowSums(data$counts >= opt$cntReq) >= opt$sampleReq
     } else if (filtCPM) {
-        keep <- rowSums(cpm(data$counts) >= opt$cpmReq) >= opt$sampleReq
+        myCPM <- cpm(data$counts)
+        thresh <- myCPM >= opt$cpmReq
+        keep <- rowSums(thresh) >= opt$sampleReq
+
+        if ("c" %in% plots) {
+            # Plot CPM vs raw counts (to check threshold)
+            pdf(cpmOutPdf, width=6.5, height=10)
+            par(mfrow=c(3, 2))
+            for (i in 1:nsamples) {
+                plot(data$counts[, i], myCPM[, i], xlim=c(0,50), ylim=c(0,3), main=samplenames[i], xlab="Raw counts", ylab="CPM")
+                abline(v=10, col="red", lty=2, lwd=2)
+                abline(h=opt$cpmReq, col=4)
+            }
+            linkName <- "CpmPlots.pdf"
+            linkAddr <- "cpmplots.pdf"
+            linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+            invisible(dev.off())
+        }
     }

     data$counts <- data$counts[keep, ]
     data$genes <- data$genes[keep, , drop=FALSE]
+
+    # Plot Density
+    if ("d" %in% plots) {
+        # PNG
+        png(denOutPng, width=1000, height=500)
+        par(mfrow=c(1,2), cex.axis=0.8)
+
+        # before filtering
+        lcpm1 <- cpm(counts, log=TRUE)
+        plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Raw counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm1[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+
+        # after filtering
+        lcpm2 <- cpm(data$counts, log=TRUE)
+        plot(density(lcpm2[,1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm2[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        legend("topright", samplenames, text.col=col.group, bty="n")
+        imgName <- "Densityplots.png"
+        imgAddr <- "densityplots.png"
+        imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+        invisible(dev.off())
+
+        # PDF
+        pdf(denOutPdf, width=14)
+        par(mfrow=c(1,2), cex.axis=0.8)
+        plot(density(lcpm1[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Raw counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm1[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        plot(density(lcpm2[, 1]), col=col.group[1], lwd=2, las=2, main="", xlab="")
+        title(main="Density Plot: Filtered counts", xlab="Log-cpm")
+        for (i in 2:nsamples){
+            den <- density(lcpm2[, i])
+            lines(den$x, den$y, col=col.group[i], lwd=2)
+        }
+        legend("topright", samplenames, text.col=col.group, bty="n")
+        linkName <- "DensityPlots.pdf"
+        linkAddr <- "densityplots.pdf"
+        linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+        invisible(dev.off())
+    }
 }

 postFilterCount <- nrow(data$counts)
 filteredCount <- preFilterCount-postFilterCount

-# Creating naming data
-samplenames <- colnames(data$counts)
-sampleanno <- data.frame("sampleID"=samplenames, factors)
-
-
 # Generating the DGEList object "y"
 print("Generating DGEList object")
 data$samples <- sampleanno
@@ -438,87 +520,42 @@
 ### Data Output
 ################################################################################

-# Plot Density (if filtering low counts)
-if (filtCPM || filtSmpCount || filtTotCount) {
-    nsamples <- ncol(counts)
-
-    # PNG
-    png(denOutPng, width=1200, height=600)
-    par(mfrow=c(1,2), cex.axis=0.8)
-
-    # before filtering
-    logcpm <- cpm(counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Raw counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-
-    # after filtering
-    logcpm <- cpm(data$counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
-    imgName <- "Densityplots.png"
-    imgAddr <- "densityplots.png"
-    imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
-    invisible(dev.off())
+# Plot Box plots (before and after normalisation)
+if (opt$normOpt != "none" & "b" %in% plots) {
+    png(boxOutPng, width=1000, height=500)
+    par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
+    labels <- colnames(counts)

-    # PDF
-    pdf(denOutPdf, width=14)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Raw counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    logcpm <- cpm(data$counts, log=TRUE)
-    plot(density(logcpm[,1]), col=as.numeric(factors[1, 1]), lwd=2, las=2, main="", xlab="")
-    title(main="Density Plot: Filtered counts", xlab="Log-cpm")
-    for (i in 2:nsamples){
-        den <- density(logcpm[,i])
-        lines(den$x, den$y, col=as.numeric(factors[i, 1]), lwd=2)
-    }
-    legend("topright", samplenames, text.col=as.numeric(factors[, 1]), bty="n")
-    linkName <- "DensityPlots.pdf"
-    linkAddr <- "densityplots.pdf"
-    linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
-    invisible(dev.off())
-}
+    lcpm1 <- cpm(y$counts, log=TRUE)
+    boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    abline(h=median(lcpm1), col=4)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")

-# Plot Box plots (before and after normalisation)
-if (opt$normOpt != "none") {
-    png(boxOutPng, width=1200, height=600)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(y$counts, log=TRUE)
-    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(logcpm), col=4)
-    title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
-    lcpm <- cpm(y, log=TRUE)
-    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(lcpm), col=4)
+    lcpm2 <- cpm(y, log=TRUE)
+    boxplot(lcpm2, las=2, col=col.group, xaxt="n",  xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    abline(h=median(lcpm2), col=4)
     title(main="Box Plot: Normalised counts", ylab="Log-cpm")
+
     imgName <- "Boxplots.png"
     imgAddr <- "boxplots.png"
     imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
     invisible(dev.off())

     pdf(boxOutPdf, width=14)
-    par(mfrow=c(1,2), cex.axis=0.8)
-    logcpm <- cpm(y$counts, log=TRUE)
-    boxplot(logcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(logcpm), col=4)
+    par(mfrow=c(1,2), mar=c(6,4,2,2)+0.1)
+    boxplot(lcpm1, las=2, col=col.group, xaxt="n", xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    abline(h=median(lcpm1), col=4)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
     title(main="Box Plot: Unnormalised counts", ylab="Log-cpm")
-    lcpm <- cpm(y, log=TRUE)
-    boxplot(lcpm, las=2, col=as.numeric(factors[, 1]))
-    abline(h=median(lcpm), col=4)
+    boxplot(lcpm2, las=2, col=col.group, xaxt="n",  xlab="")
+    axis(1, at=seq_along(labels), labels = FALSE)
+    text(x=seq_along(labels), y=par("usr")[3]-1, srt=45, adj=1, labels=labels, xpd=TRUE)
+    abline(h=median(lcpm2), col=4)
     title(main="Box Plot: Normalised counts", ylab="Log-cpm")
     linkName <- "BoxPlots.pdf"
     linkAddr <- "boxplots.pdf"
@@ -530,22 +567,100 @@
 print("Generating MDS plot")
 labels <- names(counts)

-for (i in 1:ncol(factors)) {
-    png(mdsOutPng[i], width=600, height=600)
-    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
-    imgName <- paste0("MDSPlot_", names(factors)[i], ".png")
-    imgAddr <- paste0("mdsplot_", names(factors)[i], ".png")
+# Scree plot (Variance Explained) code copied from Glimma
+
+# get column of matrix
+getCols <- function(x, inds) {
+  x[, inds, drop=FALSE]
+}
+
+x <- cpm(y, log=TRUE)
+ndim <- nsamples - 1
+nprobes <- nrow(x)
+top <- 500
+top <- min(top, nprobes)
+cn <- colnames(x)
+bad <- rowSums(is.finite(x)) < nsamples
+
+if (any(bad)) {
+  warning("Rows containing infinite values have been removed")
+  x <- x[!bad, , drop=FALSE]
+}
+
+dd <- matrix(0, nrow=nsamples, ncol=nsamples, dimnames=list(cn, cn))
+topindex <- nprobes - top + 1L
+for (i in 2L:(nsamples)) {
+  for (j in 1L:(i - 1L)) {
+    dists <- (getCols(x, i) - getCols(x, j))^2
+    dists <- sort.int(dists, partial = topindex )
+    topdist <- dists[topindex:nprobes]
+    dd[i, j] <- sqrt(mean(topdist))
+  }
+}
+
+a1 <- suppressWarnings(cmdscale(as.dist(dd), k=min(ndim, 8), eig=TRUE))
+eigen <- data.frame(name = 1:min(ndim, 8), eigen = round(a1$eig[1:min(ndim, 8)]/sum(a1$eig), 2))
+
+png(mdsscreeOutPng, width=1000, height=500)
+par(mfrow=c(1, 2))
+plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
+barplot(eigen$eigen, names.arg=eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
+imgName <- paste0("MDSPlot_", names(factors)[1], ".png")
+imgAddr <- "mdsscree.png"
+imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
+invisible(dev.off())
+
+pdf(mdsscreeOutPdf, width=14)
+par(mfrow=c(1, 2))
+plotMDS(y, labels=samplenames, col=as.numeric(factors[, 1]), main="MDS Plot: Dims 1 and 2")
+barplot(eigen$eigen, names.arg=eigen$name,  main = "Scree Plot: Variance Explained", xlab = "Dimension", ylab = "Proportion", las=1)
+linkName <- paste0("MDSPlot_", names(factors)[1], ".pdf")
+linkAddr <- "mdsscree.pdf"
+linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
+invisible(dev.off())
+
+if ("x" %in% plots) {
+    png(mdsxOutPng, width=1000, height=500)
+    par(mfrow=c(1, 2))
+    for (i in 2:3) {
+        dim1 <- i
+        dim2 <- i + 1
+        plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
+    }
+    imgName <- paste0("MDSPlot_extra.png")
+    imgAddr <- paste0("mdsplot_extra.png")
     imageData <- rbind(imageData, data.frame(Label=imgName, Link=imgAddr, stringsAsFactors=FALSE))
     invisible(dev.off())

-    pdf(mdsOutPdf[i])
-    plotMDS(y, labels=labels, col=as.numeric(factors[, i]), cex=0.8, main=paste("MDS Plot:", names(factors)[i]))
-    linkName <- paste0("MDSPlot_", names(factors)[i], ".pdf")
-    linkAddr <- paste0("mdsplot_", names(factors)[i], ".pdf")
+    pdf(mdsxOutPdf, width=14)
+    par(mfrow=c(1, 2))
+    for (i in 2:3) {
+        dim1 <- i
+        dim2 <- i + 1
+        plotMDS(y, dim=c(dim1, dim2), labels=samplenames, col=as.numeric(factors[, 1]), main=paste("MDS Plot: Dims", dim1, "and", dim2))
+    }
+    linkName <- "MDSPlot_extra.pdf"
+    linkAddr <- "mdsplot_extra.pdf"
     linkData <- rbind(linkData, data.frame(Label=linkName, Link=linkAddr, stringsAsFactors=FALSE))
     invisible(dev.off())
 }

+if ("m" %in% plots) {
+    # Plot MD plots for individual samples
+    print("Generating MD plots for samples")
+    pdf(mdsamOutPdf, width=6.5, height=10)
+    par(mfrow=c(3, 2))
+    for (i in 1:nsamples) {
+        plotMD(y, column = i)
+        abline(h=0, col="red", lty=2, lwd=2)
+    }
+    linkName <- "MDPlots_Samples.pdf"
+    linkAddr <- "mdplots_samples.pdf"
+    linkData <- rbind(linkData, c(linkName, linkAddr))
+    invisible(dev.off())
+}
+
+
 if (wantTrend) {
     # limma-trend approach
     logCPM <- cpm(y, log=TRUE, prior.count=opt$trend)
@@ -561,7 +676,7 @@
     saOutPng <- makeOut("saplot.png")
     saOutPdf <- makeOut("saplot.pdf")

-    png(saOutPng, width=600, height=600)
+    png(saOutPng, width=500, height=500)
     plotSA(fit, main="SA Plot")
     imgName <- "SAPlot.png"
     imgAddr <- "saplot.png"
@@ -589,7 +704,7 @@

     if (wantWeight) {
         # Creating voom data object and plot
-        png(voomOutPng, width=1000, height=600)
+        png(voomOutPng, width=1000, height=500)
         vData <- voomWithQualityWeights(y, design=design, plot=TRUE)
         imgName <- "VoomPlot.png"
         imgAddr <- "voomplot.png"
@@ -609,7 +724,7 @@

     } else {
         # Creating voom data object and plot
-        png(voomOutPng, width=600, height=600)
+        png(voomOutPng, width=500, height=500)
         vData <- voom(y, design=design, plot=TRUE)
         imgName <- "VoomPlot"
         imgAddr <- "voomplot.png"
@@ -661,6 +776,8 @@
 sumStatus <- summary(status)

 for (i in 1:length(contrastData)) {
+    con <- contrastData[i]
+    con <- gsub("\\(|\\)", "", con)
     # Collect counts for differential expression
     upCount[i] <- sumStatus["Up", i]
     downCount[i] <- sumStatus["Down", i]
@@ -673,22 +790,19 @@
         top <- topTable(fit, coef=i, number=Inf, sort.by="P")
     }
     write.table(top, file=topOut[i], row.names=FALSE, sep="\t", quote=FALSE)
-
-    linkName <- paste0(deMethod, "_", contrastData[i], ".tsv")
-    linkAddr <- paste0(deMethod, "_", contrastData[i], ".tsv")
+    linkName <- paste0(deMethod, "_", con, ".tsv")
+    linkAddr <- paste0(deMethod, "_", con, ".tsv")
     linkData <- rbind(linkData, c(linkName, linkAddr))

     # Plot MD (log ratios vs mean average) using limma package on weighted
     pdf(mdOutPdf[i])
     limma::plotMD(fit, status=status[, i], coef=i,
-                  main=paste("MD Plot:", unmake.names(contrastData[i])),
-                  hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
-                  xlab="Average Expression", ylab="logFC")
-
+        main=paste("MD Plot:", unmake.names(con)),
+        hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+        xlab="Average Expression", ylab="logFC")
     abline(h=0, col="grey", lty=2)
-
-    linkName <- paste0("MDPlot_", contrastData[i], ".pdf")
-    linkAddr <- paste0("mdplot_", contrastData[i], ".pdf")
+    linkName <- paste0("MDPlot_", con, ".pdf")
+    linkAddr <- paste0("mdplot_", con, ".pdf")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())

@@ -696,60 +810,30 @@
     pdf(volOutPdf[i])
     if (haveAnno) {
         # labels must be in second column currently
-        limma::volcanoplot(fit, coef=i,
-            main=paste("Volcano Plot:", unmake.names(contrastData[i])),
-            highlight=opt$topgenes,
-            names=fit$genes[, 2])
-    } else {
-        limma::volcanoplot(fit, coef=i,
-            main=paste("Volcano Plot:", unmake.names(contrastData[i])),
-            highlight=opt$topgenes,
-            names=fit$genes$GeneID)
-    }
-    linkName <- paste0("VolcanoPlot_", contrastData[i], ".pdf")
-    linkAddr <- paste0("volplot_", contrastData[i], ".pdf")
-    linkData <- rbind(linkData, c(linkName, linkAddr))
-    invisible(dev.off())
-
-    # Plot Heatmap
-    topgenes <- rownames(top[1:opt$topgenes, ])
-    if (wantTrend) {
-        topexp <- plotData[topgenes, ]
+        labels <- fit$genes[, 2]
     } else {
-        topexp <- plotData$E[topgenes, ]
+        labels <- fit$genes$GeneID
     }
-
-    pdf(heatOutPdf[i])
-    par(cex.main=0.8)
-    if (haveAnno) {
-        # labels must be in second column currently
-        heatmap.2(topexp, scale="row",
-            main=paste("Heatmap:", unmake.names(contrastData[i])),
-            col="bluered", trace="none", density.info="none",
-            margin=c(8,6), lhei=c(2,10), dendrogram="column",
-            cexRow=0.7, cexCol=0.7, srtCol=45,
-            labRow=top[topgenes, 2])
-    } else {
-        heatmap.2(topexp, scale="row",
-            main=paste("Heatmap:", unmake.names(contrastData[i])),
-            col="bluered", trace="none", density.info="none",
-            margin=c(8,6), lhei=c(2,10), dendrogram="column",
-            cexRow=0.7, cexCol=0.7, srtCol=45)
-    }
-    linkName <- paste0("Heatmap_", contrastData[i], ".pdf")
-    linkAddr <- paste0("heatmap_", contrastData[i], ".pdf")
+    limma::volcanoplot(fit, coef=i,
+        main=paste("Volcano Plot:", unmake.names(con)),
+        highlight=opt$topgenes,
+        names=labels)
+    linkName <- paste0("VolcanoPlot_", con, ".pdf")
+    linkAddr <- paste0("volplot_", con, ".pdf")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     invisible(dev.off())

     # PNG of MD and Volcano
-    png(mdvolOutPng[i], width=1200, height=600)
+    png(mdvolOutPng[i], width=1000, height=500)
     par(mfrow=c(1, 2), mar=c(5,4,2,2)+0.1, oma=c(0,0,3,0))
+
+    # MD plot
     limma::plotMD(fit, status=status[, i], coef=i, main="MD Plot",
-                   hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
-                   xlab="Average Expression", ylab="logFC")
+        hl.col=alpha(c("firebrick", "blue"), 0.4), values=c(1, -1),
+        xlab="Average Expression", ylab="logFC")
     abline(h=0, col="grey", lty=2)

-    # Volcano plots
+    # Volcano
     if (haveAnno) {
         # labels must be in second column currently
         limma::volcanoplot(fit, coef=i, main="Volcano Plot",
@@ -761,11 +845,60 @@
             names=fit$genes$GeneID)
     }

-    imgName <- paste0("MDVolPlot_", contrastData[i])
-    imgAddr <- paste0("mdvolplot_", contrastData[i], ".png")
+    imgName <- paste0("MDVolPlot_", con)
+    imgAddr <- paste0("mdvolplot_", con, ".png")
     imageData <- rbind(imageData, c(imgName, imgAddr))
-    title(paste0("Contrast: ", unmake.names(contrastData[i])), outer=TRUE, cex.main=1.5)
+    title(paste0("Contrast: ", unmake.names(con)), outer=TRUE, cex.main=1.5)
     invisible(dev.off())
+
+    if ("h" %in% plots) {
+        # Plot Heatmap
+        topgenes <- rownames(top[1:opt$topgenes, ])
+        if (wantTrend) {
+            topexp <- plotData[topgenes, ]
+        } else {
+            topexp <- plotData$E[topgenes, ]
+        }
+        pdf(heatOutPdf[i])
+        mycol <- colorpanel(1000,"blue","white","red")
+        if (haveAnno) {
+            # labels must be in second column currently
+            labels <- top[topgenes, 2]
+        } else {
+            labels <- rownames(topexp)
+        }
+        heatmap.2(topexp, scale="row", Colv=FALSE, Rowv=FALSE, dendrogram="none",
+            main=paste("Contrast:", unmake.names(con), "\nTop", opt$topgenes, "genes by adj.P.Val"),
+            trace="none", density.info="none", lhei=c(2,10), margin=c(8, 6), labRow=labels, cexRow=0.7, srtCol=45,
+            col=mycol, ColSideColors=col.group)
+        linkName <- paste0("Heatmap_", con, ".pdf")
+        linkAddr <- paste0("heatmap_", con, ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
+
+    if ("s" %in% plots) {
+        # Plot Stripcharts of top genes
+        pdf(stripOutPdf[i], title=paste("Contrast:", unmake.names(con)))
+        par(mfrow = c(3,2), cex.main=0.8, cex.axis=0.8)
+        cols <- unique(col.group)
+
+        for (j in 1:length(topgenes)) {
+            lfc <- round(top[topgenes[j], "logFC"], 2)
+            pval <- round(top[topgenes[j], "adj.P.Val"], 5)
+            if (wantTrend) {
+                stripchart(plotData[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
+                    method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+            } else {
+                stripchart(plotData$E[topgenes[j], ] ~ factors[, 1], vertical=TRUE, las=2, pch=16, cex=0.8, cex.lab=0.8, col=cols,
+                    method="jitter", ylab="Normalised log2 expression", main=paste0(labels[j], "\nlogFC=", lfc, ", adj.P.Val=", pval))
+            }
+        }
+        linkName <- paste0("Stripcharts_", con, ".pdf")
+        linkAddr <- paste0("stripcharts_", con, ".pdf")
+        linkData <- rbind(linkData, c(linkName, linkAddr))
+        invisible(dev.off())
+    }
 }
 sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
 row.names(sigDiff) <- contrastData
@@ -774,10 +907,10 @@
 if (wantRda) {
     print("Saving RData")
     if (wantWeight) {
-      save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrasts, design,
+      save(counts, data, y, status, plotData, labels, factors, wts, fit, top, contrastData, contrasts, design,
            file=rdaOut, ascii=TRUE)
     } else {
-      save(counts, data, y, status, plotData, labels, factors, fit, top, contrasts, design,
+      save(counts, data, y, status, plotData, labels, factors, fit, top, contrastData, contrasts, design,
            file=rdaOut, ascii=TRUE)
     }
     linkData <- rbind(linkData, c((paste0(deMethod, "_analysis.RData")), (paste0(deMethod, "_analysis.RData"))))
@@ -805,8 +938,8 @@
 cata("Links to PDF copies of plots are in 'Plots' section below <br />\n")

 for (i in 1:nrow(imageData)) {
-    if (grepl("density|box|mdvol", imageData$Link[i])) {
-        HtmlImage(imageData$Link[i], imageData$Label[i], width=1200)
+    if (grepl("density|box|mds|mdvol", imageData$Link[i])) {
+        HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
     } else if (wantWeight) {
         HtmlImage(imageData$Link[i], imageData$Label[i], width=1000)
     } else {
@@ -834,8 +967,33 @@
 cata("</table>")

 cata("<h4>Plots:</h4>\n")
+#PDFs
 for (i in 1:nrow(linkData)) {
-    if (grepl(".pdf", linkData$Link[i])) {
+    if (grepl("density|cpm|boxplot|mds|mdplots|voomplot|saplot", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("mdplot_", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("volplot", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("heatmap", linkData$Link[i])) {
+        HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
+for (i in 1:nrow(linkData)) {
+    if (grepl("stripcharts", linkData$Link[i])) {
         HtmlLink(linkData$Link[i], linkData$Label[i])
   }
 }